1
0
mirror of https://github.com/JvanKatwijk/dabradio synced 2025-10-06 00:02:49 +02:00
This commit is contained in:
jan van katwijk
2019-03-20 12:54:53 +01:00
46 changed files with 587 additions and 8867 deletions

View File

@@ -129,7 +129,6 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
./includes/backend/data/mot
./includes/backend/data/journaline
./includes/support
./includes/support/viterbi_768
./includes/output
./devices
/usr/include/
@@ -175,10 +174,9 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
./includes/output/audio-base.h
./includes/output/newconverter.h
./includes/output/audiosink.h
./includes/support/viterbi_768/viterbi-768.h
./includes/support/viterbi-handler.h
./includes/support/fft-handler.h
./includes/support/ringbuffer.h
./includes/support/Xtan2.h
./includes/support/dab-params.h
./includes/support/band-handler.h
./includes/support/text-mapper.h
@@ -228,13 +226,11 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
./src/output/fir-filters.cpp
./src/output/audiosink.cpp
./src/support/fft-handler.cpp
./src/support/Xtan2.cpp
./src/support/dab-params.cpp
./src/support/band-handler.cpp
./src/support/text-mapper.cpp
./src/support/dab_tables.cpp
./src/support/viterbi_768/viterbi-768.cpp
./src/support/viterbi_768/spiral-no-sse.c
./src/support/viterbi-handler.cpp
)
set (${objectName}_MOCS

View File

@@ -133,7 +133,6 @@ endif ()
./includes/backend/data/journaline
./includes/backend/data/mot
./includes/support
./includes/support/viterbi_768
./includes/output
./devices
/usr/include/
@@ -178,10 +177,9 @@ endif ()
./includes/output/fir-filters.h
./includes/output/audio-base.h
./includes/output/newconverter.h
./includes/support/viterbi_768/viterbi-768.h
./includes/support/viterbi-handler.h
./includes/support/fft-handler.h
./includes/support/ringbuffer.h
./includes/support/Xtan2.h
./includes/support/dab-params.h
./includes/support/band-handler.h
./includes/support/text-mapper.h
@@ -230,13 +228,11 @@ endif ()
./src/output/newconverter.cpp
./src/output/fir-filters.cpp
./src/support/fft-handler.cpp
./src/support/Xtan2.cpp
./src/support/dab-params.cpp
./src/support/band-handler.cpp
./src/support/text-mapper.cpp
./src/support/dab_tables.cpp
./src/support/viterbi_768/viterbi-768.cpp
./src/support/viterbi_768/spiral-no-sse.c
./src/support/viterbi-handler.cpp
)
set (${objectName}_UIS

View File

@@ -3,7 +3,38 @@
dabradio is software for Windows, Linux and Raspberry Pi for listening to terrestrial Digital Audio Broadcasting (DAB and DAB+). It is the little brother of Qt-DAB.
NEW: the software should be able to run with the mirics SDRplay-duo
------------------------------------------------------------------------
NEW: a script to build an executable on an rpi
-----------------------------------------------------------------------
In order to ease building an executable on a Raspberry 2 or 3, a
script, "script-rpi.sh", is available that installs all
required libraries and builds an
executable, "dabradio-1.0", configured for DABsticks and sdrPlay
devices.
The script will load sources for a DABstick handler and build
and install the library.
An installer for the sdrPlay devices can be obtained from "www.sdrplay.com".
The script assumes an installation of Stretch on the RPI device.
It also assumes that "git" is installed, to fetch the source package.
sudo apt-get install git
git clone https://github.com/JvanKatwijk/dabradio
cd dabradio
chmod 777 script-rpi.sh
./script-rpi.sh
Note that the installer for the rtlsdr handler will create an "udev" file
(in "/etc/udev/rules.d"), that will be active only after a restart (or a restart of the udev subsystem).
While the various packages are being installed, you will be asked
several times to confirm, so although it takes a few minutes
it is wise to stay around.
The executable will be installed in the subdirectory "linux-bin".
------------------------------------------------------------------
Table of Contents
@@ -93,8 +124,9 @@ some gaps in the audio output.
Windows
------------------------------------------------------------------
Windows releases can be found at https://github.com/JvanKatwijk/dabradio/releases. The zipped folder found there contains
the executable for this and some other Windows programs, as well as the libraries required.
For Windows, an *installer* is available in the releases
section. The installer will also install - if not installed
already - the library for SDRplay support.
If you want to compile it by yourself, please install Qt
through its online installer, see https://www.qt.io/

View File

@@ -68,7 +68,7 @@ int32_t i;
this -> nrBlocks = params. get_L ();
this -> carriers = params. get_carriers ();
this -> carrierDiff = params. get_carrierDiff ();
this -> giveSignal = false;
this -> scanMode = false;
ofdmBuffer. resize (2 * T_s);
ofdmBufferIndex = 0;
@@ -92,9 +92,10 @@ int32_t i;
}
}
void dabProcessor::start (int frequency, bool giveSignal) {
this -> frequency = frequency;
this -> giveSignal = giveSignal;
void dabProcessor::start (int frequency, bool scanMode) {
this -> frequency = frequency;
this -> scanMode = scanMode;
startFailures = 0;
this -> QThread::start ();
}
@@ -137,7 +138,7 @@ notSynced:
break; // yes, we are ready
case NO_DIP_FOUND:
if (giveSignal && (++ attempts >= 5)) {
if (scanMode && (++ attempts >= 5)) {
emit (No_Signal_Found ());
attempts = 0;
}
@@ -174,9 +175,15 @@ SyncOnPhase:
if (!correctionNeeded) {
setSyncLost ();
}
startFailures ++;
if (scanMode && (startFailures > 3)) {
emit (No_Signal_Found ());
startFailures = 0;
}
goto notSynced;
}
startFailures = 0;
/**
* Once here, we are synchronized, we need to copy the data we
* used for synchronization for block 0
@@ -252,6 +259,21 @@ NewOffset:
*/
myReader. getSamples (ofdmBuffer. data (),
T_null, coarseOffset);
float sum = 0;
for (i = 0; i < T_null; i ++)
sum += abs (ofdmBuffer [i]);
sum /= T_null;
static float snr = 0;
snr = 0.9 * snr +
0.1 * 20 * log10 ((myReader. get_sLevel () + 0.005) / sum);
static int ccc = 0;
if (++ccc > 10) {
ccc = 0;
show_snr ((int)snr);
}
/**
* The first sample to be found for the next frame should be T_g
* samples ahead. Before going for the next frame, we

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB program
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#
@@ -92,7 +92,8 @@ private:
mscHandler my_mscHandler;
int32_t frequency;
int16_t attempts;
bool giveSignal;
bool scanMode;
int startFailures;
int32_t T_null;
int32_t T_u;
int32_t T_s;
@@ -112,7 +113,6 @@ private:
uint32_t ofdmSymbolCount;
phaseReference phaseSynchronizer;
ofdmDecoder my_ofdmDecoder;
bool wasSecond (int16_t, dabParams *);
virtual void run (void);
bool isReset;
signals:
@@ -120,8 +120,8 @@ signals:
void No_Signal_Found (void);
void setSyncLost (void);
void showCoordinates (int, int);
// void showCoordinates (float, float);
void show_Spectrum (int);
void show_snr (int);
};
#endif

69
dabradio-installer.iss Normal file
View File

@@ -0,0 +1,69 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
#define MyAppName "dabradio"
#define MyAppVersion "1.0"
#define MyAppPublisher "Lazy Chair Computing"
#define MyAppURL "https://github.com/JvanKatwijk/dabradio"
#define MyAppExeName "dabradio-1.0.exe";
[Setup]
; NOTE: The value of AppId uniquely identifies this application.
; Do not use the same AppId value in installers for other applications.
; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
AppId= {{B4C322AE-1C29-47E8-BF74-ED434065488D}
AppName={#MyAppName}
AppVersion={#MyAppVersion}
;AppVerName={#MyAppName} {#MyAppVersion}
AppPublisher={#MyAppPublisher}
AppPublisherURL={#MyAppURL}
AppSupportURL={#MyAppURL}
AppUpdatesURL={#MyAppURL}
DefaultDirName={pf}\{#MyAppName}
DisableProgramGroupPage=yes
LicenseFile=E:\sdr-j-development\windows-qt-dab\COPYRIGHT.this_software
InfoBeforeFile=E:\sdr-j-development\windows-dabradio\preamble.txt
OutputBaseFilename=setup-dabradio
Compression=lzma
SolidCompression=yes
[Languages]
Name: "english"; MessagesFile: "compiler:Default.isl"
[Tasks]
Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
[Files]
Source: "E:\sdr-j-development\windows-dabradio\dabradio-1.0.exe"; DestDir: "{app}"; Flags: ignoreversion
Source: "E:\sdr-j-development\windows-dabradio\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
Source: "E:\sdr-j-development\SDRplay_RSP_API-Windows-2.13.1.exe"; DestDir: "{app}"; AfterInstall : install_sdrplayApi
[Icons]
Name: "{commonprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"
Name: "{commondesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon
[Run]
Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
[code]
{ AfterInstall handler attached to the bundled SDRplay API installer in the
  [Files] section. It launches that installer only when no SDRplay API
  installation is registered, i.e. when the registry key
  HKLM\SOFTWARE\MiricsSDR\API does not exist. }
procedure install_sdrplayApi;
var
  resultCode : Integer;
begin
  // Ask the registry directly for the key. The earlier hand-written scan of
  // the subkey names tested a Boolean that was never initialised, ignored a
  // failing RegGetSubkeyNames call and compared the name case-sensitively,
  // although registry key names are case-insensitive.
  if RegKeyExists (HKEY_LOCAL_MACHINE, 'SOFTWARE\MiricsSDR\API') then
    exit;

  MsgBox ('Software\MiricsSDR\API not found', mbInformation, MB_OK);

  // Exec returns False when the process could not be started; resultCode
  // then holds the system error code, which is reported instead of dropped.
  if not Exec (ExpandConstant ('{app}\SDRplay_RSP_API-Windows-2.13.1.exe'),
               '', '', SW_SHOWNORMAL, ewWaitUntilTerminated, resultCode) then
    MsgBox ('Could not start the SDRplay API installer: ' +
            SysErrorMessage (resultCode), mbError, MB_OK);
end;

View File

@@ -33,7 +33,6 @@ DEPENDPATH += . \
./src/backend/data/mot \
./src/output \
./src/support \
./src/support/viterbi_768 \
./devices \
./devices/rawfiles \
./devices/wavfiles \
@@ -44,7 +43,6 @@ DEPENDPATH += . \
./includes/backend/data/mot \
./includes/output \
./includes/support \
./includes/support/viterbi_768
INCLUDEPATH += . \
./ \
@@ -59,7 +57,6 @@ INCLUDEPATH += . \
./includes/backend/data/mot \
./includes/output \
./includes/support \
./includes/support/viterbi_768 \
./devices \
./devices/rawfiles \
./devices/wavfiles
@@ -106,10 +103,9 @@ HEADERS += ./radio.h \
./includes/output/audio-base.h \
./includes/output/newconverter.h \
./includes/output/audiosink.h \
./includes/support/viterbi_768/viterbi-768.h \
./includes/support/viterbi-handler.h \
./includes/support/fft-handler.h \
./includes/support/ringbuffer.h \
./includes/support/Xtan2.h \
./includes/support/dab-params.h \
./includes/support/band-handler.h \
./includes/support/text-mapper.h \
@@ -158,9 +154,8 @@ SOURCES += ./main.cpp \
./src/output/audio-base.cpp \
./src/output/newconverter.cpp \
./src/output/audiosink.cpp \
./src/support/viterbi_768/viterbi-768.cpp \
./src/support/viterbi-handler.cpp \
./src/support/fft-handler.cpp \
./src/support/Xtan2.cpp \
./src/support/dab-params.cpp \
./src/support/band-handler.cpp \
./src/support/text-mapper.cpp \
@@ -200,8 +195,8 @@ LIBS += -lfaad
# (you obviously have libraries installed for the selected ones)
CONFIG += dabstick
CONFIG += sdrplay
CONFIG += airspy
CONFIG += hackrf
#CONFIG += airspy
#CONFIG += hackrf
#if you want to listen remote, uncomment
#CONFIG += tcp-streamer # use for remote listening
#otherwise, if you want to use the default qt way of sound out
@@ -214,13 +209,6 @@ DEFINES += __THREADED_BACKEND
#and this one is experimental
DEFINES += PRESET_NAME
#and these one is just experimental,
#NO_SSE is always safe
#CONFIG += NEON_RPI2
#CONFIG += NEON_RPI3
CONFIG += SSE
#CONFIG += NO_SSE
}
#
# an attempt to have it run under W32 through cross compilation
@@ -262,9 +250,7 @@ FORMS += ./forms/dabradio.ui
CONFIG += airspy
CONFIG += dabstick
CONFIG += sdrplay
CONFIG += hackrf
CONFIG += NO_SSE
#CONFIG += hackrf
#for the raspberry you definitely want this one
#when this one is enabled, load is spread over different threads
@@ -341,32 +327,3 @@ qt-audio {
./src/output/Qt-audiodevice.cpp
}
# for RPI2 use:
NEON_RPI2 {
DEFINES += NEON_AVAILABLE
QMAKE_CFLAGS += -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4
QMAKE_CXXFLAGS += -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4
HEADERS += ./src/support/viterbi_768/spiral-neon.h
SOURCES += ./src/support/viterbi_768/spiral-neon.c
}
# for RPI3 use:
NEON_RPI3 {
DEFINES += NEON_AVAILABLE
# QMAKE_CFLAGS += -mcpu=cortex-a53 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mneon-for-64bits
# QMAKE_CXXFLAGS += -mcpu=cortex-a53 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mneon-for-64bits
HEADERS += ./src/support/viterbi_768/spiral-neon.h
SOURCES += ./src/support/viterbi_768/spiral-neon.c
}
SSE {
DEFINES += SSE_AVAILABLE
HEADERS += ./src/support/viterbi_768/spiral-sse.h
SOURCES += ./src/support/viterbi_768/spiral-sse.c
}
NO_SSE {
HEADERS += ./src/support/viterbi_768/spiral-no-sse.h
SOURCES += ./src/support/viterbi_768/spiral-no-sse.c
}

View File

@@ -167,12 +167,12 @@ int i, k;
gains = new int [gainsCount];
gainsCount = rtlsdr_get_tuner_gains (device, gains);
for (i = gainsCount; i > 0; i--) {
fprintf(stderr, "%.1f ", gains [i - 1] / 10.0);
fprintf (stderr, "%.1f ", gains [i - 1] / 10.0);
}
fprintf(stderr, "\n");
rtlsdr_set_tuner_gain_mode (device, 1);
rtlsdr_set_agc_mode (device, 0);
_I_Buffer = new RingBuffer<uint8_t>(8 * 1024 * 1024);
//
// See what the saved values are and restore the GUI settings
@@ -212,7 +212,8 @@ int i, k;
#endif
return;
}
//
// library was open
stopReader ();
this -> rtlsdr_close (device);
@@ -233,7 +234,7 @@ int i, k;
}
//
//
bool rtlsdrHandler::restartReader (void) {
bool rtlsdrHandler::restartReader (int32_t frequency) {
int32_t r;
if (workerHandle != NULL)
@@ -244,7 +245,7 @@ int32_t r;
if (r < 0)
return false;
this -> rtlsdr_set_center_freq (device, lastFrequency);
this -> rtlsdr_set_center_freq (device, frequency);
workerHandle = new dll_driver (this);
rtlsdr_set_agc_mode (device, agcControl -> isChecked ());
rtlsdr_set_tuner_gain (device,
@@ -268,8 +269,10 @@ void rtlsdrHandler::stopReader (void) {
//
// when selecting the gain from a table, use the table value
void rtlsdrHandler::set_ifgain (int gain) {
fprintf (stderr, "gain will be set %d to %d\n",
gain, gains [gain * gainsCount / 100]);
rtlsdr_set_tuner_gain (device,
gain * gainsCount / 100);
gains [gain * gainsCount / 100]);
}
//
void rtlsdrHandler::set_agcControl (int dummy) {
@@ -277,21 +280,42 @@ void rtlsdrHandler::set_agcControl (int dummy) {
rtlsdr_set_tuner_gain (device,
gains [(int)(ifgainSelector -> value () * gainsCount / 100)]);
}
//
// we only have 8 bits, so rather than doing a float division to get
// the float value we want, we precompute the possibilities
static
float convTable [] = {
-128 / 128.0 , -127 / 128.0 , -126 / 128.0 , -125 / 128.0 , -124 / 128.0 , -123 / 128.0 , -122 / 128.0 , -121 / 128.0 , -120 / 128.0 , -119 / 128.0 , -118 / 128.0 , -117 / 128.0 , -116 / 128.0 , -115 / 128.0 , -114 / 128.0 , -113 / 128.0
, -112 / 128.0 , -111 / 128.0 , -110 / 128.0 , -109 / 128.0 , -108 / 128.0 , -107 / 128.0 , -106 / 128.0 , -105 / 128.0 , -104 / 128.0 , -103 / 128.0 , -102 / 128.0 , -101 / 128.0 , -100 / 128.0 , -99 / 128.0 , -98 / 128.0 , -97 / 128.0
, -96 / 128.0 , -95 / 128.0 , -94 / 128.0 , -93 / 128.0 , -92 / 128.0 , -91 / 128.0 , -90 / 128.0 , -89 / 128.0 , -88 / 128.0 , -87 / 128.0 , -86 / 128.0 , -85 / 128.0 , -84 / 128.0 , -83 / 128.0 , -82 / 128.0 , -81 / 128.0
, -80 / 128.0 , -79 / 128.0 , -78 / 128.0 , -77 / 128.0 , -76 / 128.0 , -75 / 128.0 , -74 / 128.0 , -73 / 128.0 , -72 / 128.0 , -71 / 128.0 , -70 / 128.0 , -69 / 128.0 , -68 / 128.0 , -67 / 128.0 , -66 / 128.0 , -65 / 128.0
, -64 / 128.0 , -63 / 128.0 , -62 / 128.0 , -61 / 128.0 , -60 / 128.0 , -59 / 128.0 , -58 / 128.0 , -57 / 128.0 , -56 / 128.0 , -55 / 128.0 , -54 / 128.0 , -53 / 128.0 , -52 / 128.0 , -51 / 128.0 , -50 / 128.0 , -49 / 128.0
, -48 / 128.0 , -47 / 128.0 , -46 / 128.0 , -45 / 128.0 , -44 / 128.0 , -43 / 128.0 , -42 / 128.0 , -41 / 128.0 , -40 / 128.0 , -39 / 128.0 , -38 / 128.0 , -37 / 128.0 , -36 / 128.0 , -35 / 128.0 , -34 / 128.0 , -33 / 128.0
, -32 / 128.0 , -31 / 128.0 , -30 / 128.0 , -29 / 128.0 , -28 / 128.0 , -27 / 128.0 , -26 / 128.0 , -25 / 128.0 , -24 / 128.0 , -23 / 128.0 , -22 / 128.0 , -21 / 128.0 , -20 / 128.0 , -19 / 128.0 , -18 / 128.0 , -17 / 128.0
, -16 / 128.0 , -15 / 128.0 , -14 / 128.0 , -13 / 128.0 , -12 / 128.0 , -11 / 128.0 , -10 / 128.0 , -9 / 128.0 , -8 / 128.0 , -7 / 128.0 , -6 / 128.0 , -5 / 128.0 , -4 / 128.0 , -3 / 128.0 , -2 / 128.0 , -1 / 128.0
, 0 / 128.0 , 1 / 128.0 , 2 / 128.0 , 3 / 128.0 , 4 / 128.0 , 5 / 128.0 , 6 / 128.0 , 7 / 128.0 , 8 / 128.0 , 9 / 128.0 , 10 / 128.0 , 11 / 128.0 , 12 / 128.0 , 13 / 128.0 , 14 / 128.0 , 15 / 128.0
, 16 / 128.0 , 17 / 128.0 , 18 / 128.0 , 19 / 128.0 , 20 / 128.0 , 21 / 128.0 , 22 / 128.0 , 23 / 128.0 , 24 / 128.0 , 25 / 128.0 , 26 / 128.0 , 27 / 128.0 , 28 / 128.0 , 29 / 128.0 , 30 / 128.0 , 31 / 128.0
, 32 / 128.0 , 33 / 128.0 , 34 / 128.0 , 35 / 128.0 , 36 / 128.0 , 37 / 128.0 , 38 / 128.0 , 39 / 128.0 , 40 / 128.0 , 41 / 128.0 , 42 / 128.0 , 43 / 128.0 , 44 / 128.0 , 45 / 128.0 , 46 / 128.0 , 47 / 128.0
, 48 / 128.0 , 49 / 128.0 , 50 / 128.0 , 51 / 128.0 , 52 / 128.0 , 53 / 128.0 , 54 / 128.0 , 55 / 128.0 , 56 / 128.0 , 57 / 128.0 , 58 / 128.0 , 59 / 128.0 , 60 / 128.0 , 61 / 128.0 , 62 / 128.0 , 63 / 128.0
, 64 / 128.0 , 65 / 128.0 , 66 / 128.0 , 67 / 128.0 , 68 / 128.0 , 69 / 128.0 , 70 / 128.0 , 71 / 128.0 , 72 / 128.0 , 73 / 128.0 , 74 / 128.0 , 75 / 128.0 , 76 / 128.0 , 77 / 128.0 , 78 / 128.0 , 79 / 128.0
, 80 / 128.0 , 81 / 128.0 , 82 / 128.0 , 83 / 128.0 , 84 / 128.0 , 85 / 128.0 , 86 / 128.0 , 87 / 128.0 , 88 / 128.0 , 89 / 128.0 , 90 / 128.0 , 91 / 128.0 , 92 / 128.0 , 93 / 128.0 , 94 / 128.0 , 95 / 128.0
, 96 / 128.0 , 97 / 128.0 , 98 / 128.0 , 99 / 128.0 , 100 / 128.0 , 101 / 128.0 , 102 / 128.0 , 103 / 128.0 , 104 / 128.0 , 105 / 128.0 , 106 / 128.0 , 107 / 128.0 , 108 / 128.0 , 109 / 128.0 , 110 / 128.0 , 111 / 128.0
, 112 / 128.0 , 113 / 128.0 , 114 / 128.0 , 115 / 128.0 , 116 / 128.0 , 117 / 128.0 , 118 / 128.0 , 119 / 128.0 , 120 / 128.0 , 121 / 128.0 , 122 / 128.0 , 123 / 128.0 , 124 / 128.0 , 125 / 128.0 , 126 / 128.0 , 127 / 128.0 };
//
// The brave old getSamples. For the dab stick, we get
// size samples: still in I/Q pairs, but we have to convert the data from
// uint8_t to std::complex<float> *
// uint8_t to DSPCOMPLEX *
int32_t rtlsdrHandler::getSamples (std::complex<float> *V, int32_t size) {
int32_t amount, i;
uint8_t *tempBuffer = (uint8_t *)alloca (2 * size * sizeof (uint8_t));
//
amount = _I_Buffer -> getDataFromBuffer (tempBuffer, 2 * size);
for (i = 0; i < amount / 2; i ++)
V [i] = std::complex<float>
((float (tempBuffer [2 * i] - 128)) / 128.0,
(float (tempBuffer [2 * i + 1] - 128)) / 128.0);
(convTable [tempBuffer [2 * i]],
convTable [tempBuffer [2 * i + 1]]);;
return amount / 2;
}

View File

@@ -72,7 +72,7 @@ public:
QCheckBox *);
~rtlsdrHandler (void);
// interface to the reader
bool restartReader (void);
bool restartReader (int32_t frequency);
void stopReader (void);
int32_t getSamples (std::complex<float> *, int32_t);
int32_t Samples (void);

View File

@@ -168,7 +168,7 @@ ULONG APIkeyValue_length = 255;
break;
}
sdrplaySettings -> beginGroup ("sdrplaySettings");
sdrplaySettings -> beginGroup ("sdrplaySettings");
int lnaState = sdrplaySettings -> value ("lnaState", 3). toInt ();
lnaGainSetting -> setValue (lnaState);
@@ -211,6 +211,11 @@ ULONG APIkeyValue_length = 255;
if (!libraryLoaded) // should not happen
return;
stopReader ();
sdrplaySettings -> beginGroup ("sdrplaySettings");
sdrplaySettings -> value ("lnaState", lnaGainSetting -> value ());
sdrplaySettings -> value ("GRdB", GRdBSelector -> value ());
sdrplaySettings -> endGroup ();
if (_I_Buffer != NULL)
delete _I_Buffer;
@@ -228,16 +233,22 @@ int GRdB = GRdBSelector -> value ();
int lnaState = lnaGainSetting -> value ();
(void)newGRdB;
if (!running. load ())
return;
err = my_mir_sdr_RSP_SetGr (GRdB, lnaState, 1, 0);
if (err != mir_sdr_Success)
fprintf (stderr, "Error at set_ifgain %s\n",
errorCodes (err). toLatin1 (). data ());
fprintf (stderr, "Error at set_ifgain %s (%d %d)\n",
errorCodes (err). toLatin1 (). data (),
GRdB, lnaState);
}
void sdrplayHandler::set_lnagainReduction (int lnaState) {
mir_sdr_ErrT err;
if (!running. load ())
return;
if (!agcControl -> isChecked ()) {
set_ifgainReduction (0);
return;

View File

@@ -371,9 +371,9 @@
<widget class="QProgressBar" name="ficQuality">
<property name="geometry">
<rect>
<x>30</x>
<y>40</y>
<width>151</width>
<x>10</x>
<y>30</y>
<width>121</width>
<height>21</height>
</rect>
</property>
@@ -387,9 +387,9 @@
<widget class="QProgressBar" name="audioQuality">
<property name="geometry">
<rect>
<x>187</x>
<y>40</y>
<width>171</width>
<x>140</x>
<y>30</y>
<width>151</width>
<height>21</height>
</rect>
</property>
@@ -400,6 +400,25 @@
<number>24</number>
</property>
</widget>
<widget class="QLCDNumber" name="snrDisplay">
<property name="geometry">
<rect>
<x>300</x>
<y>30</y>
<width>64</width>
<height>23</height>
</rect>
</property>
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="digitCount">
<number>3</number>
</property>
<property name="segmentStyle">
<enum>QLCDNumber::Flat</enum>
</property>
</widget>
</widget>
<widget class="QStatusBar" name="statusbar"/>
</widget>

View File

@@ -29,7 +29,7 @@
#include <stdio.h>
#include <stdint.h>
#include <vector>
#include "viterbi-768.h"
#include "viterbi-handler.h"
#include <QObject>
#include "fib-processor.h"
#include "dab-params.h"
@@ -46,7 +46,7 @@ public:
void stop (void);
void reset (void);
private:
viterbi_768 myViterbi;
viterbiHandler myViterbi;
dabParams params;
uint8_t bitBuffer_out [768];
int16_t ofdm_input [2304];

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB program
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __OFDM_DECODER__
@@ -55,7 +55,6 @@ public:
void decode (std::vector<std::complex<float> >,
int32_t n, int16_t *);
int16_t get_snr (std::complex<float> *);
void stop (void);
void reset (void);
private:
@@ -71,17 +70,13 @@ private:
int32_t T_g;
int32_t nrBlocks;
int32_t carriers;
int16_t getMiddle (void);
std::vector<complex<float>> phaseReference;
std::vector<int16_t> ibits;
std::complex<float> *fft_buffer;
phaseTable *phasetable;
int32_t blockIndex;
int16_t snrCount;
int16_t snr;
int16_t maxSignal;
signals:
void show_snr (int);
void showIQ (int);
void showQuality (float);

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB.
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB.
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
@@ -27,11 +27,11 @@
#include <stdint.h>
#include <vector>
#include "viterbi-768.h"
#include "viterbi-handler.h"
extern uint8_t PI_X [];
class protection: public viterbi_768 {
class protection: public viterbiHandler {
public:
protection (int16_t, int16_t);
virtual ~protection (void);
@@ -39,8 +39,8 @@ virtual bool deconvolve (int16_t *, int32_t, uint8_t *);
protected:
int16_t bitRate;
int32_t outSize;
std::vector<int16_t> viterbiBlock;
std::vector<uint8_t> indexTable;
std::vector<int16_t> viterbiBlock;
};
#endif

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB program
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#

View File

@@ -1,35 +0,0 @@
#
//
// This LUT implementation of atan2 is a C++ translation of
// a Java discussion on the net
// http://www.java-gaming.org/index.php?topic=14647.0
#ifndef __COMP_ATAN
#define __COMP_ATAN
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <cstdlib>
#include <limits>
#include "dab-constants.h"
#
// Table-driven atan2: per the note at the top of this header, a C++
// translation of a lookup-table (LUT) atan2 implementation.
// Only the declaration is visible here; the table sizes and how they are
// filled live in the implementation file.
class compAtan {
public:
compAtan (void);
~compAtan (void);
// Two-argument arctangent; presumably (y, x) as in the C library atan2 -
// TODO confirm argument order against the implementation.
float atan2 (float, float);
// Takes a complex sample; presumably returns its argument (phase angle)
// using the table lookup - verify against the implementation.
float argX (std::complex<float>);
private:
// Eight raw table pointers. NOTE(review): the P/N letters look like the
// sign combination of the two inputs and the trailing X/Y like the
// dominant axis - confirm. Presumably allocated in the constructor and
// released in the destructor; the class declares no copy control.
float *ATAN2_TABLE_PPY;
float *ATAN2_TABLE_PPX;
float *ATAN2_TABLE_PNY;
float *ATAN2_TABLE_PNX;
float *ATAN2_TABLE_NPY;
float *ATAN2_TABLE_NPX;
float *ATAN2_TABLE_NNY;
float *ATAN2_TABLE_NNX;
// NOTE(review): presumably the scale factor mapping an input ratio onto a
// table index - confirm.
float Stretch;
};
#endif

View File

@@ -0,0 +1,47 @@
#
/*
* Copyright (C) 2014 .. 2017
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of dabradio
*
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __VITERBI_HANDLER__
#define __VITERBI_HANDLER__
#include <stdint.h>
// Viterbi decoder interface that replaces the former viterbi_768 class:
// users of that class now include this header and name this type instead.
// Only the declaration is visible here; the algorithm itself lives in
// viterbi-handler.cpp.
//
// NOTE(review): the destructor is not virtual, although class `protection`
// derives publicly from this class - never delete a derived object through
// a viterbiHandler pointer.
// NOTE(review): the class holds raw pointers and declares no copy
// constructor or assignment operator; if the destructor frees them, copying
// an instance would double-free - confirm and consider deleting the copy
// operations.
class viterbiHandler {
public:
// The int argument is presumably the block length in bits (see
// blockLength below) - TODO confirm against the implementation.
viterbiHandler (int);
~viterbiHandler (void);
// Decodes one block: first argument is the int16_t input (presumably
// soft bits), second the uint8_t output buffer. Buffer sizes are not
// visible here - verify against the callers.
void deconvolve (int16_t *, uint8_t *);
private:
// 16 entries; filled by computeCostTable, judging by the names.
int costTable [16];
// Takes four int16_t values; presumably the four soft bits belonging to
// one input bit of a rate 1/4 code (4 bits give 16 combinations) -
// confirm.
void computeCostTable (int16_t, int16_t, int16_t, int16_t);
uint8_t bitFor (int, int, int);
int blockLength;
// Dynamically sized work areas; presumably allocated in the constructor
// and released in the destructor - verify.
int *stateSequence;
int **transCosts;
int **history;
};
#endif

View File

@@ -1,63 +0,0 @@
#
#ifndef __VITERBI__
#define __VITERBI__
/*
* Viterbi.h according to the SPIRAL project
*/
#include "dab-constants.h"
// For our particular viterbi decoder, we have
// Viterbi decoder declaration, "according to the SPIRAL project" per the
// header comment of this file. Only the declaration is visible here.
class viterbi {
// NOTE: these macros are written inside the class body, but the
// preprocessor ignores scope: they are visible to every translation unit
// that includes this header, from this point on.
#define RATE 4
#define NUMSTATES 64
#define BITS_PER_BYTE 8
// decision_t is a BIT vector
// One bit per state: NUMSTATES / 8 = 8 bytes, addressable as bytes (t, c),
// 16-bit words (s) or 32-bit words (w); 16-byte aligned.
typedef union {
uint8_t t [NUMSTATES / BITS_PER_BYTE];
uint32_t w [NUMSTATES / 32];
uint16_t s [NUMSTATES / 16];
uint8_t c [NUMSTATES / 8];
} decision_t __attribute__ ((aligned (16)));
// One int16_t path metric per state (NUMSTATES entries), 16-byte aligned.
typedef union {
int16_t t[NUMSTATES];
} metric_t __attribute__ ((aligned (16)));
/*
 * State info for instance of Viterbi decoder
 */
struct v {
/* path metric buffer 1 */
__attribute__ ((aligned (16))) metric_t metrics1;
/* path metric buffer 2 */
__attribute__ ((aligned (16))) metric_t metrics2;
/* Pointers to path metrics, swapped on every bit */
metric_t *old_metrics,*new_metrics;
decision_t *decisions; /* decisions */
};
public:
// The int16_t argument is presumably the frame length in bits (see
// frameBits below) - TODO confirm against the implementation.
viterbi (int16_t);
~viterbi (void);
// Decodes one frame: int16_t input, uint8_t output. Buffer sizes are not
// visible here - verify against the callers.
void deconvolve (int16_t *, uint8_t *);
private:
struct v vp;
// NUMSTATES / 2 * RATE = 128 entries, 16-byte aligned.
int16_t Branchtab [NUMSTATES / 2 * RATE] __attribute__ ((aligned (16)));
int16_t parity (int16_t);
void init_viterbi (struct v *, int16_t);
void update_viterbi_blk_GENERIC (struct v *, int16_t *,
int16_t);
void chainback_viterbi (struct v *, uint8_t *, int16_t, uint16_t);
void BFLY (int32_t, int, int16_t *,
struct v *, decision_t *);
// Raw work buffers; presumably allocated in the constructor and freed in
// the destructor - verify. No copy control is declared.
uint8_t *data;
int16_t *symbols;
int16_t frameBits;
};
#endif

View File

@@ -1,73 +0,0 @@
#
#ifndef __VITERBI_768__
#define __VITERBI_768__
/*
* Viterbi.h according to the SPIRAL project
*/
#include "dab-constants.h"
// For our particular viterbi decoder, we have
#define RATE 4
#define NUMSTATES 64
#define DECISIONTYPE uint32_t
//#define DECISIONTYPE uint8_t
//#define DECISIONTYPE_BITSIZE 8
#define DECISIONTYPE_BITSIZE 32
#define COMPUTETYPE uint32_t
//decision_t is a BIT vector
typedef union {
DECISIONTYPE t[NUMSTATES/DECISIONTYPE_BITSIZE];
uint32_t w[NUMSTATES/32];
uint16_t s[NUMSTATES/16];
uint8_t c[NUMSTATES/8];
} decision_t __attribute__ ((aligned (16)));
typedef union {
COMPUTETYPE t[NUMSTATES];
} metric_t __attribute__ ((aligned (16)));
/* State info for instance of Viterbi decoder
*/
struct v {
/* path metric buffer 1 */
__attribute__ ((aligned (16))) metric_t metrics1;
/* path metric buffer 2 */
__attribute__ ((aligned (16))) metric_t metrics2;
/* Pointers to path metrics, swapped on every bit */
metric_t *old_metrics,*new_metrics;
decision_t *decisions; /* decisions */
};
//	Viterbi decoder class for the rate 1/RATE code with NUMSTATES
//	trellis states as configured by the macros above.
//	Only the interface is declared here; the implementation lives
//	in the matching source file.
class viterbi_768 {
public:
//	first parameter: presumably the number of bits in a block
//	(callers pass 768 or 24 * bitRate) -- TODO confirm in the source file.
//	spiral: selects the SPIRAL update routine when true, the GENERIC
//	one otherwise (default), judging by the two update functions below.
viterbi_768 (int16_t, bool spiral = false);
~viterbi_768 (void);
//	decode a block: soft bits in (int16_t *), decoded bits out (uint8_t *)
//	NOTE(review): exact buffer lengths are defined by the implementation
void deconvolve (int16_t *, uint8_t *);
private:
bool spiral;
//	decoder state: metric buffers and decision storage
struct v vp;
//	branch table with NUMSTATES / 2 entries for each of the RATE polynomials
COMPUTETYPE Branchtab [NUMSTATES / 2 * RATE] __attribute__ ((aligned (16)));
// int parityb (uint8_t);
int parity (int);
void partab_init (void);
// uint8_t Partab [256];
void init_viterbi (struct v *, int16_t);
void update_viterbi_blk_GENERIC (struct v *, COMPUTETYPE *,
int16_t);
void update_viterbi_blk_SPIRAL (struct v *, COMPUTETYPE *,
int16_t);
void chainback_viterbi (struct v *, uint8_t *, int16_t, uint16_t);
struct v *viterbi_alloc (int32_t);
void BFLY (int32_t, int, COMPUTETYPE *,
struct v *, decision_t *);
// uint8_t *bits;
//	work buffers; presumably allocated by the constructor -- verify
uint8_t *data;
COMPUTETYPE *symbols;
int16_t frameBits;
};
#endif

View File

@@ -127,7 +127,7 @@ int opt;
fflush (stdout);
fflush (stderr);
qDebug ("It is done\n");
// delete MyRadioInterface;
delete MyRadioInterface;
delete dabSettings;
}

View File

@@ -170,6 +170,8 @@ QString h;
picturesPath);
connect (my_dabProcessor, SIGNAL (setSynced (char)),
this, SLOT (setSynced (char)));
connect (my_dabProcessor, SIGNAL (show_snr (int)),
this, SLOT (show_snr (int)));
//
serviceCharacteristics = NULL;
secondsTimer. setInterval (1000);
@@ -283,6 +285,7 @@ void RadioInterface::nextChannel (void) {
void RadioInterface::reset (void) {
my_dabProcessor -> stop ();
disconnect (ensembleDisplay,
SIGNAL (clicked (QModelIndex)),
this, SLOT (selectService (QModelIndex)));
@@ -394,7 +397,7 @@ void RadioInterface::showQuality (float f) {
}
void RadioInterface::show_snr (int s) {
(void)s;
snrDisplay -> display (s);
}
void RadioInterface::set_CorrectorDisplay (int c) {
@@ -531,15 +534,16 @@ void RadioInterface::TerminateProcess (void) {
displayTimer. stop ();
signalTimer. stop ();
inputDevice -> stopReader ();
my_dabProcessor -> stop (); // definitely concurrent
soundOut -> stop ();
// everything should be halted by now
delete soundOut;
if (inputDevice != NULL)
delete inputDevice;
fprintf (stderr, "going to delete dabProcessor\n");
delete my_dabProcessor;
fprintf (stderr, "deleted dabProcessor\n");
delete soundOut;
if (inputDevice != NULL)
delete inputDevice;
if (ensembleDisplay != NULL)
delete ensembleDisplay;
if (serviceDescription != NULL)

22
script-rpi.sh Normal file
View File

@@ -0,0 +1,22 @@
# Build script: installs the build dependencies with apt-get, builds and
# installs the rtl-sdr-linrad4 library from source, and finally runs
# qmake-qt4 and make in the directory the script was started from.
# NOTE(review): there is no error checking; a failing step does not stop
# the script.

# refresh the package index and install compiler, Qt4 and the libraries
sudo apt-get update
sudo apt-get install qt4-qmake build-essential g++
sudo apt-get install libsndfile1-dev qt4-default libfftw3-dev portaudio19-dev
sudo apt-get install libfaad-dev zlib1g-dev libusb-1.0-0-dev mesa-common-dev
sudo apt-get install libgl1-mesa-dev libqt4-opengl-dev libsamplerate-dev libqwt-dev
# download and unpack the rtl-sdr-linrad4 sources
wget http://sm5bsz.com/linuxdsp/hware/rtlsdr/rtl-sdr-linrad4.tbz
tar xvfj rtl-sdr-linrad4.tbz
# out-of-tree cmake build in rtl-sdr-linrad4/build, followed by install
cd rtl-sdr-linrad4
mkdir build
cd build
cmake .. -DDETACH_KERNEL_DRIVER=ON -DINSTALL_UDEV_RULES=ON
make
sudo make install
# refresh the dynamic linker cache after installing the library
sudo ldconfig
# back to the directory the script was started in
cd ..
cd ..
# presumably builds the radio program from the .pro file found here --
# the script must then be started from the source directory (verify)
qmake-qt4
make

View File

@@ -46,7 +46,7 @@
uint8_t dabMode):
params (dabMode),
fib_processor (mr),
myViterbi (768, true) {
myViterbi (768) {
int16_t i, j, k;
int local = 0;

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of Qt-DAB
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Once the bits are "in", interpretation and manipulation
@@ -68,12 +68,6 @@ int16_t i;
this -> T_g = T_s - T_u;
fft_buffer = my_fftHandler. getVector ();
phaseReference .resize (T_u);
connect (this, SIGNAL (show_snr (int)),
mr, SLOT (show_snr (int)));
snrCount = 0;
snr = 0;
}
ofdmDecoder::~ofdmDecoder (void) {
@@ -93,17 +87,6 @@ void ofdmDecoder::processBlock_0 (std::vector <std::complex<float> > buffer) {
T_u * sizeof (std::complex<float>));
my_fftHandler. do_FFT ();
/**
* The SNR is determined by looking at a segment of bins
* within the signal region and bits outside.
* It is just an indication
*/
if (++snrCount > 10) {
snr = 0.8 * snr + 0.2 * get_snr (fft_buffer);
// show_snr (snr);
snrCount = 0;
}
/**
* we are now in the frequency domain, and we keep the carriers
* as coming from the FFT as phase reference.
@@ -205,26 +188,4 @@ toBitsLabel:
}
}
}
/**
* for the snr we have a full T_u wide vector, with in the middle
* K carriers.
* Just get the strength from the selected carriers compared
* to the strength of the carriers outside that region
*/
//
//	Rough SNR indication for a T_u wide spectrum "v":
//	the average magnitude of 200 bins around index T_u / 2 is taken
//	as noise level, the average magnitude of the carriers / 2 bins
//	around index 0 (taken modulo T_u) as signal level.
//	The result is 20 * log10 of the ratio of both averages.
int16_t	ofdmDecoder::get_snr (std::complex<float> *v) {
	float	noiseLevel	= 0;
	float	signalLevel	= 0;
	int16_t	bin;

//	noise: bins T_u / 2 - 100 .. T_u / 2 + 99
	bin	= -100;
	while (bin < 100) {
	   noiseLevel += abs (v [(T_u / 2 + bin)]);
	   bin ++;
	}
	noiseLevel /= 200;

//	signal: a quarter of the carriers on either side of bin 0
	bin	= - carriers / 4;
	while (bin < carriers / 4) {
	   signalLevel += abs (v [(T_u + bin) % T_u]);
	   bin ++;
	}
	signalLevel /= (carriers / 2);

//	the small offsets keep the ratio finite for empty input
	const double ratio = (signalLevel + 0.005) / (noiseLevel + 0.005);
	return 20 * log10 (ratio);
}

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB.
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* The eep handling
@@ -159,7 +159,7 @@ int16_t inputCounter = 0;
if (indexTable [i])
viterbiBlock [i] = v [inputCounter ++];
viterbi_768::deconvolve (viterbiBlock. data (), outBuffer);
viterbiHandler::deconvolve (viterbiBlock. data (), outBuffer);
return true;
}

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB.
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
@@ -26,11 +26,12 @@
#include "protection.h"
protection::protection (int16_t bitRate, int16_t protLevel):
viterbi_768 (24 * bitRate, false),
viterbiHandler (24 * bitRate),
outSize (24 * bitRate),
indexTable (outSize * 4 + 24),
viterbiBlock (outSize * 4 + 24){
this -> bitRate = bitRate;
(void)protLevel;
}
protection::~protection (void) {}
bool protection::deconvolve (int16_t *a,

View File

@@ -4,19 +4,19 @@
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB program
* Qt-DAB is free software; you can redistribute it and/or modify
* This file is part of the dabradio
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* The deconvolution for uep
@@ -231,6 +231,8 @@ bool uep_protection::deconvolve (int16_t *v,
uint8_t *outBuffer) {
int16_t i;
int16_t inputCounter = 0;
(void)size;
// clear the bits in the viterbiBlock,
// only the non-punctured ones are set
memset (viterbiBlock. data (), 0,
@@ -240,6 +242,6 @@ int16_t inputCounter = 0;
for (i = 0; i < outSize * 4 + 24; i ++)
if (indexTable [i])
viterbiBlock [i] = v [inputCounter ++];
viterbi_768::deconvolve (viterbiBlock. data (), outBuffer);
viterbiHandler::deconvolve (viterbiBlock. data (), outBuffer);
return true;
}

View File

@@ -1,100 +0,0 @@
#
//
// This LUT implementation of atan2 is a C++ translation of
// a Java discussion on the net
// http://www.java-gaming.org/index.php?topic=14647.0
#include "Xtan2.h"
#define SIZE 8192
#define EZIS (-SIZE)
//
//	The constructor builds eight lookup tables of SIZE + 1 entries,
//	two for each sign combination of (x, y): one indexed by y / x,
//	one indexed by x / y.
//	Results swing from -Stretch to Stretch; Stretch is set to M_PI here
//	(it could be set to 1 when the caller would divide by M_PI anyway).
compAtan::compAtan (void) {
	Stretch	= M_PI;

	ATAN2_TABLE_PPY	= new float [SIZE + 1];
	ATAN2_TABLE_PPX	= new float [SIZE + 1];
	ATAN2_TABLE_PNY	= new float [SIZE + 1];
	ATAN2_TABLE_PNX	= new float [SIZE + 1];
	ATAN2_TABLE_NPY	= new float [SIZE + 1];
	ATAN2_TABLE_NPX	= new float [SIZE + 1];
	ATAN2_TABLE_NNY	= new float [SIZE + 1];
	ATAN2_TABLE_NNX	= new float [SIZE + 1];

	for (int entry = 0; entry <= SIZE; entry ++) {
	   const float ratio	= (float)entry / SIZE;
//	the basic value: the scaled arctangent of a ratio in [0 .. 1],
//	all other tables are derived from it by reflection
	   const float base	= atan(ratio) * Stretch / M_PI;

	   ATAN2_TABLE_PPY [entry] = base;
	   ATAN2_TABLE_PPX [entry] = Stretch * 0.5f - base;
	   ATAN2_TABLE_PNY [entry] = -base;
	   ATAN2_TABLE_PNX [entry] = base - Stretch * 0.5f;
	   ATAN2_TABLE_NPY [entry] = Stretch - base;
	   ATAN2_TABLE_NPX [entry] = base + Stretch * 0.5f;
	   ATAN2_TABLE_NNY [entry] = base - Stretch;
	   ATAN2_TABLE_NNX [entry] = -Stretch * 0.5f - base;
	}
}
//
//	All eight tables were allocated with "new float [...]" in the
//	constructor, so they have to be released with the array form
//	of delete.
compAtan::~compAtan (void) {
	delete [] ATAN2_TABLE_PPY;
	delete [] ATAN2_TABLE_PPX;
	delete [] ATAN2_TABLE_PNY;
	delete [] ATAN2_TABLE_PNX;
	delete [] ATAN2_TABLE_NPY;
	delete [] ATAN2_TABLE_NPX;
	delete [] ATAN2_TABLE_NNY;
	delete [] ATAN2_TABLE_NNX;
}
/**
* ATAN2 : performance degrades due to the many "0" tests
*/
//
//	Table driven atan2. The signs of x and y select the quadrant, the
//	comparison of their magnitudes selects whether the table indexed
//	by y / x or the one indexed by x / y is used, such that the ratio
//	used as index is always in the range 0 .. SIZE.
float	compAtan::atan2 (float y, float x) {
//	x == 0 is handled separately, the tables cannot be indexed then
	if (x == 0) {
	   if (y == 0)
	      return 0;
	   return (y > 0) ? M_PI / 2 : - M_PI / 2;
	}

	if (x > 0) {
	   if (y >= 0)
	      return (x >= y) ?
	               ATAN2_TABLE_PPY [(int)(SIZE * y / x + 0.5)] :
	               ATAN2_TABLE_PPX [(int)(SIZE * x / y + 0.5)];
//	x > 0, y < 0
	   return (x >= -y) ?
	               ATAN2_TABLE_PNY [(int)(EZIS * y / x + 0.5)] :
	               ATAN2_TABLE_PNX [(int)(EZIS * x / y + 0.5)];
	}

//	what remains is the half plane with x < 0
	if (y >= 0)
	   return (-x >= y) ?
	               ATAN2_TABLE_NPY [(int)(EZIS * y / x + 0.5)] :
	               ATAN2_TABLE_NPX [(int)(EZIS * x / y + 0.5)];

//	x < 0, y < 0; note that x <= y is the same as -x >= -y
	return (x <= y) ?
	               ATAN2_TABLE_NNY [(int)(SIZE * y / x + 0.5)] :
	               ATAN2_TABLE_NNX [(int)(SIZE * x / y + 0.5)];
}
//
//	The argument (phase) of a complex value, computed with the
//	table driven atan2 of this class.
float	compAtan::argX (std::complex<float> v) {
	const float imagPart	= v. imag ();
	const float realPart	= v. real ();
	return this -> atan2 (imagPart, realPart);
}

View File

@@ -0,0 +1,244 @@
#
/*
* Copyright (C) 2014 .. 2017
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of dabradio
*
* dabradio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* dabradio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dabradio; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "viterbi-handler.h"
#include <stdio.h>
#define K 7
#define Poly1 0133
#define Poly2 0171
#define Poly3 0145
#define Poly4 0133
#define numofStates (1 << (K - 1))
static int predecessor_for_0 [numofStates];
static int predecessor_for_1 [numofStates];
static int16_t indexTable [2 * numofStates];
//
//	The constructor allocates the cost and history matrices, with a
//	column for each of the blockLength + 6 steps, and fills the
//	(file level) tables that are shared by all instances:
//	- indexTable maps (inserted bit, state) to the 4 bit pattern
//	  generated by the four polynomes,
//	- predecessor_for_0/1 give the two possible previous states.
viterbiHandler::viterbiHandler (int blockLength) {
	this -> blockLength	= blockLength;

	transCosts	= new int *[blockLength + 6 + 1];
	history		= new int *[blockLength + 6 + 1];
	stateSequence	= new int [blockLength + 6 + 1];

	for (int column = 0; column < blockLength + 6; column ++) {
	   transCosts [column]	= new int [numofStates];
	   history [column]	= new int [numofStates];
	   stateSequence [column]	= 0;
	   for (int state = 0; state < numofStates; state ++) {
	      transCosts [column][state]	= 0;
	      history [column][state]		= 0;
	   }
	}

//	For each combination of inserted bit and register state we
//	compute the four output bits of the encoder and pack them,
//	Poly1 in the most significant position, into a 4 bit index
//	that is used for addressing the cost table
	for (int bit = 0; bit < 2; bit ++) {
	   for (int state = 0; state < numofStates; state ++) {
	      int pattern = 0;
	      if (bitFor (state, Poly1, bit) != 0)
	         pattern += 8;
	      if (bitFor (state, Poly2, bit) != 0)
	         pattern += 4;
	      if (bitFor (state, Poly3, bit) != 0)
	         pattern += 2;
	      if (bitFor (state, Poly4, bit) != 0)
	         pattern += 1;
	      indexTable [bit * numofStates + state] = (int16_t)pattern;
	   }
	}

	for (int state = 0; state < numofStates; state ++) {
	   predecessor_for_0 [state] = ((state << 1) + 00) & (numofStates - 1);
	   predecessor_for_1 [state] = ((state << 1) + 01) & (numofStates - 1);
	}
}
//
//	Releases the blockLength + 6 columns allocated by the constructor
//	and then the arrays holding them.
//	(The unused outer "i", shadowed by the loop variable, is removed.)
viterbiHandler::~viterbiHandler (void) {
	for (int i = 0; i < blockLength + 6; i ++) {
	   delete [] transCosts [i];
	   delete [] history [i];
	}
	delete [] transCosts;
	delete [] history;
	delete [] stateSequence;
}
// Note that the soft bits are such that
// they are int16_t -255 -> (bit)1, +255 -> (bit)0
//
//	Fills the 16 entries of costTable. The index is read as a 4 bit
//	pattern: bit 8 belongs to sym_0, bit 4 to sym_1, bit 2 to sym_2
//	and bit 1 to sym_3. A symbol is added when its bit in the index
//	is set, and subtracted when it is not.
void	viterbiHandler::computeCostTable (int16_t sym_0,
	                                  int16_t sym_1,
	                                  int16_t sym_2, int16_t sym_3) {
	for (int pattern = 0; pattern < 16; pattern ++) {
	   int cost = 0;
	   cost += (pattern & 8) ? sym_0 : - sym_0;
	   cost += (pattern & 4) ? sym_1 : - sym_1;
	   cost += (pattern & 2) ? sym_2 : - sym_2;
	   cost += (pattern & 1) ? sym_3 : - sym_3;
	   costTable [pattern] = cost;
	}
}
// block is the sequence of soft bits
// its length = 4 * blockLength + 4 * 6
//
//	deconvolve decodes one block.
//	sym:		the soft bits, four per step; columns 1 up to and
//			including blockLength + 5 of the trellis are filled
//			from sym [0] .. sym [4 * (blockLength + 5) - 1]
//	bitBuffer:	receives blockLength decoded bits, one per byte
//
//	Column 0 of the cost matrix is left as it was set by the
//	constructor (all zeros) and is never written here.
void	viterbiHandler::deconvolve (int16_t *sym, uint8_t *bitBuffer) {
	const int lastColumn	= blockLength + 6 - 1;

//	first step is to "pump" the soft bits into the state machine
//	and compute the cost matrix.
	for (int i = 1; i <= lastColumn; i ++) {
	   const int16_t *softBits	= &sym [4 * (i - 1)];
	   const int *prevCosts		= transCosts [i - 1];
	   int *transCosts_i		= transCosts [i];
	   int *history_i		= history [i];

//	the soft bits are negated before being used in the cost table
	   computeCostTable ((int16_t)(- softBits [0]),
	                     (int16_t)(- softBits [1]),
	                     (int16_t)(- softBits [2]),
	                     (int16_t)(- softBits [3]));

	   for (int cState = 0; cState < numofStates; cState ++) {
//	States in the lower half are entered with a "0" bit, states in the
//	upper half with a "1" bit. For an entry bit "1" the index in the
//	indexTable is the previous state + numofStates
	      const int offset	=
	                  (cState < numofStates / 2) ? 0 : numofStates;
	      const int prev_0	= predecessor_for_0 [cState];
	      const int prev_1	= predecessor_for_1 [cState];
//	we compute the minimal costs, based on the costs of the
//	prev states, and the additional costs of arriving from
//	the previous state in the current state with the current symbols
	      const int costs_0	= prevCosts [prev_0] +
	                          costTable [indexTable [prev_0 + offset]];
	      const int costs_1	= prevCosts [prev_1] +
	                          costTable [indexTable [prev_1 + offset]];
	      if (costs_0 < costs_1) {
	         transCosts_i [cState]	= costs_0;
	         history_i [cState]	= prev_0;
	      } else {
	         transCosts_i [cState]	= costs_1;
	         history_i [cState]	= prev_1;
	      }
	   }
	}

//	Once all costs are computed, we look for the minimal cost in the
//	last column. The search starts with the costs of state 0 rather
//	than with a fixed "large" number, so the result is also right
//	when all accumulated costs happen to exceed such a number.
	const int *finalCosts	= transCosts [lastColumn];
	int minimalCosts	= finalCosts [0];
	int bestState		= 0;
	for (int state = 1; state < numofStates; state ++) {
	   if (finalCosts [state] < minimalCosts) {
	      minimalCosts	= finalCosts [state];
	      bestState		= state;
	   }
	}
	stateSequence [lastColumn] = bestState;

/*
 *	Trace back goes back to column 0, and builds up the
 *	sequence of states that were passed
 */
	for (int i = lastColumn; i > 0; i --)
	   stateSequence [i - 1] = history [i][stateSequence [i]];

//	a state in the upper half was entered with a "1" bit
	for (int i = 1; i <= blockLength; i ++)
	   bitBuffer [i - 1] =
	        (uint8_t)((stateSequence [i] >= numofStates / 2) ? 01 : 00);
}
/*
* as an aid, we give a function "bitFor" that, given
* the register state, the polynome and the bit to be inserted
* returns the bit coming from the engine
*/
//
//	bitFor returns the encoder output bit for a given polynome:
//	state:	the register state (values below numofStates)
//	poly:	the polynome, used as bit mask on the register
//	bit:	the bit to be inserted (0 or non-zero)
//	The result is the parity (xor of all bits) of the masked register.
//	(The unused outer "i", shadowed by the loop variable, is removed.)
uint8_t	viterbiHandler::bitFor (int state, int poly, int bit) {
uint8_t	resBit	= 0;
//	the register after shifting "bit" in would be:
int	theRegister	= (bit == 0) ? state : (state + numofStates);

	theRegister &= poly;
/*
 *	now for the individual bits, K + 1 positions are inspected
 */
	for (int i = 0; i <= K; i ++) {
	   resBit ^= (uint8_t)(theRegister & 01);
	   theRegister >>= 1;
	}
	return resBit;
}

View File

@@ -1,296 +0,0 @@
#
/*
* Copyright (C) 2013
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB program
* Qt-DAB is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* This viterbi decoder is used for deconvolving the data segments
* for audio and/or data. The code is
* as given by the Spiral Project. All rights gratefully acknowledged.
* decoder
*/
#include <stdio.h>
#include <stdlib.h>
#include "mm_malloc.h"
#include "viterbi.h"
#include <cstring>
#ifdef __MINGW32__
#include <intrin.h>
#include <malloc.h>
#include <windows.h>
#endif
//
// It took a while to discover that the polynomials I used
// in a "home-made" implementation were bit-reversed!!
// The official ones are listed first.
#define K 7
#define POLYS {0155, 0117, 0123, 0155}
//#define POLYS {109, 79, 83, 109}
// In the reversed form the polys look:
//#define POLYS { 0133, 0171, 0145, 0133 }
//#define POLYS { 91, 121, 101, 91 }
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 137
//
/* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
#if (K-1<8)
#define ADDSHIFT (8-(K-1))
#define SUBSHIFT 0
#elif (K-1>8)
#define ADDSHIFT 0
#define SUBSHIFT ((K-1)-8)
#else
#define ADDSHIFT 0
#define SUBSHIFT 0
#endif
static uint8_t Partab [] =
{ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0};
//
// One could create the table above, i.e. a 256 entry
// odd-parity lookup table by the following function
// It is now precomputed
//
//	(Re)computes all 256 entries of Partab: entry i becomes 1 when
//	the number of bits set in i is odd, 0 otherwise.
static
void	partab_init (void){
	for (int16_t value = 0; value < 256; value ++) {
	   int16_t bitCount = 0;
	   for (int16_t rest = value; rest != 0; rest >>= 1)
	      if (rest & 1)
	         bitCount ++;
	   Partab [value] = bitCount & 1;
	}
}
//
//	parity returns the Partab entry for the 16 bit value x.
//	The high byte is folded onto the low byte; since the high byte
//	of x itself is still present after the xor, the index has to be
//	masked to 8 bits to stay within the 256 entries of Partab.
int16_t	viterbi::parity (int16_t x){
	/* Fold down to one byte */
	x ^= (x >> 8);
	return Partab [x & 0xFF];
}
//
//	When the first metric exceeds the threshold, the smallest of the
//	NUMSTATES metrics is subtracted from all of them. Nothing is
//	done as long as X [0] does not exceed the threshold.
static inline
void	renormalize (int16_t* X, int16_t threshold){
	if (X [0] <= threshold)
	   return;

	int16_t	lowest	= X [0];
	for (int32_t state = 1; state < NUMSTATES; state ++)
	   if (X [state] < lowest)
	      lowest = X [state];

	for (int32_t state = 0; state < NUMSTATES; state ++)
	   X [state] -= lowest;
}
//
//	The constructor allocates - 16 byte aligned - the buffers for
//	the decoded data, the clamped symbols and the decisions, for
//	wordlength bits plus the K - 1 tail bits. After that the
//	Branchtab is filled from the polynomes and the metrics are
//	initialized.
//	The pointers are set to NULL first: after a failing allocation
//	they would otherwise be indeterminate, and the destructor would
//	pass them to free.
viterbi::viterbi (int16_t wordlength) {
int polys [RATE] = POLYS;
int16_t i, state;

	frameBits	= wordlength;
	data		= NULL;
	symbols		= NULL;
	vp. decisions	= NULL;
#ifdef __MINGW32__
	uint32_t size;
	size = 2 * ((wordlength + (K - 1)) / 8 + 1 + 16) & ~0x0F;
	data = (uint8_t *)_aligned_malloc (size, 16);
	size = 2 * (RATE * (wordlength + (K - 1)) * sizeof (int16_t) + 1 + 16) & ~0x0F;
	symbols = (int16_t *)_aligned_malloc (size, 16);
	size = 2 * ((wordlength + (K - 1)) * sizeof (decision_t) + 16) & ~0x0F;
	vp. decisions = (decision_t *)_aligned_malloc (size, 16);
#else
	if (posix_memalign ((void**)&data, 16,
	                    (wordlength + (K - 1))/ 8 + 1)){
	   printf("Allocation of data array failed\n");
	   data = NULL;
	}
	if (posix_memalign ((void**)&symbols, 16,
	                    RATE * (wordlength + (K - 1)) * sizeof(int16_t))){
	   printf("Allocation of symbols array failed\n");
	   symbols = NULL;
	}
	if (posix_memalign ((void**)&(vp. decisions),
	                    16,
	                    2 * (wordlength + (K - 1)) * sizeof (decision_t))){
	   printf ("Allocation of vp decisions failed\n");
	   vp. decisions = NULL;
	}
#endif

//	Branchtab entry: 255 when the parity of the state bits selected
//	by the polynome is odd (inverted for a negative polynome), 0 otherwise
	for (state = 0; state < NUMSTATES / 2; state++) {
	   for (i = 0; i < RATE; i++)
	      Branchtab [i * NUMSTATES / 2 + state] =
	                     (polys[i] < 0) ^
	                     parity((2 * state) & abs (polys[i])) ? 255 : 0;
	}

	init_viterbi (&vp, 0);
}
//
//	Releases the three buffers that were allocated by the constructor,
//	with the release function that matches the allocator used
//	on the platform.
viterbi::~viterbi (void) {
#ifndef __MINGW32__
	free (vp. decisions);
	free (data);
	free (symbols);
#else
	_aligned_free (vp. decisions);
	_aligned_free (data);
	_aligned_free (symbols);
#endif
}
//	maskTable [o] selects bit "o" of a byte, counted from the
//	most significant bit (o = 0) to the least significant one (o = 7)
static int maskTable [] = {128, 64, 32, 16, 8, 4, 2, 1};

//	getbit returns 1 when bit "o" (0 .. 7, most significant bit first)
//	of "v" is set, 0 otherwise.
static inline
uint8_t	getbit (uint8_t v, int32_t o) {
	const int selected = v & maskTable [o];
	if (selected != 0)
	   return 1;
	return 0;
}
void viterbi::deconvolve (int16_t *input, uint8_t *output) {
int16_t i;
init_viterbi (&vp, 0);
for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
int16_t temp = input [i] + 127;
if (temp < 0) temp = 0;
if (temp > 255) temp = 255;
symbols [i] = temp;
}
update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
chainback_viterbi (&vp, data, frameBits, 0);
for (i = 0; i < (int16_t)frameBits; i ++)
output [i] = getbit (data [i >> 3], i & 07);
}
/* C-language butterfly */
//
//	A single butterfly: from the old metrics of the states i and
//	i + NUMSTATES / 2 the new metrics of the states 2 * i and 2 * i + 1
//	are computed for step s, and the two decisions are recorded.
//	i:	butterfly number, 0 .. NUMSTATES / 2 - 1
//	s:	step number, selects the RATE symbols syms [s * RATE ...]
//	syms:	the symbols
//	vp:	decoder state, with the old and new metrics
//	d:	start of the decisions, the bits for step s are or-ed in
void viterbi::BFLY (int i, int s, int16_t * syms,
struct v * vp, decision_t * d) {
int32_t j, decision0, decision1;
int16_t metric, m0, m1, m2, m3;
metric = 0;
//	branch metric: sum over the RATE symbols of (table entry xor symbol)
for (j = 0; j < RATE;j++)
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s * RATE + j]) >>
METRICSHIFT ;
metric = metric >> PRECISIONSHIFT;
//	max is RATE times the largest (shifted) contribution of one symbol,
//	max - metric is used as metric for the complementary branch
const int16_t max =
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
//	m0 and m1 are the candidates for new state 2 * i,
//	m2 and m3 the candidates for new state 2 * i + 1
m0 = vp -> old_metrics->t [i] + metric;
m1 = vp -> old_metrics->t [i + NUMSTATES / 2] + (max - metric);
m2 = vp -> old_metrics->t [i] + (max - metric);
m3 = vp -> old_metrics->t [i + NUMSTATES / 2] + metric;
//	a decision is 1 when the candidate coming from state
//	i + NUMSTATES / 2 is the smaller one
decision0 = ((int32_t)(m0 - m1)) > 0;
decision1 = ((int32_t)(m2 - m3)) > 0;
//	the smaller candidate survives
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
//	both decision bits are or-ed into the 32 bit word for step s, at
//	bit positions 2 * i and 2 * i + 1 (modulo 32); the word index is
//	computed relative to d, i.e. to the decisions of step 0
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
}
/*
* Update decoder with a block of demodulated symbols
* Note that nbits is the number of decoded data bits, not the number
* of symbols!
*/
void viterbi::update_viterbi_blk_GENERIC (struct v *vp,
int16_t *syms, int16_t nbits){
decision_t *d = (decision_t *)vp -> decisions;
int32_t s, i;
for (s = 0; s < nbits; s++)
memset (&d [s], 0, sizeof (decision_t));
for (s = 0; s < nbits; s++){
void *tmp;
for (i = 0; i < NUMSTATES / 2; i++)
BFLY (i, s, syms, vp, vp -> decisions);
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
// Swap pointers to old and new metrics
tmp = vp -> old_metrics;
vp -> old_metrics = vp -> new_metrics;
vp -> new_metrics = (metric_t *)tmp;
}
}
/*
* Viterbi chainback
*/
//
//	Walks back through the recorded decisions, starting at the given
//	end state, and writes the decoded bits - packed 8 per byte -
//	into data.
void viterbi::chainback_viterbi (struct v *vp,
uint8_t *data, /* Decoded output data */
int16_t nbits, /* Number of data bits */
uint16_t endstate){ /*Terminal encoder state */
decision_t *d = vp -> decisions;
/*
* Make room beyond the end of the encoder register so we can
* accumulate a full byte of decoded data
* (the state is kept shifted left by ADDSHIFT bits)
*/
endstate = (endstate % NUMSTATES) << ADDSHIFT;
/*
* The store into data[] only needs to be done every 8 bits.
* But this avoids a conditional branch, and the writes will
* combine in the cache anyway
*/
d += (K - 1); /* Look past tail */
while (nbits-- != 0){
int k;
// int l = (endstate >> ADDSHIFT) / 32;
// int m = (endstate >> ADDSHIFT) % 32;
//	k is the decision bit recorded for the current state at this step
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
((endstate>>ADDSHIFT) % 32)) & 1;
//	shift the state one position and insert the decision bit at the top
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
//	data is declared as bytes, so the low 8 bits of the value are stored
data [nbits >> 3] = endstate >> SUBSHIFT;
}
}
/* Initialize Viterbi decoder for start of new frame */
//
//	Prepares the decoder state for a new frame: all metrics in the
//	first buffer are set to 63, the first buffer becomes the "old"
//	one and the second the "new" one, and the metric of the
//	starting state (taken modulo NUMSTATES) is set to 0.
void	viterbi::init_viterbi (struct v *p, int16_t starting_state){
	for (int32_t state = 0; state < NUMSTATES; state ++)
	   p -> metrics1. t [state] = 63;

	p -> old_metrics	= &p -> metrics1;
	p -> new_metrics	= &p -> metrics2;
/*	Bias known start state */
	p -> old_metrics -> t [starting_state & (NUMSTATES - 1)] = 0;
}

View File

@@ -1,11 +0,0 @@
The viterbi implementation is copied from the spiral one, all
rights gratefully acknowledged.
Since we serve more than a single platform, we do not use the SSE
implementation.
The particular spiral implementation (see the file "spiral-no-sse.c")
is generated for the word size and the other parameters of FIC blocks.
The implementation therefore has a "switch" that - when set to true -
selects the spiral implementation and - when set to false (the default) -
selects the generic implementation.

File diff suppressed because it is too large Load Diff

View File

@@ -1,35 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
#define K 7
#define RATE 4
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
#define FRAMEBITS 2048
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
#define COMPUTETYPE unsigned int
#define EBN0 3
#define TRIALS 10000
#define __int32 int
#define FUNC FULL_SPIRAL
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 2000000000

View File

@@ -1,698 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
//#include <include/mm_malloc.h>
//#include <pmmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include <mmintrin.h>
#include "spiral-sse.h"
/* Intentionally empty: no initialization is performed by this function. */
void init_FULL_SPIRAL() {
}
void FULL_SPIRAL_sse(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
int i9;
// for(i9 = 0; i9 <= amount; i9++) {
for(i9 = 0; i9 < amount; i9++) {
int32_t a1002, a1006, a1010, a1014, a822, a828, a834
, a840;
int a820, a850;
unsigned char s118, s125, s132, s139, s146, s153, s160
, s167, s174, s181, s188, s195, s202, s209, s216
, s223;
int32_t *a1001, *a1005, *a1009, *a1013, *a821, *a827, *a833
, *a839, *b104;
unsigned char *a1021, *a1030, *a1039, *a1048, *a1057, *a1066, *a1075
, *a1084, *a849, *a851, *a872, *a893, *a914, *a935, *a956
, *a977, *a998;
__m128i *a1000, *a818, *a819, *a824, *a830, *a836, *a842
, *a852, *a853, *a854, *a855, *a856, *a859, *a862, *a865
, *a873, *a874, *a875, *a876, *a877, *a880, *a883, *a886
, *a894, *a895, *a896, *a897, *a898, *a901, *a904, *a907
, *a915, *a916, *a917, *a918, *a919, *a922, *a925, *a928
, *a936, *a937, *a938, *a939, *a940, *a943, *a946, *a949
, *a957, *a958, *a959, *a960, *a961, *a964, *a967, *a970
, *a978, *a979, *a980, *a981, *a982, *a985, *a988, *a991
, *a999;
__m128i a1003, a1004, a1007, a1008, a1011, a1012, a1015
, a1016, a1017, a1018, a1019, a1020, a1022, a1023, a1024
, a1025, a1026, a1027, a1028, a1029, a1031, a1032, a1033
, a1034, a1035, a1036, a1037, a1038, a1040, a1041, a1042
, a1043, a1044, a1045, a1046, a1047, a1049, a1050, a1051
, a1052, a1053, a1054, a1055, a1056, a1058, a1059, a1060
, a1061, a1062, a1063, a1064, a1065, a1067, a1068, a1069
, a1070, a1071, a1072, a1073, a1074, a1076, a1077, a1078
, a1079, a1080, a1081, a1082, a1083, a823, a825, a826
, a829, a831, a832, a835, a837, a838, a841, a843
, a844, a845, a846, a847, a848, a857, a858, a860
, a861, a863, a864, a866, a867, a868, a869, a870
, a871, a878, a879, a881, a882, a884, a885, a887
, a888, a889, a890, a891, a892, a899, a900, a902
, a903, a905, a906, a908, a909, a910, a911, a912
, a913, a920, a921, a923, a924, a926, a927, a929
, a930, a931, a932, a933, a934, a941, a942, a944
, a945, a947, a948, a950, a951, a952, a953, a954
, a955, a962, a963, a965, a966, a968, a969, a971
, a972, a973, a974, a975, a976, a983, a984, a986
, a987, a989, a990, a992, a993, a994, a995, a996
, a997, b105, b106, b107, b108, b109, b110, b111
, b112, b113, b114, b115, b116, b117, b118, b119
, b120, b121, b122, b123, b124, b125, b126, b127
, b128, b129, b130, b131, b132, b133, b134, b135
, b136, d37, d38, d39, d40, d41, d42, d43
, d44, d45, d46, d47, d48, d49, d50, d51
, d52, d53, d54, d55, d56, d57, d58, d59
, d60, d61, d62, d63, d64, d65, d66, d67
, d68, m100, m101, m102, m103, m104, m105, m106
, m107, m108, m109, m110, m111, m112, m113, m114
, m115, m116, m117, m118, m119, m120, m121, m122
, m123, m124, m125, m126, m127, m128, m129, m130
, m131, m132, m133, m134, m135, m136, m73, m74
, m75, m76, m77, m78, m79, m80, m81, m82
, m83, m84, m85, m86, m87, m88, m89, m90
, m91, m92, m93, m94, m95, m96, m97, m98
, m99, s114, s115, s116, s117, s119, s120, s121
, s122, s123, s124, s126, s127, s128, s129, s130
, s131, s133, s134, s135, s136, s137, s138, s140
, s141, s142, s143, s144, s145, s147, s148, s149
, s150, s151, s152, s154, s155, s156, s157, s158
, s159, s161, s162, s163, s164, s165, s166, s168
, s169, s170, s171, s172, s173, s175, s176, s177
, s178, s179, s180, s182, s183, s184, s185, s186
, s187, s189, s190, s191, s192, s193, s194, s196
, s197, s198, s199, s200, s201, s203, s204, s205
, s206, s207, s208, s210, s211, s212, s213, s214
, s215, s217, s218, s219, s220, s221, s222, s224
, s225, t39, t40, t41, t42, t43, t44, t45
, t46, t47, t48, t49, t50, t51, t52, t53
, t54, t55, t56, t57, t58, t59, t60, t61
, t62, t63, t64, t65, t66, t67, t68, t69
, t70;
a818 = ((__m128i *) X);
s114 = *(a818);
a819 = (a818 + 8);
s115 = *(a819);
a820 = (8 * i9);
a821 = (syms + a820);
a822 = *(a821);
a823 = _mm_set1_epi32(a822);
a824 = ((__m128i *) Branchtab);
a825 = *(a824);
a826 = _mm_xor_si128(a823, a825);
b104 = (a820 + syms);
a827 = (b104 + 1);
a828 = *(a827);
a829 = _mm_set1_epi32(a828);
a830 = (a824 + 8);
a831 = *(a830);
a832 = _mm_xor_si128(a829, a831);
a833 = (b104 + 2);
a834 = *(a833);
a835 = _mm_set1_epi32(a834);
a836 = (a824 + 16);
a837 = *(a836);
a838 = _mm_xor_si128(a835, a837);
a839 = (b104 + 3);
a840 = *(a839);
a841 = _mm_set1_epi32(a840);
a842 = (a824 + 24);
a843 = *(a842);
a844 = _mm_xor_si128(a841, a843);
b105 = _mm_add_epi32(a826, a832);
b106 = _mm_add_epi32(b105, a838);
t39 = _mm_add_epi32(b106, a844);
t40 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t39);
m73 = _mm_add_epi32(s114, t39);
m74 = _mm_add_epi32(s115, t40);
m75 = _mm_add_epi32(s114, t40);
m76 = _mm_add_epi32(s115, t39);
d37 = _mm_cmpgt_epi32(m73, m74);
d38 = _mm_cmpgt_epi32(m75, m76);
a845 = _mm_andnot_si128(d37, m73);
a846 = _mm_and_si128(d37, m74);
s116 = _mm_or_si128(a845, a846);
a847 = _mm_andnot_si128(d38, m75);
a848 = _mm_and_si128(d38, m76);
s117 = _mm_or_si128(a847, a848);
s118 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d37,_mm_setzero_si128()),_mm_packs_epi16(d38,_mm_setzero_si128())),_mm_setzero_si128()));
a849 = ((unsigned char *) dec);
a850 = (16 * i9);
a851 = (a849 + a850);
*(a851) = s118;
s119 = _mm_unpacklo_epi32(s116, s117);
s120 = _mm_unpackhi_epi32(s116, s117);
a852 = ((__m128i *) Y);
*(a852) = s119;
a853 = (a852 + 1);
*(a853) = s120;
a854 = (a818 + 1);
s121 = *(a854);
a855 = (a818 + 9);
s122 = *(a855);
a856 = (a824 + 1);
a857 = *(a856);
a858 = _mm_xor_si128(a823, a857);
a859 = (a824 + 9);
a860 = *(a859);
a861 = _mm_xor_si128(a829, a860);
a862 = (a824 + 17);
a863 = *(a862);
a864 = _mm_xor_si128(a835, a863);
a865 = (a824 + 25);
a866 = *(a865);
a867 = _mm_xor_si128(a841, a866);
b107 = _mm_add_epi32(a858, a861);
b108 = _mm_add_epi32(b107, a864);
t41 = _mm_add_epi32(b108, a867);
t42 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t41);
m77 = _mm_add_epi32(s121, t41);
m78 = _mm_add_epi32(s122, t42);
m79 = _mm_add_epi32(s121, t42);
m80 = _mm_add_epi32(s122, t41);
d39 = _mm_cmpgt_epi32(m77, m78);
d40 = _mm_cmpgt_epi32(m79, m80);
a868 = _mm_andnot_si128(d39, m77);
a869 = _mm_and_si128(d39, m78);
s123 = _mm_or_si128(a868, a869);
a870 = _mm_andnot_si128(d40, m79);
a871 = _mm_and_si128(d40, m80);
s124 = _mm_or_si128(a870, a871);
s125 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d39,_mm_setzero_si128()),_mm_packs_epi16(d40,_mm_setzero_si128())),_mm_setzero_si128()));
a872 = (a851 + 1);
*(a872) = s125;
s126 = _mm_unpacklo_epi32(s123, s124);
s127 = _mm_unpackhi_epi32(s123, s124);
a873 = (a852 + 2);
*(a873) = s126;
a874 = (a852 + 3);
*(a874) = s127;
a875 = (a818 + 2);
s128 = *(a875);
a876 = (a818 + 10);
s129 = *(a876);
a877 = (a824 + 2);
a878 = *(a877);
a879 = _mm_xor_si128(a823, a878);
a880 = (a824 + 10);
a881 = *(a880);
a882 = _mm_xor_si128(a829, a881);
a883 = (a824 + 18);
a884 = *(a883);
a885 = _mm_xor_si128(a835, a884);
a886 = (a824 + 26);
a887 = *(a886);
a888 = _mm_xor_si128(a841, a887);
b109 = _mm_add_epi32(a879, a882);
b110 = _mm_add_epi32(b109, a885);
t43 = _mm_add_epi32(b110, a888);
t44 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t43);
m81 = _mm_add_epi32(s128, t43);
m82 = _mm_add_epi32(s129, t44);
m83 = _mm_add_epi32(s128, t44);
m84 = _mm_add_epi32(s129, t43);
d41 = _mm_cmpgt_epi32(m81, m82);
d42 = _mm_cmpgt_epi32(m83, m84);
a889 = _mm_andnot_si128(d41, m81);
a890 = _mm_and_si128(d41, m82);
s130 = _mm_or_si128(a889, a890);
a891 = _mm_andnot_si128(d42, m83);
a892 = _mm_and_si128(d42, m84);
s131 = _mm_or_si128(a891, a892);
s132 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d41,_mm_setzero_si128()),_mm_packs_epi16(d42,_mm_setzero_si128())),_mm_setzero_si128()));
a893 = (a851 + 2);
*(a893) = s132;
s133 = _mm_unpacklo_epi32(s130, s131);
s134 = _mm_unpackhi_epi32(s130, s131);
a894 = (a852 + 4);
*(a894) = s133;
a895 = (a852 + 5);
*(a895) = s134;
a896 = (a818 + 3);
s135 = *(a896);
a897 = (a818 + 11);
s136 = *(a897);
a898 = (a824 + 3);
a899 = *(a898);
a900 = _mm_xor_si128(a823, a899);
a901 = (a824 + 11);
a902 = *(a901);
a903 = _mm_xor_si128(a829, a902);
a904 = (a824 + 19);
a905 = *(a904);
a906 = _mm_xor_si128(a835, a905);
a907 = (a824 + 27);
a908 = *(a907);
a909 = _mm_xor_si128(a841, a908);
b111 = _mm_add_epi32(a900, a903);
b112 = _mm_add_epi32(b111, a906);
t45 = _mm_add_epi32(b112, a909);
t46 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t45);
m85 = _mm_add_epi32(s135, t45);
m86 = _mm_add_epi32(s136, t46);
m87 = _mm_add_epi32(s135, t46);
m88 = _mm_add_epi32(s136, t45);
d43 = _mm_cmpgt_epi32(m85, m86);
d44 = _mm_cmpgt_epi32(m87, m88);
a910 = _mm_andnot_si128(d43, m85);
a911 = _mm_and_si128(d43, m86);
s137 = _mm_or_si128(a910, a911);
a912 = _mm_andnot_si128(d44, m87);
a913 = _mm_and_si128(d44, m88);
s138 = _mm_or_si128(a912, a913);
s139 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d43,_mm_setzero_si128()),_mm_packs_epi16(d44,_mm_setzero_si128())),_mm_setzero_si128()));
a914 = (a851 + 3);
*(a914) = s139;
s140 = _mm_unpacklo_epi32(s137, s138);
s141 = _mm_unpackhi_epi32(s137, s138);
a915 = (a852 + 6);
*(a915) = s140;
a916 = (a852 + 7);
*(a916) = s141;
a917 = (a818 + 4);
s142 = *(a917);
a918 = (a818 + 12);
s143 = *(a918);
a919 = (a824 + 4);
a920 = *(a919);
a921 = _mm_xor_si128(a823, a920);
a922 = (a824 + 12);
a923 = *(a922);
a924 = _mm_xor_si128(a829, a923);
a925 = (a824 + 20);
a926 = *(a925);
a927 = _mm_xor_si128(a835, a926);
a928 = (a824 + 28);
a929 = *(a928);
a930 = _mm_xor_si128(a841, a929);
b113 = _mm_add_epi32(a921, a924);
b114 = _mm_add_epi32(b113, a927);
t47 = _mm_add_epi32(b114, a930);
t48 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t47);
m89 = _mm_add_epi32(s142, t47);
m90 = _mm_add_epi32(s143, t48);
m91 = _mm_add_epi32(s142, t48);
m92 = _mm_add_epi32(s143, t47);
d45 = _mm_cmpgt_epi32(m89, m90);
d46 = _mm_cmpgt_epi32(m91, m92);
a931 = _mm_andnot_si128(d45, m89);
a932 = _mm_and_si128(d45, m90);
s144 = _mm_or_si128(a931, a932);
a933 = _mm_andnot_si128(d46, m91);
a934 = _mm_and_si128(d46, m92);
s145 = _mm_or_si128(a933, a934);
s146 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d45,_mm_setzero_si128()),_mm_packs_epi16(d46,_mm_setzero_si128())),_mm_setzero_si128()));
a935 = (a851 + 4);
*(a935) = s146;
s147 = _mm_unpacklo_epi32(s144, s145);
s148 = _mm_unpackhi_epi32(s144, s145);
a936 = (a852 + 8);
*(a936) = s147;
a937 = (a852 + 9);
*(a937) = s148;
a938 = (a818 + 5);
s149 = *(a938);
a939 = (a818 + 13);
s150 = *(a939);
a940 = (a824 + 5);
a941 = *(a940);
a942 = _mm_xor_si128(a823, a941);
a943 = (a824 + 13);
a944 = *(a943);
a945 = _mm_xor_si128(a829, a944);
a946 = (a824 + 21);
a947 = *(a946);
a948 = _mm_xor_si128(a835, a947);
a949 = (a824 + 29);
a950 = *(a949);
a951 = _mm_xor_si128(a841, a950);
b115 = _mm_add_epi32(a942, a945);
b116 = _mm_add_epi32(b115, a948);
t49 = _mm_add_epi32(b116, a951);
t50 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t49);
m93 = _mm_add_epi32(s149, t49);
m94 = _mm_add_epi32(s150, t50);
m95 = _mm_add_epi32(s149, t50);
m96 = _mm_add_epi32(s150, t49);
d47 = _mm_cmpgt_epi32(m93, m94);
d48 = _mm_cmpgt_epi32(m95, m96);
a952 = _mm_andnot_si128(d47, m93);
a953 = _mm_and_si128(d47, m94);
s151 = _mm_or_si128(a952, a953);
a954 = _mm_andnot_si128(d48, m95);
a955 = _mm_and_si128(d48, m96);
s152 = _mm_or_si128(a954, a955);
s153 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d47,_mm_setzero_si128()),_mm_packs_epi16(d48,_mm_setzero_si128())),_mm_setzero_si128()));
a956 = (a851 + 5);
*(a956) = s153;
s154 = _mm_unpacklo_epi32(s151, s152);
s155 = _mm_unpackhi_epi32(s151, s152);
a957 = (a852 + 10);
*(a957) = s154;
a958 = (a852 + 11);
*(a958) = s155;
a959 = (a818 + 6);
s156 = *(a959);
a960 = (a818 + 14);
s157 = *(a960);
a961 = (a824 + 6);
a962 = *(a961);
a963 = _mm_xor_si128(a823, a962);
a964 = (a824 + 14);
a965 = *(a964);
a966 = _mm_xor_si128(a829, a965);
a967 = (a824 + 22);
a968 = *(a967);
a969 = _mm_xor_si128(a835, a968);
a970 = (a824 + 30);
a971 = *(a970);
a972 = _mm_xor_si128(a841, a971);
b117 = _mm_add_epi32(a963, a966);
b118 = _mm_add_epi32(b117, a969);
t51 = _mm_add_epi32(b118, a972);
t52 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t51);
m97 = _mm_add_epi32(s156, t51);
m98 = _mm_add_epi32(s157, t52);
m99 = _mm_add_epi32(s156, t52);
m100 = _mm_add_epi32(s157, t51);
d49 = _mm_cmpgt_epi32(m97, m98);
d50 = _mm_cmpgt_epi32(m99, m100);
a973 = _mm_andnot_si128(d49, m97);
a974 = _mm_and_si128(d49, m98);
s158 = _mm_or_si128(a973, a974);
a975 = _mm_andnot_si128(d50, m99);
a976 = _mm_and_si128(d50, m100);
s159 = _mm_or_si128(a975, a976);
s160 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d49,_mm_setzero_si128()),_mm_packs_epi16(d50,_mm_setzero_si128())),_mm_setzero_si128()));
a977 = (a851 + 6);
*(a977) = s160;
s161 = _mm_unpacklo_epi32(s158, s159);
s162 = _mm_unpackhi_epi32(s158, s159);
a978 = (a852 + 12);
*(a978) = s161;
a979 = (a852 + 13);
*(a979) = s162;
a980 = (a818 + 7);
s163 = *(a980);
a981 = (a818 + 15);
s164 = *(a981);
a982 = (a824 + 7);
a983 = *(a982);
a984 = _mm_xor_si128(a823, a983);
a985 = (a824 + 15);
a986 = *(a985);
a987 = _mm_xor_si128(a829, a986);
a988 = (a824 + 23);
a989 = *(a988);
a990 = _mm_xor_si128(a835, a989);
a991 = (a824 + 31);
a992 = *(a991);
a993 = _mm_xor_si128(a841, a992);
b119 = _mm_add_epi32(a984, a987);
b120 = _mm_add_epi32(b119, a990);
t53 = _mm_add_epi32(b120, a993);
t54 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t53);
m101 = _mm_add_epi32(s163, t53);
m102 = _mm_add_epi32(s164, t54);
m103 = _mm_add_epi32(s163, t54);
m104 = _mm_add_epi32(s164, t53);
d51 = _mm_cmpgt_epi32(m101, m102);
d52 = _mm_cmpgt_epi32(m103, m104);
a994 = _mm_andnot_si128(d51, m101);
a995 = _mm_and_si128(d51, m102);
s165 = _mm_or_si128(a994, a995);
a996 = _mm_andnot_si128(d52, m103);
a997 = _mm_and_si128(d52, m104);
s166 = _mm_or_si128(a996, a997);
s167 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d51,_mm_setzero_si128()),_mm_packs_epi16(d52,_mm_setzero_si128())),_mm_setzero_si128()));
a998 = (a851 + 7);
*(a998) = s167;
s168 = _mm_unpacklo_epi32(s165, s166);
s169 = _mm_unpackhi_epi32(s165, s166);
a999 = (a852 + 14);
*(a999) = s168;
a1000 = (a852 + 15);
*(a1000) = s169;
s170 = *(a852);
s171 = *(a936);
a1001 = (b104 + 4);
a1002 = *(a1001);
a1003 = _mm_set1_epi32(a1002);
a1004 = _mm_xor_si128(a1003, a825);
a1005 = (b104 + 5);
a1006 = *(a1005);
a1007 = _mm_set1_epi32(a1006);
a1008 = _mm_xor_si128(a1007, a831);
a1009 = (b104 + 6);
a1010 = *(a1009);
a1011 = _mm_set1_epi32(a1010);
a1012 = _mm_xor_si128(a1011, a837);
a1013 = (b104 + 7);
a1014 = *(a1013);
a1015 = _mm_set1_epi32(a1014);
a1016 = _mm_xor_si128(a1015, a843);
b121 = _mm_add_epi32(a1004, a1008);
b122 = _mm_add_epi32(b121, a1012);
t55 = _mm_add_epi32(b122, a1016);
t56 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t55);
m105 = _mm_add_epi32(s170, t55);
m106 = _mm_add_epi32(s171, t56);
m107 = _mm_add_epi32(s170, t56);
m108 = _mm_add_epi32(s171, t55);
d53 = _mm_cmpgt_epi32(m105, m106);
d54 = _mm_cmpgt_epi32(m107, m108);
a1017 = _mm_andnot_si128(d53, m105);
a1018 = _mm_and_si128(d53, m106);
s172 = _mm_or_si128(a1017, a1018);
a1019 = _mm_andnot_si128(d54, m107);
a1020 = _mm_and_si128(d54, m108);
s173 = _mm_or_si128(a1019, a1020);
s174 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d53,_mm_setzero_si128()),_mm_packs_epi16(d54,_mm_setzero_si128())),_mm_setzero_si128()));
a1021 = (a851 + 8);
*(a1021) = s174;
s175 = _mm_unpacklo_epi32(s172, s173);
s176 = _mm_unpackhi_epi32(s172, s173);
*(a818) = s175;
*(a854) = s176;
s177 = *(a853);
s178 = *(a937);
a1022 = _mm_xor_si128(a1003, a857);
a1023 = _mm_xor_si128(a1007, a860);
a1024 = _mm_xor_si128(a1011, a863);
a1025 = _mm_xor_si128(a1015, a866);
b123 = _mm_add_epi32(a1022, a1023);
b124 = _mm_add_epi32(b123, a1024);
t57 = _mm_add_epi32(b124, a1025);
t58 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t57);
m109 = _mm_add_epi32(s177, t57);
m110 = _mm_add_epi32(s178, t58);
m111 = _mm_add_epi32(s177, t58);
m112 = _mm_add_epi32(s178, t57);
d55 = _mm_cmpgt_epi32(m109, m110);
d56 = _mm_cmpgt_epi32(m111, m112);
a1026 = _mm_andnot_si128(d55, m109);
a1027 = _mm_and_si128(d55, m110);
s179 = _mm_or_si128(a1026, a1027);
a1028 = _mm_andnot_si128(d56, m111);
a1029 = _mm_and_si128(d56, m112);
s180 = _mm_or_si128(a1028, a1029);
s181 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d55,_mm_setzero_si128()),_mm_packs_epi16(d56,_mm_setzero_si128())),_mm_setzero_si128()));
a1030 = (a851 + 9);
*(a1030) = s181;
s182 = _mm_unpacklo_epi32(s179, s180);
s183 = _mm_unpackhi_epi32(s179, s180);
*(a875) = s182;
*(a896) = s183;
s184 = *(a873);
s185 = *(a957);
a1031 = _mm_xor_si128(a1003, a878);
a1032 = _mm_xor_si128(a1007, a881);
a1033 = _mm_xor_si128(a1011, a884);
a1034 = _mm_xor_si128(a1015, a887);
b125 = _mm_add_epi32(a1031, a1032);
b126 = _mm_add_epi32(b125, a1033);
t59 = _mm_add_epi32(b126, a1034);
t60 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t59);
m113 = _mm_add_epi32(s184, t59);
m114 = _mm_add_epi32(s185, t60);
m115 = _mm_add_epi32(s184, t60);
m116 = _mm_add_epi32(s185, t59);
d57 = _mm_cmpgt_epi32(m113, m114);
d58 = _mm_cmpgt_epi32(m115, m116);
a1035 = _mm_andnot_si128(d57, m113);
a1036 = _mm_and_si128(d57, m114);
s186 = _mm_or_si128(a1035, a1036);
a1037 = _mm_andnot_si128(d58, m115);
a1038 = _mm_and_si128(d58, m116);
s187 = _mm_or_si128(a1037, a1038);
s188 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d57,_mm_setzero_si128()),_mm_packs_epi16(d58,_mm_setzero_si128())),_mm_setzero_si128()));
a1039 = (a851 + 10);
*(a1039) = s188;
s189 = _mm_unpacklo_epi32(s186, s187);
s190 = _mm_unpackhi_epi32(s186, s187);
*(a917) = s189;
*(a938) = s190;
s191 = *(a874);
s192 = *(a958);
a1040 = _mm_xor_si128(a1003, a899);
a1041 = _mm_xor_si128(a1007, a902);
a1042 = _mm_xor_si128(a1011, a905);
a1043 = _mm_xor_si128(a1015, a908);
b127 = _mm_add_epi32(a1040, a1041);
b128 = _mm_add_epi32(b127, a1042);
t61 = _mm_add_epi32(b128, a1043);
t62 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t61);
m117 = _mm_add_epi32(s191, t61);
m118 = _mm_add_epi32(s192, t62);
m119 = _mm_add_epi32(s191, t62);
m120 = _mm_add_epi32(s192, t61);
d59 = _mm_cmpgt_epi32(m117, m118);
d60 = _mm_cmpgt_epi32(m119, m120);
a1044 = _mm_andnot_si128(d59, m117);
a1045 = _mm_and_si128(d59, m118);
s193 = _mm_or_si128(a1044, a1045);
a1046 = _mm_andnot_si128(d60, m119);
a1047 = _mm_and_si128(d60, m120);
s194 = _mm_or_si128(a1046, a1047);
s195 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d59,_mm_setzero_si128()),_mm_packs_epi16(d60,_mm_setzero_si128())),_mm_setzero_si128()));
a1048 = (a851 + 11);
*(a1048) = s195;
s196 = _mm_unpacklo_epi32(s193, s194);
s197 = _mm_unpackhi_epi32(s193, s194);
*(a959) = s196;
*(a980) = s197;
s198 = *(a894);
s199 = *(a978);
a1049 = _mm_xor_si128(a1003, a920);
a1050 = _mm_xor_si128(a1007, a923);
a1051 = _mm_xor_si128(a1011, a926);
a1052 = _mm_xor_si128(a1015, a929);
b129 = _mm_add_epi32(a1049, a1050);
b130 = _mm_add_epi32(b129, a1051);
t63 = _mm_add_epi32(b130, a1052);
t64 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t63);
m121 = _mm_add_epi32(s198, t63);
m122 = _mm_add_epi32(s199, t64);
m123 = _mm_add_epi32(s198, t64);
m124 = _mm_add_epi32(s199, t63);
d61 = _mm_cmpgt_epi32(m121, m122);
d62 = _mm_cmpgt_epi32(m123, m124);
a1053 = _mm_andnot_si128(d61, m121);
a1054 = _mm_and_si128(d61, m122);
s200 = _mm_or_si128(a1053, a1054);
a1055 = _mm_andnot_si128(d62, m123);
a1056 = _mm_and_si128(d62, m124);
s201 = _mm_or_si128(a1055, a1056);
s202 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d61,_mm_setzero_si128()),_mm_packs_epi16(d62,_mm_setzero_si128())),_mm_setzero_si128()));
a1057 = (a851 + 12);
*(a1057) = s202;
s203 = _mm_unpacklo_epi32(s200, s201);
s204 = _mm_unpackhi_epi32(s200, s201);
*(a819) = s203;
*(a855) = s204;
s205 = *(a895);
s206 = *(a979);
a1058 = _mm_xor_si128(a1003, a941);
a1059 = _mm_xor_si128(a1007, a944);
a1060 = _mm_xor_si128(a1011, a947);
a1061 = _mm_xor_si128(a1015, a950);
b131 = _mm_add_epi32(a1058, a1059);
b132 = _mm_add_epi32(b131, a1060);
t65 = _mm_add_epi32(b132, a1061);
t66 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t65);
m125 = _mm_add_epi32(s205, t65);
m126 = _mm_add_epi32(s206, t66);
m127 = _mm_add_epi32(s205, t66);
m128 = _mm_add_epi32(s206, t65);
d63 = _mm_cmpgt_epi32(m125, m126);
d64 = _mm_cmpgt_epi32(m127, m128);
a1062 = _mm_andnot_si128(d63, m125);
a1063 = _mm_and_si128(d63, m126);
s207 = _mm_or_si128(a1062, a1063);
a1064 = _mm_andnot_si128(d64, m127);
a1065 = _mm_and_si128(d64, m128);
s208 = _mm_or_si128(a1064, a1065);
s209 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d63,_mm_setzero_si128()),_mm_packs_epi16(d64,_mm_setzero_si128())),_mm_setzero_si128()));
a1066 = (a851 + 13);
*(a1066) = s209;
s210 = _mm_unpacklo_epi32(s207, s208);
s211 = _mm_unpackhi_epi32(s207, s208);
*(a876) = s210;
*(a897) = s211;
s212 = *(a915);
s213 = *(a999);
a1067 = _mm_xor_si128(a1003, a962);
a1068 = _mm_xor_si128(a1007, a965);
a1069 = _mm_xor_si128(a1011, a968);
a1070 = _mm_xor_si128(a1015, a971);
b133 = _mm_add_epi32(a1067, a1068);
b134 = _mm_add_epi32(b133, a1069);
t67 = _mm_add_epi32(b134, a1070);
t68 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t67);
m129 = _mm_add_epi32(s212, t67);
m130 = _mm_add_epi32(s213, t68);
m131 = _mm_add_epi32(s212, t68);
m132 = _mm_add_epi32(s213, t67);
d65 = _mm_cmpgt_epi32(m129, m130);
d66 = _mm_cmpgt_epi32(m131, m132);
a1071 = _mm_andnot_si128(d65, m129);
a1072 = _mm_and_si128(d65, m130);
s214 = _mm_or_si128(a1071, a1072);
a1073 = _mm_andnot_si128(d66, m131);
a1074 = _mm_and_si128(d66, m132);
s215 = _mm_or_si128(a1073, a1074);
s216 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d65,_mm_setzero_si128()),_mm_packs_epi16(d66,_mm_setzero_si128())),_mm_setzero_si128()));
a1075 = (a851 + 14);
*(a1075) = s216;
s217 = _mm_unpacklo_epi32(s214, s215);
s218 = _mm_unpackhi_epi32(s214, s215);
*(a918) = s217;
*(a939) = s218;
s219 = *(a916);
s220 = *(a1000);
a1076 = _mm_xor_si128(a1003, a983);
a1077 = _mm_xor_si128(a1007, a986);
a1078 = _mm_xor_si128(a1011, a989);
a1079 = _mm_xor_si128(a1015, a992);
b135 = _mm_add_epi32(a1076, a1077);
b136 = _mm_add_epi32(b135, a1078);
t69 = _mm_add_epi32(b136, a1079);
t70 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t69);
m133 = _mm_add_epi32(s219, t69);
m134 = _mm_add_epi32(s220, t70);
m135 = _mm_add_epi32(s219, t70);
m136 = _mm_add_epi32(s220, t69);
d67 = _mm_cmpgt_epi32(m133, m134);
d68 = _mm_cmpgt_epi32(m135, m136);
a1080 = _mm_andnot_si128(d67, m133);
a1081 = _mm_and_si128(d67, m134);
s221 = _mm_or_si128(a1080, a1081);
a1082 = _mm_andnot_si128(d68, m135);
a1083 = _mm_and_si128(d68, m136);
s222 = _mm_or_si128(a1082, a1083);
s223 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d67,_mm_setzero_si128()),_mm_packs_epi16(d68,_mm_setzero_si128())),_mm_setzero_si128()));
a1084 = (a851 + 15);
*(a1084) = s223;
s224 = _mm_unpacklo_epi32(s221, s222);
s225 = _mm_unpackhi_epi32(s221, s222);
*(a960) = s224;
*(a981) = s225;
}
/* skip */
}

View File

@@ -1,36 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
#include <stdint.h>
#define K 7
#define RATE 4
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
#define FRAMEBITS 2048
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
#define COMPUTETYPE uint32_t
#define EBN0 3
#define TRIALS 10000
#define __int32 int
#define FUNC FULL_SPIRAL
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 2000000000

View File

@@ -1,370 +0,0 @@
#
/*
* Copyright (C) 201 .. 2017
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of Qt-DAB
* Qt-DAB is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* The convolutional decoder for the FIC blocks has fixed sized
* blocks, so we can use pre-generated code - for that specific
* sized blocks - generated by the spiral project
*/
#include <stdio.h>
#include <stdlib.h>
#include "mm_malloc.h"
#include "viterbi-768.h"
#include <cstring>
#ifdef __MINGW32__
#include <intrin.h>
#include <malloc.h>
#include <windows.h>
#endif
//
// It took a while to discover that the polynomials we used
// in our own "straightforward" implementation were bit-reversed!!
// The official ones are on top.
#define K 7
#define POLYS { 0155, 0117, 0123, 0155}
//#define POLYS {109, 79, 83, 109}
// In the reversed form the polys look:
//#define POLYS { 0133, 0171, 0145, 0133 }
//#define POLYS { 91, 121, 101, 91 }
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 137
//
/* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
#if (K-1<8)
#define ADDSHIFT (8-(K-1))
#define SUBSHIFT 0
#elif (K-1>8)
#define ADDSHIFT 0
#define SUBSHIFT ((K-1)-8)
#else
#define ADDSHIFT 0
#define SUBSHIFT 0
#endif
// Parity lookup table with 256 entries: Partab [v] is 1 when the
// byte value v contains an odd number of set bits and 0 otherwise.
// The table is not const because partab_init () can regenerate it.
static uint8_t Partab [] =
{ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0};
//
// The table above, a 256-entry parity lookup table (an entry is 1
// when its index has an odd number of set bits), could also be
// generated by the following function. It is now precomputed.
//	Regenerates the parity table: for every byte value the number
//	of set bits is counted and the lowest bit of that count is stored.
void viterbi_768::partab_init (void){
	for (int16_t value = 0; value < 256; value++) {
	   int16_t ones = 0;
	   for (int16_t rest = value; rest != 0; rest >>= 1) {
	      if ((rest & 1) != 0)
	         ones++;
	   }
	   Partab [value] = ones & 1;
	}
}
//	Returns the parity (1 for an odd number of set bits, 0 for an
//	even number) of the 32 bit value x.
//	The value is first folded down to a single byte by xor-ing the
//	upper halves onto the lower ones; the folded byte is then looked
//	up in the 256 entry parity table.
int viterbi_768::parity (int x){
	/* Fold down to one byte */
	x ^= (x >> 16);
	x ^= (x >> 8);
//	After folding only the low byte is meaningful, the higher bits
//	still hold leftovers of the input. Without the mask any x outside
//	0 .. 255 would index beyond the 256 entries of Partab.
	return Partab [x & 0xFF];
}
//	Keeps the path metrics from growing without bound: as soon as
//	the metric of state 0 exceeds the threshold, the smallest metric
//	of all states is subtracted from every state.
//	Only X [0] is compared against the threshold.
static inline
void renormalize (COMPUTETYPE* X, COMPUTETYPE threshold){
	if (!(X [0] > threshold))
	   return;

	COMPUTETYPE lowest = X [0];
	for (int32_t state = 1; state < NUMSTATES; state++) {
	   if (X [state] < lowest)
	      lowest = X [state];
	}

	for (int32_t state = 0; state < NUMSTATES; state++)
	   X [state] -= lowest;
}
//
//
// The main use of the viterbi decoder is in handling the FIC blocks.
// There are (in mode 1) 3 ofdm blocks, giving 4 FIC blocks.
// They all have a predefined length. In that case we use the
// "fast" (i.e. spiral) code, otherwise we use the generic code.
//	Sets up a decoder for frames of "wordlength" data bits.
//	wordlength	number of data bits per frame (without the K - 1 tail bits)
//	spiral		when true, deconvolve uses the spiral generated kernel,
//			otherwise the generic C butterfly
//	The constructor allocates 16 byte aligned buffers for the decoded
//	bytes, the input symbols and the decisions, fills the branch table
//	from the polynomials and initializes the decoder state.
viterbi_768::viterbi_768 (int16_t wordlength, bool spiral) {
int polys [RATE] = POLYS;
int16_t i, state;
#ifdef __MINGW32__
uint32_t size;
#endif
	frameBits = wordlength;
	this -> spiral = spiral;
//	partab_init ();

// B I G N O T E	The spiral code uses (wordLength + (K - 1) * sizeof ...
// However, the application then crashes, so something is not OK.
// By doubling the size, the problem disappears.
//
// A likely cause: the SSE spiral kernel advances two trellis steps per
// loop iteration (it reads 2 * RATE symbols and stores two decision
// records per iteration). When it is invoked with one iteration per
// step it therefore touches twice the nominal amount of symbols and
// decisions, which is why the symbol and decision buffers are
// allocated at twice their nominal size in both branches below.
#ifdef __MINGW32__
	size = 2 * ((wordlength + (K - 1)) / 8 + 1 + 16) & ~0xF;
	data = (uint8_t *)_aligned_malloc (size, 16);
	size = 2 * (RATE * (wordlength + (K - 1)) * sizeof(COMPUTETYPE) + 16) & ~0xF;
	symbols = (COMPUTETYPE *)_aligned_malloc (size, 16);
	size = 2 * (wordlength + (K - 1)) * sizeof (decision_t);
	size = (size + 16) & ~0xF;
	vp. decisions = (decision_t *)_aligned_malloc (size, 16);
#else
//	On failure the pointer is set to NULL explicitly, so that the
//	destructor never passes an indeterminate pointer to free ()
	if (posix_memalign ((void**)&data, 16,
	                        (wordlength + (K - 1))/ 8 + 1)){
	   printf("Allocation of data array failed\n");
	   data = NULL;
	}
//	doubled, consistent with the MinGW branch, see the note above
	if (posix_memalign ((void**)&symbols, 16,
	         2 * RATE * (wordlength + (K - 1)) * sizeof(COMPUTETYPE))){
	   printf("Allocation of symbols array failed\n");
	   symbols = NULL;
	}
	if (posix_memalign ((void**)&(vp. decisions),
	                    16,
	                    2 * (wordlength + (K - 1)) * sizeof (decision_t))){
	   printf ("Allocation of vp decisions failed\n");
	   vp. decisions = NULL;
	}
#endif

//	Branch table: per polynomial and per even state either 255 or 0,
//	depending on the parity of the state bits selected by the polynomial
	for (state = 0; state < NUMSTATES / 2; state++) {
	   for (i = 0; i < RATE; i++)
	      Branchtab [i * NUMSTATES / 2 + state] =
	                     (polys[i] < 0) ^
	                        parity((2 * state) & abs (polys[i])) ? 255 : 0;
	}
//
	init_viterbi (&vp, 0);
}
//	Releases the three aligned buffers obtained in the constructor,
//	using the deallocation function that matches the allocator of
//	the platform.
viterbi_768::~viterbi_768 (void) {
void *const buffers [] = {vp. decisions, data, symbols};

	for (size_t n = 0; n < sizeof (buffers) / sizeof (buffers [0]); n++) {
#ifdef __MINGW32__
	   _aligned_free (buffers [n]);
#else
	   free (buffers [n]);
#endif
	}
}
//	Single bit masks, ordered from the most significant bit of a
//	byte (index 0) down to the least significant bit (index 7)
static int maskTable [] = {128, 64, 32, 16, 8, 4, 2, 1};

//	Returns bit number o of the byte v as 0 or 1, where o = 0
//	addresses the most significant bit. o must be in the range 0 .. 7.
static inline
uint8_t getbit (uint8_t v, int32_t o) {
	const bool isSet = (v & maskTable [o]) != 0;
	return static_cast<uint8_t> (isSet);
}
// depends: POLYS, RATE, COMPUTETYPE
// encode was only used for testing purposes
//void encode (/*const*/ unsigned char *bytes, COMPUTETYPE *symbols, int nbits) {
//int i, k;
//int polys [RATE] = POLYS;
//int sr = 0;
//
//// FIXME: this is slowish
//// -- remember about the padding!
// for (i = 0; i < nbits + (K - 1); i++) {
// int b = bytes[i/8];
// int j = i % 8;
// int bit = (b >> (7-j)) & 1;
//
// sr = (sr << 1) | bit;
// for (k = 0; k < RATE; k++)
// *(symbols++) = parity(sr & polys[k]);
// }
//}
// Note that our DAB environment maps the softbits to -127 .. 127
// we have to map that onto 0 .. 255
void viterbi_768::deconvolve (int16_t *input, uint8_t *output) {
uint32_t i;
init_viterbi (&vp, 0);
for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
int16_t temp = input [i] + 127;
if (temp < 0) temp = 0;
if (temp > 255) temp = 255;
symbols [i] = temp;
}
if (!spiral)
update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
else
update_viterbi_blk_SPIRAL (&vp, symbols, frameBits + (K - 1));
chainback_viterbi (&vp, data, frameBits, 0);
for (i = 0; i < (uint16_t)frameBits; i ++)
output [i] = getbit (data [i >> 3], i & 07);
}
/* C-language butterfly */
void viterbi_768::BFLY (int i, int s, COMPUTETYPE * syms,
struct v * vp, decision_t * d) {
int32_t j, decision0, decision1;
COMPUTETYPE metric,m0,m1,m2,m3;
metric =0;
for (j = 0; j < RATE;j++)
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s*RATE+j]) >>
METRICSHIFT ;
metric = metric >> PRECISIONSHIFT;
const COMPUTETYPE max =
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
m0 = vp->old_metrics->t [i] + metric;
m1 = vp->old_metrics->t [i + NUMSTATES / 2] + (max - metric);
m2 = vp->old_metrics->t [i] + (max - metric);
m3 = vp->old_metrics->t [i + NUMSTATES / 2] + metric;
decision0 = ((int32_t)(m0 - m1)) > 0;
decision1 = ((int32_t)(m2 - m3)) > 0;
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
}
/* Update decoder with a block of demodulated symbols
* Note that nbits is the number of decoded data bits, not the number
* of symbols!
*/
void viterbi_768::update_viterbi_blk_GENERIC (struct v *vp,
COMPUTETYPE *syms,
int16_t nbits){
decision_t *d = (decision_t *)vp -> decisions;
int32_t s, i;
for (s = 0; s < nbits; s++)
memset (&d [s], 0, sizeof (decision_t));
for (s = 0; s < nbits; s++){
void *tmp;
for (i = 0; i < NUMSTATES / 2; i++)
BFLY (i, s, syms, vp, vp -> decisions);
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
// Swap pointers to old and new metrics
tmp = vp -> old_metrics;
vp -> old_metrics = vp -> new_metrics;
vp -> new_metrics = (metric_t *)tmp;
}
}
//	Prototype of the Spiral generated kernel, which is compiled as C.
//	Which flavour is declared depends on SSE_AVAILABLE.
//	NOTE(review): the definition of FULL_SPIRAL_sse spells its pointer
//	parameters as int32_t * and unsigned char *; with C linkage that
//	difference is not checked by the linker - confirm the types agree.
extern "C" {
#ifdef	SSE_AVAILABLE
void	FULL_SPIRAL_sse (int,
	                 COMPUTETYPE *Y,
	                 COMPUTETYPE *X,
	                 COMPUTETYPE *syms,
	                 DECISIONTYPE *dec,
	                 COMPUTETYPE *Branchtab);
#else
void	FULL_SPIRAL_no_sse (int,
	                    COMPUTETYPE *Y,
	                    COMPUTETYPE *X,
	                    COMPUTETYPE *syms,
	                    DECISIONTYPE *dec,
	                    COMPUTETYPE *Branchtab);
#endif
}
void viterbi_768::update_viterbi_blk_SPIRAL (struct v *vp,
COMPUTETYPE *syms,
int16_t nbits){
decision_t *d = (decision_t *)vp -> decisions;
int32_t s;
for (s = 0; s < nbits; s++)
memset (d + s, 0, sizeof(decision_t));
#ifndef SSE_AVAILABLE
FULL_SPIRAL_no_sse (nbits,
#else
FULL_SPIRAL_sse (nbits,
#endif
vp -> new_metrics -> t,
vp -> old_metrics -> t,
syms,
d -> t, Branchtab);
}
//
/* Viterbi chainback */
void viterbi_768::chainback_viterbi (struct v *vp,
uint8_t *data, /* Decoded output data */
int16_t nbits, /* Number of data bits */
uint16_t endstate){ /*Terminal encoder state */
decision_t *d = vp -> decisions;
/* Make room beyond the end of the encoder register so we can
* accumulate a full byte of decoded data
*/
endstate = (endstate % NUMSTATES) << ADDSHIFT;
/* The store into data[] only needs to be done every 8 bits.
* But this avoids a conditional branch, and the writes will
* combine in the cache anyway
*/
d += (K - 1); /* Look past tail */
while (nbits-- != 0){
int k;
// int l = (endstate >> ADDSHIFT) / 32;
// int m = (endstate >> ADDSHIFT) % 32;
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
((endstate>>ADDSHIFT) % 32)) & 1;
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
data [nbits >> 3] = endstate >> SUBSHIFT;
}
}
/*
 *	Reset the decoder for a new frame: metrics1 becomes the "old"
 *	metric buffer and metrics2 the "new" one, every state starts
 *	with metric 63, and the known starting state is favoured by
 *	giving it metric 0.
 */
void	viterbi_768::init_viterbi (struct v *p, int16_t starting_state){
	p -> old_metrics = &p -> metrics1;
	p -> new_metrics = &p -> metrics2;

	for (int32_t state = 0; state < NUMSTATES; state ++)
	   p -> old_metrics -> t [state] = 63;

//	bias the known start state
	p -> old_metrics -> t [starting_state & (NUMSTATES - 1)] = 0;
}

View File

@@ -1,11 +0,0 @@
The viterbi implementation is copied from the spiral one, all
rights gratefully acknowledged.
Since we serve more than a single platform, we do not use the SSE
implementation.
The particular spiral implementation (see the file "spiral-no-sse.c")
is generated for the word size and the other parameters of FIC blocks.
The implementation therefore has a "switch" that, when set to true,
selects the spiral implementation and, when set to false (the default),
selects the generic implementation.

File diff suppressed because it is too large Load Diff

View File

@@ -1,701 +0,0 @@
#ifdef NEON_AVAILABLE
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
//#include <include/mm_malloc.h>
//#include <pmmintrin.h>
//#include <emmintrin.h>
//#include <xmmintrin.h>
//#include <mmintrin.h>
#include "SSE2NEON.h"
#include "spiral-neon.h"
/*
 * Initialisation entry point of the generated kernel.
 * The body is empty: nothing is set up here.
 */
void init_FULL_SPIRAL() {
}
/*
 * Spiral-generated add-compare-select kernel, written with SSE2
 * intrinsics (this file gets them through SSE2NEON.h).
 *
 * amount     number of loop iterations. Every iteration reads eight
 *            consecutive entries of syms (syms[8*i9] .. syms[8*i9 + 7])
 *            and writes sixteen bytes of dec (dec[16*i9] .. dec[16*i9 + 15]),
 *            in two passes of four symbols / eight bytes each.
 * Y, X       path-metric buffers, each accessed as 16 __m128i. Pass 1
 *            reads X and writes Y, pass 2 reads Y and writes X, so the
 *            latest metrics are back in X at the end of every iteration.
 * syms       input symbols, one int32_t per symbol.
 * dec        decision output, written one byte at a time.
 * Branchtab  branch table, accessed as 32 __m128i (indices 0 .. 31).
 *
 * NOTE(review): X, Y and Branchtab are dereferenced through __m128i
 * pointers; this presumably requires 16-byte aligned buffers -- confirm
 * against the caller and against SSE2NEON.h.
 */
void FULL_SPIRAL_neon(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
int i9;
// for(i9 = 0; i9 <= amount; i9++) {
for(i9 = 0; i9 < amount; i9++) {
int32_t a1002, a1006, a1010, a1014, a822, a828, a834
, a840;
int a820, a850;
unsigned char s118, s125, s132, s139, s146, s153, s160
, s167, s174, s181, s188, s195, s202, s209, s216
, s223;
int32_t *a1001, *a1005, *a1009, *a1013, *a821, *a827, *a833
, *a839, *b104;
unsigned char *a1021, *a1030, *a1039, *a1048, *a1057, *a1066, *a1075
, *a1084, *a849, *a851, *a872, *a893, *a914, *a935, *a956
, *a977, *a998;
__m128i *a1000, *a818, *a819, *a824, *a830, *a836, *a842
, *a852, *a853, *a854, *a855, *a856, *a859, *a862, *a865
, *a873, *a874, *a875, *a876, *a877, *a880, *a883, *a886
, *a894, *a895, *a896, *a897, *a898, *a901, *a904, *a907
, *a915, *a916, *a917, *a918, *a919, *a922, *a925, *a928
, *a936, *a937, *a938, *a939, *a940, *a943, *a946, *a949
, *a957, *a958, *a959, *a960, *a961, *a964, *a967, *a970
, *a978, *a979, *a980, *a981, *a982, *a985, *a988, *a991
, *a999;
__m128i a1003, a1004, a1007, a1008, a1011, a1012, a1015
, a1016, a1017, a1018, a1019, a1020, a1022, a1023, a1024
, a1025, a1026, a1027, a1028, a1029, a1031, a1032, a1033
, a1034, a1035, a1036, a1037, a1038, a1040, a1041, a1042
, a1043, a1044, a1045, a1046, a1047, a1049, a1050, a1051
, a1052, a1053, a1054, a1055, a1056, a1058, a1059, a1060
, a1061, a1062, a1063, a1064, a1065, a1067, a1068, a1069
, a1070, a1071, a1072, a1073, a1074, a1076, a1077, a1078
, a1079, a1080, a1081, a1082, a1083, a823, a825, a826
, a829, a831, a832, a835, a837, a838, a841, a843
, a844, a845, a846, a847, a848, a857, a858, a860
, a861, a863, a864, a866, a867, a868, a869, a870
, a871, a878, a879, a881, a882, a884, a885, a887
, a888, a889, a890, a891, a892, a899, a900, a902
, a903, a905, a906, a908, a909, a910, a911, a912
, a913, a920, a921, a923, a924, a926, a927, a929
, a930, a931, a932, a933, a934, a941, a942, a944
, a945, a947, a948, a950, a951, a952, a953, a954
, a955, a962, a963, a965, a966, a968, a969, a971
, a972, a973, a974, a975, a976, a983, a984, a986
, a987, a989, a990, a992, a993, a994, a995, a996
, a997, b105, b106, b107, b108, b109, b110, b111
, b112, b113, b114, b115, b116, b117, b118, b119
, b120, b121, b122, b123, b124, b125, b126, b127
, b128, b129, b130, b131, b132, b133, b134, b135
, b136, d37, d38, d39, d40, d41, d42, d43
, d44, d45, d46, d47, d48, d49, d50, d51
, d52, d53, d54, d55, d56, d57, d58, d59
, d60, d61, d62, d63, d64, d65, d66, d67
, d68, m100, m101, m102, m103, m104, m105, m106
, m107, m108, m109, m110, m111, m112, m113, m114
, m115, m116, m117, m118, m119, m120, m121, m122
, m123, m124, m125, m126, m127, m128, m129, m130
, m131, m132, m133, m134, m135, m136, m73, m74
, m75, m76, m77, m78, m79, m80, m81, m82
, m83, m84, m85, m86, m87, m88, m89, m90
, m91, m92, m93, m94, m95, m96, m97, m98
, m99, s114, s115, s116, s117, s119, s120, s121
, s122, s123, s124, s126, s127, s128, s129, s130
, s131, s133, s134, s135, s136, s137, s138, s140
, s141, s142, s143, s144, s145, s147, s148, s149
, s150, s151, s152, s154, s155, s156, s157, s158
, s159, s161, s162, s163, s164, s165, s166, s168
, s169, s170, s171, s172, s173, s175, s176, s177
, s178, s179, s180, s182, s183, s184, s185, s186
, s187, s189, s190, s191, s192, s193, s194, s196
, s197, s198, s199, s200, s201, s203, s204, s205
, s206, s207, s208, s210, s211, s212, s213, s214
, s215, s217, s218, s219, s220, s221, s222, s224
, s225, t39, t40, t41, t42, t43, t44, t45
, t46, t47, t48, t49, t50, t51, t52, t53
, t54, t55, t56, t57, t58, t59, t60, t61
, t62, t63, t64, t65, t66, t67, t68, t69
, t70;
/* pass 1: symbols 0 .. 3 of this iteration, metrics are read from X
 * and written to Y, decision bytes 0 .. 7 are produced */
a818 = ((__m128i *) X);
s114 = *(a818);
a819 = (a818 + 8);
s115 = *(a819);
a820 = (8 * i9);
a821 = (syms + a820);
a822 = *(a821);
a823 = _mm_set1_epi32(a822);
a824 = ((__m128i *) Branchtab);
a825 = *(a824);
a826 = _mm_xor_si128(a823, a825);
b104 = (a820 + syms);
a827 = (b104 + 1);
a828 = *(a827);
a829 = _mm_set1_epi32(a828);
a830 = (a824 + 8);
a831 = *(a830);
a832 = _mm_xor_si128(a829, a831);
a833 = (b104 + 2);
a834 = *(a833);
a835 = _mm_set1_epi32(a834);
a836 = (a824 + 16);
a837 = *(a836);
a838 = _mm_xor_si128(a835, a837);
a839 = (b104 + 3);
a840 = *(a839);
a841 = _mm_set1_epi32(a840);
a842 = (a824 + 24);
a843 = *(a842);
a844 = _mm_xor_si128(a841, a843);
b105 = _mm_add_epi32(a826, a832);
b106 = _mm_add_epi32(b105, a838);
t39 = _mm_add_epi32(b106, a844);
/* t40 is the complement of the branch metric t39 with respect to 1020;
 * presumably 1020 = 4 * 255 is the largest sum of the four XOR terms
 * -- verify against the symbol range supplied by the caller */
t40 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t39);
m73 = _mm_add_epi32(s114, t39);
m74 = _mm_add_epi32(s115, t40);
m75 = _mm_add_epi32(s114, t40);
m76 = _mm_add_epi32(s115, t39);
d37 = _mm_cmpgt_epi32(m73, m74);
d38 = _mm_cmpgt_epi32(m75, m76);
a845 = _mm_andnot_si128(d37, m73);
a846 = _mm_and_si128(d37, m74);
s116 = _mm_or_si128(a845, a846);
a847 = _mm_andnot_si128(d38, m75);
a848 = _mm_and_si128(d38, m76);
s117 = _mm_or_si128(a847, a848);
s118 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d37,_mm_setzero_si128()),_mm_packs_epi16(d38,_mm_setzero_si128())),_mm_setzero_si128()));
a849 = ((unsigned char *) dec);
a850 = (16 * i9);
a851 = (a849 + a850);
*(a851) = s118;
s119 = _mm_unpacklo_epi32(s116, s117);
s120 = _mm_unpackhi_epi32(s116, s117);
a852 = ((__m128i *) Y);
*(a852) = s119;
a853 = (a852 + 1);
*(a853) = s120;
a854 = (a818 + 1);
s121 = *(a854);
a855 = (a818 + 9);
s122 = *(a855);
a856 = (a824 + 1);
a857 = *(a856);
a858 = _mm_xor_si128(a823, a857);
a859 = (a824 + 9);
a860 = *(a859);
a861 = _mm_xor_si128(a829, a860);
a862 = (a824 + 17);
a863 = *(a862);
a864 = _mm_xor_si128(a835, a863);
a865 = (a824 + 25);
a866 = *(a865);
a867 = _mm_xor_si128(a841, a866);
b107 = _mm_add_epi32(a858, a861);
b108 = _mm_add_epi32(b107, a864);
t41 = _mm_add_epi32(b108, a867);
t42 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t41);
m77 = _mm_add_epi32(s121, t41);
m78 = _mm_add_epi32(s122, t42);
m79 = _mm_add_epi32(s121, t42);
m80 = _mm_add_epi32(s122, t41);
d39 = _mm_cmpgt_epi32(m77, m78);
d40 = _mm_cmpgt_epi32(m79, m80);
a868 = _mm_andnot_si128(d39, m77);
a869 = _mm_and_si128(d39, m78);
s123 = _mm_or_si128(a868, a869);
a870 = _mm_andnot_si128(d40, m79);
a871 = _mm_and_si128(d40, m80);
s124 = _mm_or_si128(a870, a871);
s125 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d39,_mm_setzero_si128()),_mm_packs_epi16(d40,_mm_setzero_si128())),_mm_setzero_si128()));
a872 = (a851 + 1);
*(a872) = s125;
s126 = _mm_unpacklo_epi32(s123, s124);
s127 = _mm_unpackhi_epi32(s123, s124);
a873 = (a852 + 2);
*(a873) = s126;
a874 = (a852 + 3);
*(a874) = s127;
a875 = (a818 + 2);
s128 = *(a875);
a876 = (a818 + 10);
s129 = *(a876);
a877 = (a824 + 2);
a878 = *(a877);
a879 = _mm_xor_si128(a823, a878);
a880 = (a824 + 10);
a881 = *(a880);
a882 = _mm_xor_si128(a829, a881);
a883 = (a824 + 18);
a884 = *(a883);
a885 = _mm_xor_si128(a835, a884);
a886 = (a824 + 26);
a887 = *(a886);
a888 = _mm_xor_si128(a841, a887);
b109 = _mm_add_epi32(a879, a882);
b110 = _mm_add_epi32(b109, a885);
t43 = _mm_add_epi32(b110, a888);
t44 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t43);
m81 = _mm_add_epi32(s128, t43);
m82 = _mm_add_epi32(s129, t44);
m83 = _mm_add_epi32(s128, t44);
m84 = _mm_add_epi32(s129, t43);
d41 = _mm_cmpgt_epi32(m81, m82);
d42 = _mm_cmpgt_epi32(m83, m84);
a889 = _mm_andnot_si128(d41, m81);
a890 = _mm_and_si128(d41, m82);
s130 = _mm_or_si128(a889, a890);
a891 = _mm_andnot_si128(d42, m83);
a892 = _mm_and_si128(d42, m84);
s131 = _mm_or_si128(a891, a892);
s132 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d41,_mm_setzero_si128()),_mm_packs_epi16(d42,_mm_setzero_si128())),_mm_setzero_si128()));
a893 = (a851 + 2);
*(a893) = s132;
s133 = _mm_unpacklo_epi32(s130, s131);
s134 = _mm_unpackhi_epi32(s130, s131);
a894 = (a852 + 4);
*(a894) = s133;
a895 = (a852 + 5);
*(a895) = s134;
a896 = (a818 + 3);
s135 = *(a896);
a897 = (a818 + 11);
s136 = *(a897);
a898 = (a824 + 3);
a899 = *(a898);
a900 = _mm_xor_si128(a823, a899);
a901 = (a824 + 11);
a902 = *(a901);
a903 = _mm_xor_si128(a829, a902);
a904 = (a824 + 19);
a905 = *(a904);
a906 = _mm_xor_si128(a835, a905);
a907 = (a824 + 27);
a908 = *(a907);
a909 = _mm_xor_si128(a841, a908);
b111 = _mm_add_epi32(a900, a903);
b112 = _mm_add_epi32(b111, a906);
t45 = _mm_add_epi32(b112, a909);
t46 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t45);
m85 = _mm_add_epi32(s135, t45);
m86 = _mm_add_epi32(s136, t46);
m87 = _mm_add_epi32(s135, t46);
m88 = _mm_add_epi32(s136, t45);
d43 = _mm_cmpgt_epi32(m85, m86);
d44 = _mm_cmpgt_epi32(m87, m88);
a910 = _mm_andnot_si128(d43, m85);
a911 = _mm_and_si128(d43, m86);
s137 = _mm_or_si128(a910, a911);
a912 = _mm_andnot_si128(d44, m87);
a913 = _mm_and_si128(d44, m88);
s138 = _mm_or_si128(a912, a913);
s139 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d43,_mm_setzero_si128()),_mm_packs_epi16(d44,_mm_setzero_si128())),_mm_setzero_si128()));
a914 = (a851 + 3);
*(a914) = s139;
s140 = _mm_unpacklo_epi32(s137, s138);
s141 = _mm_unpackhi_epi32(s137, s138);
a915 = (a852 + 6);
*(a915) = s140;
a916 = (a852 + 7);
*(a916) = s141;
a917 = (a818 + 4);
s142 = *(a917);
a918 = (a818 + 12);
s143 = *(a918);
a919 = (a824 + 4);
a920 = *(a919);
a921 = _mm_xor_si128(a823, a920);
a922 = (a824 + 12);
a923 = *(a922);
a924 = _mm_xor_si128(a829, a923);
a925 = (a824 + 20);
a926 = *(a925);
a927 = _mm_xor_si128(a835, a926);
a928 = (a824 + 28);
a929 = *(a928);
a930 = _mm_xor_si128(a841, a929);
b113 = _mm_add_epi32(a921, a924);
b114 = _mm_add_epi32(b113, a927);
t47 = _mm_add_epi32(b114, a930);
t48 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t47);
m89 = _mm_add_epi32(s142, t47);
m90 = _mm_add_epi32(s143, t48);
m91 = _mm_add_epi32(s142, t48);
m92 = _mm_add_epi32(s143, t47);
d45 = _mm_cmpgt_epi32(m89, m90);
d46 = _mm_cmpgt_epi32(m91, m92);
a931 = _mm_andnot_si128(d45, m89);
a932 = _mm_and_si128(d45, m90);
s144 = _mm_or_si128(a931, a932);
a933 = _mm_andnot_si128(d46, m91);
a934 = _mm_and_si128(d46, m92);
s145 = _mm_or_si128(a933, a934);
s146 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d45,_mm_setzero_si128()),_mm_packs_epi16(d46,_mm_setzero_si128())),_mm_setzero_si128()));
a935 = (a851 + 4);
*(a935) = s146;
s147 = _mm_unpacklo_epi32(s144, s145);
s148 = _mm_unpackhi_epi32(s144, s145);
a936 = (a852 + 8);
*(a936) = s147;
a937 = (a852 + 9);
*(a937) = s148;
a938 = (a818 + 5);
s149 = *(a938);
a939 = (a818 + 13);
s150 = *(a939);
a940 = (a824 + 5);
a941 = *(a940);
a942 = _mm_xor_si128(a823, a941);
a943 = (a824 + 13);
a944 = *(a943);
a945 = _mm_xor_si128(a829, a944);
a946 = (a824 + 21);
a947 = *(a946);
a948 = _mm_xor_si128(a835, a947);
a949 = (a824 + 29);
a950 = *(a949);
a951 = _mm_xor_si128(a841, a950);
b115 = _mm_add_epi32(a942, a945);
b116 = _mm_add_epi32(b115, a948);
t49 = _mm_add_epi32(b116, a951);
t50 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t49);
m93 = _mm_add_epi32(s149, t49);
m94 = _mm_add_epi32(s150, t50);
m95 = _mm_add_epi32(s149, t50);
m96 = _mm_add_epi32(s150, t49);
d47 = _mm_cmpgt_epi32(m93, m94);
d48 = _mm_cmpgt_epi32(m95, m96);
a952 = _mm_andnot_si128(d47, m93);
a953 = _mm_and_si128(d47, m94);
s151 = _mm_or_si128(a952, a953);
a954 = _mm_andnot_si128(d48, m95);
a955 = _mm_and_si128(d48, m96);
s152 = _mm_or_si128(a954, a955);
s153 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d47,_mm_setzero_si128()),_mm_packs_epi16(d48,_mm_setzero_si128())),_mm_setzero_si128()));
a956 = (a851 + 5);
*(a956) = s153;
s154 = _mm_unpacklo_epi32(s151, s152);
s155 = _mm_unpackhi_epi32(s151, s152);
a957 = (a852 + 10);
*(a957) = s154;
a958 = (a852 + 11);
*(a958) = s155;
a959 = (a818 + 6);
s156 = *(a959);
a960 = (a818 + 14);
s157 = *(a960);
a961 = (a824 + 6);
a962 = *(a961);
a963 = _mm_xor_si128(a823, a962);
a964 = (a824 + 14);
a965 = *(a964);
a966 = _mm_xor_si128(a829, a965);
a967 = (a824 + 22);
a968 = *(a967);
a969 = _mm_xor_si128(a835, a968);
a970 = (a824 + 30);
a971 = *(a970);
a972 = _mm_xor_si128(a841, a971);
b117 = _mm_add_epi32(a963, a966);
b118 = _mm_add_epi32(b117, a969);
t51 = _mm_add_epi32(b118, a972);
t52 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t51);
m97 = _mm_add_epi32(s156, t51);
m98 = _mm_add_epi32(s157, t52);
m99 = _mm_add_epi32(s156, t52);
m100 = _mm_add_epi32(s157, t51);
d49 = _mm_cmpgt_epi32(m97, m98);
d50 = _mm_cmpgt_epi32(m99, m100);
a973 = _mm_andnot_si128(d49, m97);
a974 = _mm_and_si128(d49, m98);
s158 = _mm_or_si128(a973, a974);
a975 = _mm_andnot_si128(d50, m99);
a976 = _mm_and_si128(d50, m100);
s159 = _mm_or_si128(a975, a976);
s160 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d49,_mm_setzero_si128()),_mm_packs_epi16(d50,_mm_setzero_si128())),_mm_setzero_si128()));
a977 = (a851 + 6);
*(a977) = s160;
s161 = _mm_unpacklo_epi32(s158, s159);
s162 = _mm_unpackhi_epi32(s158, s159);
a978 = (a852 + 12);
*(a978) = s161;
a979 = (a852 + 13);
*(a979) = s162;
a980 = (a818 + 7);
s163 = *(a980);
a981 = (a818 + 15);
s164 = *(a981);
a982 = (a824 + 7);
a983 = *(a982);
a984 = _mm_xor_si128(a823, a983);
a985 = (a824 + 15);
a986 = *(a985);
a987 = _mm_xor_si128(a829, a986);
a988 = (a824 + 23);
a989 = *(a988);
a990 = _mm_xor_si128(a835, a989);
a991 = (a824 + 31);
a992 = *(a991);
a993 = _mm_xor_si128(a841, a992);
b119 = _mm_add_epi32(a984, a987);
b120 = _mm_add_epi32(b119, a990);
t53 = _mm_add_epi32(b120, a993);
t54 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t53);
m101 = _mm_add_epi32(s163, t53);
m102 = _mm_add_epi32(s164, t54);
m103 = _mm_add_epi32(s163, t54);
m104 = _mm_add_epi32(s164, t53);
d51 = _mm_cmpgt_epi32(m101, m102);
d52 = _mm_cmpgt_epi32(m103, m104);
a994 = _mm_andnot_si128(d51, m101);
a995 = _mm_and_si128(d51, m102);
s165 = _mm_or_si128(a994, a995);
a996 = _mm_andnot_si128(d52, m103);
a997 = _mm_and_si128(d52, m104);
s166 = _mm_or_si128(a996, a997);
s167 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d51,_mm_setzero_si128()),_mm_packs_epi16(d52,_mm_setzero_si128())),_mm_setzero_si128()));
a998 = (a851 + 7);
*(a998) = s167;
s168 = _mm_unpacklo_epi32(s165, s166);
s169 = _mm_unpackhi_epi32(s165, s166);
a999 = (a852 + 14);
*(a999) = s168;
a1000 = (a852 + 15);
*(a1000) = s169;
/* pass 2: symbols 4 .. 7 of this iteration, metrics are read from Y
 * and written back to X, decision bytes 8 .. 15 are produced; the
 * branch-table vectors loaded in pass 1 are reused */
s170 = *(a852);
s171 = *(a936);
a1001 = (b104 + 4);
a1002 = *(a1001);
a1003 = _mm_set1_epi32(a1002);
a1004 = _mm_xor_si128(a1003, a825);
a1005 = (b104 + 5);
a1006 = *(a1005);
a1007 = _mm_set1_epi32(a1006);
a1008 = _mm_xor_si128(a1007, a831);
a1009 = (b104 + 6);
a1010 = *(a1009);
a1011 = _mm_set1_epi32(a1010);
a1012 = _mm_xor_si128(a1011, a837);
a1013 = (b104 + 7);
a1014 = *(a1013);
a1015 = _mm_set1_epi32(a1014);
a1016 = _mm_xor_si128(a1015, a843);
b121 = _mm_add_epi32(a1004, a1008);
b122 = _mm_add_epi32(b121, a1012);
t55 = _mm_add_epi32(b122, a1016);
t56 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t55);
m105 = _mm_add_epi32(s170, t55);
m106 = _mm_add_epi32(s171, t56);
m107 = _mm_add_epi32(s170, t56);
m108 = _mm_add_epi32(s171, t55);
d53 = _mm_cmpgt_epi32(m105, m106);
d54 = _mm_cmpgt_epi32(m107, m108);
a1017 = _mm_andnot_si128(d53, m105);
a1018 = _mm_and_si128(d53, m106);
s172 = _mm_or_si128(a1017, a1018);
a1019 = _mm_andnot_si128(d54, m107);
a1020 = _mm_and_si128(d54, m108);
s173 = _mm_or_si128(a1019, a1020);
s174 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d53,_mm_setzero_si128()),_mm_packs_epi16(d54,_mm_setzero_si128())),_mm_setzero_si128()));
a1021 = (a851 + 8);
*(a1021) = s174;
s175 = _mm_unpacklo_epi32(s172, s173);
s176 = _mm_unpackhi_epi32(s172, s173);
*(a818) = s175;
*(a854) = s176;
s177 = *(a853);
s178 = *(a937);
a1022 = _mm_xor_si128(a1003, a857);
a1023 = _mm_xor_si128(a1007, a860);
a1024 = _mm_xor_si128(a1011, a863);
a1025 = _mm_xor_si128(a1015, a866);
b123 = _mm_add_epi32(a1022, a1023);
b124 = _mm_add_epi32(b123, a1024);
t57 = _mm_add_epi32(b124, a1025);
t58 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t57);
m109 = _mm_add_epi32(s177, t57);
m110 = _mm_add_epi32(s178, t58);
m111 = _mm_add_epi32(s177, t58);
m112 = _mm_add_epi32(s178, t57);
d55 = _mm_cmpgt_epi32(m109, m110);
d56 = _mm_cmpgt_epi32(m111, m112);
a1026 = _mm_andnot_si128(d55, m109);
a1027 = _mm_and_si128(d55, m110);
s179 = _mm_or_si128(a1026, a1027);
a1028 = _mm_andnot_si128(d56, m111);
a1029 = _mm_and_si128(d56, m112);
s180 = _mm_or_si128(a1028, a1029);
s181 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d55,_mm_setzero_si128()),_mm_packs_epi16(d56,_mm_setzero_si128())),_mm_setzero_si128()));
a1030 = (a851 + 9);
*(a1030) = s181;
s182 = _mm_unpacklo_epi32(s179, s180);
s183 = _mm_unpackhi_epi32(s179, s180);
*(a875) = s182;
*(a896) = s183;
s184 = *(a873);
s185 = *(a957);
a1031 = _mm_xor_si128(a1003, a878);
a1032 = _mm_xor_si128(a1007, a881);
a1033 = _mm_xor_si128(a1011, a884);
a1034 = _mm_xor_si128(a1015, a887);
b125 = _mm_add_epi32(a1031, a1032);
b126 = _mm_add_epi32(b125, a1033);
t59 = _mm_add_epi32(b126, a1034);
t60 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t59);
m113 = _mm_add_epi32(s184, t59);
m114 = _mm_add_epi32(s185, t60);
m115 = _mm_add_epi32(s184, t60);
m116 = _mm_add_epi32(s185, t59);
d57 = _mm_cmpgt_epi32(m113, m114);
d58 = _mm_cmpgt_epi32(m115, m116);
a1035 = _mm_andnot_si128(d57, m113);
a1036 = _mm_and_si128(d57, m114);
s186 = _mm_or_si128(a1035, a1036);
a1037 = _mm_andnot_si128(d58, m115);
a1038 = _mm_and_si128(d58, m116);
s187 = _mm_or_si128(a1037, a1038);
s188 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d57,_mm_setzero_si128()),_mm_packs_epi16(d58,_mm_setzero_si128())),_mm_setzero_si128()));
a1039 = (a851 + 10);
*(a1039) = s188;
s189 = _mm_unpacklo_epi32(s186, s187);
s190 = _mm_unpackhi_epi32(s186, s187);
*(a917) = s189;
*(a938) = s190;
s191 = *(a874);
s192 = *(a958);
a1040 = _mm_xor_si128(a1003, a899);
a1041 = _mm_xor_si128(a1007, a902);
a1042 = _mm_xor_si128(a1011, a905);
a1043 = _mm_xor_si128(a1015, a908);
b127 = _mm_add_epi32(a1040, a1041);
b128 = _mm_add_epi32(b127, a1042);
t61 = _mm_add_epi32(b128, a1043);
t62 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t61);
m117 = _mm_add_epi32(s191, t61);
m118 = _mm_add_epi32(s192, t62);
m119 = _mm_add_epi32(s191, t62);
m120 = _mm_add_epi32(s192, t61);
d59 = _mm_cmpgt_epi32(m117, m118);
d60 = _mm_cmpgt_epi32(m119, m120);
a1044 = _mm_andnot_si128(d59, m117);
a1045 = _mm_and_si128(d59, m118);
s193 = _mm_or_si128(a1044, a1045);
a1046 = _mm_andnot_si128(d60, m119);
a1047 = _mm_and_si128(d60, m120);
s194 = _mm_or_si128(a1046, a1047);
s195 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d59,_mm_setzero_si128()),_mm_packs_epi16(d60,_mm_setzero_si128())),_mm_setzero_si128()));
a1048 = (a851 + 11);
*(a1048) = s195;
s196 = _mm_unpacklo_epi32(s193, s194);
s197 = _mm_unpackhi_epi32(s193, s194);
*(a959) = s196;
*(a980) = s197;
s198 = *(a894);
s199 = *(a978);
a1049 = _mm_xor_si128(a1003, a920);
a1050 = _mm_xor_si128(a1007, a923);
a1051 = _mm_xor_si128(a1011, a926);
a1052 = _mm_xor_si128(a1015, a929);
b129 = _mm_add_epi32(a1049, a1050);
b130 = _mm_add_epi32(b129, a1051);
t63 = _mm_add_epi32(b130, a1052);
t64 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t63);
m121 = _mm_add_epi32(s198, t63);
m122 = _mm_add_epi32(s199, t64);
m123 = _mm_add_epi32(s198, t64);
m124 = _mm_add_epi32(s199, t63);
d61 = _mm_cmpgt_epi32(m121, m122);
d62 = _mm_cmpgt_epi32(m123, m124);
a1053 = _mm_andnot_si128(d61, m121);
a1054 = _mm_and_si128(d61, m122);
s200 = _mm_or_si128(a1053, a1054);
a1055 = _mm_andnot_si128(d62, m123);
a1056 = _mm_and_si128(d62, m124);
s201 = _mm_or_si128(a1055, a1056);
s202 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d61,_mm_setzero_si128()),_mm_packs_epi16(d62,_mm_setzero_si128())),_mm_setzero_si128()));
a1057 = (a851 + 12);
*(a1057) = s202;
s203 = _mm_unpacklo_epi32(s200, s201);
s204 = _mm_unpackhi_epi32(s200, s201);
*(a819) = s203;
*(a855) = s204;
s205 = *(a895);
s206 = *(a979);
a1058 = _mm_xor_si128(a1003, a941);
a1059 = _mm_xor_si128(a1007, a944);
a1060 = _mm_xor_si128(a1011, a947);
a1061 = _mm_xor_si128(a1015, a950);
b131 = _mm_add_epi32(a1058, a1059);
b132 = _mm_add_epi32(b131, a1060);
t65 = _mm_add_epi32(b132, a1061);
t66 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t65);
m125 = _mm_add_epi32(s205, t65);
m126 = _mm_add_epi32(s206, t66);
m127 = _mm_add_epi32(s205, t66);
m128 = _mm_add_epi32(s206, t65);
d63 = _mm_cmpgt_epi32(m125, m126);
d64 = _mm_cmpgt_epi32(m127, m128);
a1062 = _mm_andnot_si128(d63, m125);
a1063 = _mm_and_si128(d63, m126);
s207 = _mm_or_si128(a1062, a1063);
a1064 = _mm_andnot_si128(d64, m127);
a1065 = _mm_and_si128(d64, m128);
s208 = _mm_or_si128(a1064, a1065);
s209 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d63,_mm_setzero_si128()),_mm_packs_epi16(d64,_mm_setzero_si128())),_mm_setzero_si128()));
a1066 = (a851 + 13);
*(a1066) = s209;
s210 = _mm_unpacklo_epi32(s207, s208);
s211 = _mm_unpackhi_epi32(s207, s208);
*(a876) = s210;
*(a897) = s211;
s212 = *(a915);
s213 = *(a999);
a1067 = _mm_xor_si128(a1003, a962);
a1068 = _mm_xor_si128(a1007, a965);
a1069 = _mm_xor_si128(a1011, a968);
a1070 = _mm_xor_si128(a1015, a971);
b133 = _mm_add_epi32(a1067, a1068);
b134 = _mm_add_epi32(b133, a1069);
t67 = _mm_add_epi32(b134, a1070);
t68 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t67);
m129 = _mm_add_epi32(s212, t67);
m130 = _mm_add_epi32(s213, t68);
m131 = _mm_add_epi32(s212, t68);
m132 = _mm_add_epi32(s213, t67);
d65 = _mm_cmpgt_epi32(m129, m130);
d66 = _mm_cmpgt_epi32(m131, m132);
a1071 = _mm_andnot_si128(d65, m129);
a1072 = _mm_and_si128(d65, m130);
s214 = _mm_or_si128(a1071, a1072);
a1073 = _mm_andnot_si128(d66, m131);
a1074 = _mm_and_si128(d66, m132);
s215 = _mm_or_si128(a1073, a1074);
s216 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d65,_mm_setzero_si128()),_mm_packs_epi16(d66,_mm_setzero_si128())),_mm_setzero_si128()));
a1075 = (a851 + 14);
*(a1075) = s216;
s217 = _mm_unpacklo_epi32(s214, s215);
s218 = _mm_unpackhi_epi32(s214, s215);
*(a918) = s217;
*(a939) = s218;
s219 = *(a916);
s220 = *(a1000);
a1076 = _mm_xor_si128(a1003, a983);
a1077 = _mm_xor_si128(a1007, a986);
a1078 = _mm_xor_si128(a1011, a989);
a1079 = _mm_xor_si128(a1015, a992);
b135 = _mm_add_epi32(a1076, a1077);
b136 = _mm_add_epi32(b135, a1078);
t69 = _mm_add_epi32(b136, a1079);
t70 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t69);
m133 = _mm_add_epi32(s219, t69);
m134 = _mm_add_epi32(s220, t70);
m135 = _mm_add_epi32(s219, t70);
m136 = _mm_add_epi32(s220, t69);
d67 = _mm_cmpgt_epi32(m133, m134);
d68 = _mm_cmpgt_epi32(m135, m136);
a1080 = _mm_andnot_si128(d67, m133);
a1081 = _mm_and_si128(d67, m134);
s221 = _mm_or_si128(a1080, a1081);
a1082 = _mm_andnot_si128(d68, m135);
a1083 = _mm_and_si128(d68, m136);
s222 = _mm_or_si128(a1082, a1083);
s223 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d67,_mm_setzero_si128()),_mm_packs_epi16(d68,_mm_setzero_si128())),_mm_setzero_si128()));
a1084 = (a851 + 15);
*(a1084) = s223;
s224 = _mm_unpacklo_epi32(s221, s222);
s225 = _mm_unpackhi_epi32(s221, s222);
*(a960) = s224;
*(a981) = s225;
}
/* skip */
}
#endif

View File

@@ -1,36 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
#include <stdint.h>
/* Parameters the Spiral kernel was generated for. */
/* Presumably the constraint length of the code; NUMSTATES below equals 2^(K-1). */
#define K 7
/* Four entries, matching the four values in POLYS. */
#define RATE 4
/* One polynomial per RATE entry (values are decimal). */
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
/* Presumably the frame length, in bits, the generator was configured with -- TODO confirm. */
#define FRAMEBITS 2048
/* Word type used for decisions, and its width in bits. */
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
/* Word type used for metrics and symbols. */
#define COMPUTETYPE uint32_t
/* Presumably settings of the generator's own test program -- TODO confirm. */
#define EBN0 3
#define TRIALS 10000
/* Lets code that spells the type __int32 compile with plain int. */
#define __int32 int
#define FUNC FULL_SPIRAL
/* Both shifts are zero, i.e. metrics are used unscaled. */
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 2000000000

File diff suppressed because it is too large Load Diff

View File

@@ -1,35 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
/* Parameters the Spiral kernel was generated for. */
/* Presumably the constraint length of the code; NUMSTATES below equals 2^(K-1). */
#define K 7
/* Four entries, matching the four values in POLYS. */
#define RATE 4
/* One polynomial per RATE entry (values are decimal). */
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
/* Presumably the frame length, in bits, the generator was configured with -- TODO confirm. */
#define FRAMEBITS 2048
/* Word type used for decisions, and its width in bits. */
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
/* Word type used for metrics and symbols. */
#define COMPUTETYPE unsigned int
/* Presumably settings of the generator's own test program -- TODO confirm. */
#define EBN0 3
#define TRIALS 10000
/* Lets code that spells the type __int32 compile with plain int. */
#define __int32 int
#define FUNC FULL_SPIRAL
/* Both shifts are zero, i.e. metrics are used unscaled. */
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 2000000000

View File

@@ -1,698 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
//#include <include/mm_malloc.h>
//#include <pmmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include <mmintrin.h>
#include "spiral-sse.h"
/*
 * Initialisation entry point of the generated kernel.
 * The body is empty: nothing is set up here.
 */
void init_FULL_SPIRAL() {
}
/*
 * Machine-generated (Spiral) SSE2 add-compare-select kernel.
 *
 * amount     number of loop passes. Every pass handles TWO consecutive
 *            trellis stages: it reads syms [8 * i9 .. 8 * i9 + 7] and
 *            writes dec [16 * i9 .. 16 * i9 + 15].
 * Y          scratch metrics, accessed as 16 __m128i (64 x 32 bit).
 * X          path metrics, accessed as 16 __m128i (64 x 32 bit); read
 *            at the start of a pass and overwritten at its end, so the
 *            up-to-date metrics are always left in X.
 * syms       received symbols, 4 per trellis stage.
 * dec        decision bits, 8 bytes per trellis stage.
 * Branchtab  expected symbol table, accessed as 32 __m128i: 8 vectors
 *            for each of the 4 symbols of a stage.
 *
 * X, Y and Branchtab are dereferenced through __m128i pointers, so they
 * are assumed to be 16-byte aligned -- TODO confirm against the caller.
 *
 * Each of the 8 "groups" per stage does the same work on 4 lanes:
 *   t   = sum over the 4 symbols of (symbol XOR Branchtab entry)
 *   t'  = 1020 - t        (1020 presumably being 4 * 255 -- confirm)
 *   the two candidate metrics for each successor state are compared
 *   with a signed "greater than"; the smaller one is kept and the
 *   comparison results are packed into one decision byte.
 */
void FULL_SPIRAL_sse(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
int i9;
// The generated loop originally read: for(i9 = 0; i9 <= amount; i9++) {
for(i9 = 0; i9 < amount; i9++) {
int32_t a1002, a1006, a1010, a1014, a822, a828, a834
, a840;
int a820, a850;
unsigned char s118, s125, s132, s139, s146, s153, s160
, s167, s174, s181, s188, s195, s202, s209, s216
, s223;
int32_t *a1001, *a1005, *a1009, *a1013, *a821, *a827, *a833
, *a839, *b104;
unsigned char *a1021, *a1030, *a1039, *a1048, *a1057, *a1066, *a1075
, *a1084, *a849, *a851, *a872, *a893, *a914, *a935, *a956
, *a977, *a998;
__m128i *a1000, *a818, *a819, *a824, *a830, *a836, *a842
, *a852, *a853, *a854, *a855, *a856, *a859, *a862, *a865
, *a873, *a874, *a875, *a876, *a877, *a880, *a883, *a886
, *a894, *a895, *a896, *a897, *a898, *a901, *a904, *a907
, *a915, *a916, *a917, *a918, *a919, *a922, *a925, *a928
, *a936, *a937, *a938, *a939, *a940, *a943, *a946, *a949
, *a957, *a958, *a959, *a960, *a961, *a964, *a967, *a970
, *a978, *a979, *a980, *a981, *a982, *a985, *a988, *a991
, *a999;
__m128i a1003, a1004, a1007, a1008, a1011, a1012, a1015
, a1016, a1017, a1018, a1019, a1020, a1022, a1023, a1024
, a1025, a1026, a1027, a1028, a1029, a1031, a1032, a1033
, a1034, a1035, a1036, a1037, a1038, a1040, a1041, a1042
, a1043, a1044, a1045, a1046, a1047, a1049, a1050, a1051
, a1052, a1053, a1054, a1055, a1056, a1058, a1059, a1060
, a1061, a1062, a1063, a1064, a1065, a1067, a1068, a1069
, a1070, a1071, a1072, a1073, a1074, a1076, a1077, a1078
, a1079, a1080, a1081, a1082, a1083, a823, a825, a826
, a829, a831, a832, a835, a837, a838, a841, a843
, a844, a845, a846, a847, a848, a857, a858, a860
, a861, a863, a864, a866, a867, a868, a869, a870
, a871, a878, a879, a881, a882, a884, a885, a887
, a888, a889, a890, a891, a892, a899, a900, a902
, a903, a905, a906, a908, a909, a910, a911, a912
, a913, a920, a921, a923, a924, a926, a927, a929
, a930, a931, a932, a933, a934, a941, a942, a944
, a945, a947, a948, a950, a951, a952, a953, a954
, a955, a962, a963, a965, a966, a968, a969, a971
, a972, a973, a974, a975, a976, a983, a984, a986
, a987, a989, a990, a992, a993, a994, a995, a996
, a997, b105, b106, b107, b108, b109, b110, b111
, b112, b113, b114, b115, b116, b117, b118, b119
, b120, b121, b122, b123, b124, b125, b126, b127
, b128, b129, b130, b131, b132, b133, b134, b135
, b136, d37, d38, d39, d40, d41, d42, d43
, d44, d45, d46, d47, d48, d49, d50, d51
, d52, d53, d54, d55, d56, d57, d58, d59
, d60, d61, d62, d63, d64, d65, d66, d67
, d68, m100, m101, m102, m103, m104, m105, m106
, m107, m108, m109, m110, m111, m112, m113, m114
, m115, m116, m117, m118, m119, m120, m121, m122
, m123, m124, m125, m126, m127, m128, m129, m130
, m131, m132, m133, m134, m135, m136, m73, m74
, m75, m76, m77, m78, m79, m80, m81, m82
, m83, m84, m85, m86, m87, m88, m89, m90
, m91, m92, m93, m94, m95, m96, m97, m98
, m99, s114, s115, s116, s117, s119, s120, s121
, s122, s123, s124, s126, s127, s128, s129, s130
, s131, s133, s134, s135, s136, s137, s138, s140
, s141, s142, s143, s144, s145, s147, s148, s149
, s150, s151, s152, s154, s155, s156, s157, s158
, s159, s161, s162, s163, s164, s165, s166, s168
, s169, s170, s171, s172, s173, s175, s176, s177
, s178, s179, s180, s182, s183, s184, s185, s186
, s187, s189, s190, s191, s192, s193, s194, s196
, s197, s198, s199, s200, s201, s203, s204, s205
, s206, s207, s208, s210, s211, s212, s213, s214
, s215, s217, s218, s219, s220, s221, s222, s224
, s225, t39, t40, t41, t42, t43, t44, t45
, t46, t47, t48, t49, t50, t51, t52, t53
, t54, t55, t56, t57, t58, t59, t60, t61
, t62, t63, t64, t65, t66, t67, t68, t69
, t70;
/* ===== first stage of this pass: metrics are read from X, written to Y ===== */
/* group 0: metric vectors X[0] and X[8]; the four symbols of this stage
 * are broadcast once (a823, a829, a835, a841) and reused by groups 1..7 */
a818 = ((__m128i *) X);
s114 = *(a818);
a819 = (a818 + 8);
s115 = *(a819);
a820 = (8 * i9);
a821 = (syms + a820);
a822 = *(a821);
a823 = _mm_set1_epi32(a822);
a824 = ((__m128i *) Branchtab);
a825 = *(a824);
a826 = _mm_xor_si128(a823, a825);
b104 = (a820 + syms);
a827 = (b104 + 1);
a828 = *(a827);
a829 = _mm_set1_epi32(a828);
a830 = (a824 + 8);
a831 = *(a830);
a832 = _mm_xor_si128(a829, a831);
a833 = (b104 + 2);
a834 = *(a833);
a835 = _mm_set1_epi32(a834);
a836 = (a824 + 16);
a837 = *(a836);
a838 = _mm_xor_si128(a835, a837);
a839 = (b104 + 3);
a840 = *(a839);
a841 = _mm_set1_epi32(a840);
a842 = (a824 + 24);
a843 = *(a842);
a844 = _mm_xor_si128(a841, a843);
/* branch metric t39 and its complement t40 = 1020 - t39 */
b105 = _mm_add_epi32(a826, a832);
b106 = _mm_add_epi32(b105, a838);
t39 = _mm_add_epi32(b106, a844);
t40 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t39);
/* four candidate metrics, compare, then select the smaller of each pair */
m73 = _mm_add_epi32(s114, t39);
m74 = _mm_add_epi32(s115, t40);
m75 = _mm_add_epi32(s114, t40);
m76 = _mm_add_epi32(s115, t39);
d37 = _mm_cmpgt_epi32(m73, m74);
d38 = _mm_cmpgt_epi32(m75, m76);
a845 = _mm_andnot_si128(d37, m73);
a846 = _mm_and_si128(d37, m74);
s116 = _mm_or_si128(a845, a846);
a847 = _mm_andnot_si128(d38, m75);
a848 = _mm_and_si128(d38, m76);
s117 = _mm_or_si128(a847, a848);
/* interleave the 4 + 4 comparison masks and squeeze them into 8 bits */
s118 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d37,_mm_setzero_si128()),_mm_packs_epi16(d38,_mm_setzero_si128())),_mm_setzero_si128()));
a849 = ((unsigned char *) dec);
a850 = (16 * i9);
a851 = (a849 + a850);
*(a851) = s118;
/* interleave the survivors and store them as Y[0], Y[1] */
s119 = _mm_unpacklo_epi32(s116, s117);
s120 = _mm_unpackhi_epi32(s116, s117);
a852 = ((__m128i *) Y);
*(a852) = s119;
a853 = (a852 + 1);
*(a853) = s120;
/* group 1: X[1], X[9] -> Y[2], Y[3], decision byte 1 */
a854 = (a818 + 1);
s121 = *(a854);
a855 = (a818 + 9);
s122 = *(a855);
a856 = (a824 + 1);
a857 = *(a856);
a858 = _mm_xor_si128(a823, a857);
a859 = (a824 + 9);
a860 = *(a859);
a861 = _mm_xor_si128(a829, a860);
a862 = (a824 + 17);
a863 = *(a862);
a864 = _mm_xor_si128(a835, a863);
a865 = (a824 + 25);
a866 = *(a865);
a867 = _mm_xor_si128(a841, a866);
b107 = _mm_add_epi32(a858, a861);
b108 = _mm_add_epi32(b107, a864);
t41 = _mm_add_epi32(b108, a867);
t42 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t41);
m77 = _mm_add_epi32(s121, t41);
m78 = _mm_add_epi32(s122, t42);
m79 = _mm_add_epi32(s121, t42);
m80 = _mm_add_epi32(s122, t41);
d39 = _mm_cmpgt_epi32(m77, m78);
d40 = _mm_cmpgt_epi32(m79, m80);
a868 = _mm_andnot_si128(d39, m77);
a869 = _mm_and_si128(d39, m78);
s123 = _mm_or_si128(a868, a869);
a870 = _mm_andnot_si128(d40, m79);
a871 = _mm_and_si128(d40, m80);
s124 = _mm_or_si128(a870, a871);
s125 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d39,_mm_setzero_si128()),_mm_packs_epi16(d40,_mm_setzero_si128())),_mm_setzero_si128()));
a872 = (a851 + 1);
*(a872) = s125;
s126 = _mm_unpacklo_epi32(s123, s124);
s127 = _mm_unpackhi_epi32(s123, s124);
a873 = (a852 + 2);
*(a873) = s126;
a874 = (a852 + 3);
*(a874) = s127;
/* group 2: X[2], X[10] -> Y[4], Y[5], decision byte 2 */
a875 = (a818 + 2);
s128 = *(a875);
a876 = (a818 + 10);
s129 = *(a876);
a877 = (a824 + 2);
a878 = *(a877);
a879 = _mm_xor_si128(a823, a878);
a880 = (a824 + 10);
a881 = *(a880);
a882 = _mm_xor_si128(a829, a881);
a883 = (a824 + 18);
a884 = *(a883);
a885 = _mm_xor_si128(a835, a884);
a886 = (a824 + 26);
a887 = *(a886);
a888 = _mm_xor_si128(a841, a887);
b109 = _mm_add_epi32(a879, a882);
b110 = _mm_add_epi32(b109, a885);
t43 = _mm_add_epi32(b110, a888);
t44 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t43);
m81 = _mm_add_epi32(s128, t43);
m82 = _mm_add_epi32(s129, t44);
m83 = _mm_add_epi32(s128, t44);
m84 = _mm_add_epi32(s129, t43);
d41 = _mm_cmpgt_epi32(m81, m82);
d42 = _mm_cmpgt_epi32(m83, m84);
a889 = _mm_andnot_si128(d41, m81);
a890 = _mm_and_si128(d41, m82);
s130 = _mm_or_si128(a889, a890);
a891 = _mm_andnot_si128(d42, m83);
a892 = _mm_and_si128(d42, m84);
s131 = _mm_or_si128(a891, a892);
s132 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d41,_mm_setzero_si128()),_mm_packs_epi16(d42,_mm_setzero_si128())),_mm_setzero_si128()));
a893 = (a851 + 2);
*(a893) = s132;
s133 = _mm_unpacklo_epi32(s130, s131);
s134 = _mm_unpackhi_epi32(s130, s131);
a894 = (a852 + 4);
*(a894) = s133;
a895 = (a852 + 5);
*(a895) = s134;
/* group 3: X[3], X[11] -> Y[6], Y[7], decision byte 3 */
a896 = (a818 + 3);
s135 = *(a896);
a897 = (a818 + 11);
s136 = *(a897);
a898 = (a824 + 3);
a899 = *(a898);
a900 = _mm_xor_si128(a823, a899);
a901 = (a824 + 11);
a902 = *(a901);
a903 = _mm_xor_si128(a829, a902);
a904 = (a824 + 19);
a905 = *(a904);
a906 = _mm_xor_si128(a835, a905);
a907 = (a824 + 27);
a908 = *(a907);
a909 = _mm_xor_si128(a841, a908);
b111 = _mm_add_epi32(a900, a903);
b112 = _mm_add_epi32(b111, a906);
t45 = _mm_add_epi32(b112, a909);
t46 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t45);
m85 = _mm_add_epi32(s135, t45);
m86 = _mm_add_epi32(s136, t46);
m87 = _mm_add_epi32(s135, t46);
m88 = _mm_add_epi32(s136, t45);
d43 = _mm_cmpgt_epi32(m85, m86);
d44 = _mm_cmpgt_epi32(m87, m88);
a910 = _mm_andnot_si128(d43, m85);
a911 = _mm_and_si128(d43, m86);
s137 = _mm_or_si128(a910, a911);
a912 = _mm_andnot_si128(d44, m87);
a913 = _mm_and_si128(d44, m88);
s138 = _mm_or_si128(a912, a913);
s139 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d43,_mm_setzero_si128()),_mm_packs_epi16(d44,_mm_setzero_si128())),_mm_setzero_si128()));
a914 = (a851 + 3);
*(a914) = s139;
s140 = _mm_unpacklo_epi32(s137, s138);
s141 = _mm_unpackhi_epi32(s137, s138);
a915 = (a852 + 6);
*(a915) = s140;
a916 = (a852 + 7);
*(a916) = s141;
/* group 4: X[4], X[12] -> Y[8], Y[9], decision byte 4 */
a917 = (a818 + 4);
s142 = *(a917);
a918 = (a818 + 12);
s143 = *(a918);
a919 = (a824 + 4);
a920 = *(a919);
a921 = _mm_xor_si128(a823, a920);
a922 = (a824 + 12);
a923 = *(a922);
a924 = _mm_xor_si128(a829, a923);
a925 = (a824 + 20);
a926 = *(a925);
a927 = _mm_xor_si128(a835, a926);
a928 = (a824 + 28);
a929 = *(a928);
a930 = _mm_xor_si128(a841, a929);
b113 = _mm_add_epi32(a921, a924);
b114 = _mm_add_epi32(b113, a927);
t47 = _mm_add_epi32(b114, a930);
t48 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t47);
m89 = _mm_add_epi32(s142, t47);
m90 = _mm_add_epi32(s143, t48);
m91 = _mm_add_epi32(s142, t48);
m92 = _mm_add_epi32(s143, t47);
d45 = _mm_cmpgt_epi32(m89, m90);
d46 = _mm_cmpgt_epi32(m91, m92);
a931 = _mm_andnot_si128(d45, m89);
a932 = _mm_and_si128(d45, m90);
s144 = _mm_or_si128(a931, a932);
a933 = _mm_andnot_si128(d46, m91);
a934 = _mm_and_si128(d46, m92);
s145 = _mm_or_si128(a933, a934);
s146 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d45,_mm_setzero_si128()),_mm_packs_epi16(d46,_mm_setzero_si128())),_mm_setzero_si128()));
a935 = (a851 + 4);
*(a935) = s146;
s147 = _mm_unpacklo_epi32(s144, s145);
s148 = _mm_unpackhi_epi32(s144, s145);
a936 = (a852 + 8);
*(a936) = s147;
a937 = (a852 + 9);
*(a937) = s148;
/* group 5: X[5], X[13] -> Y[10], Y[11], decision byte 5 */
a938 = (a818 + 5);
s149 = *(a938);
a939 = (a818 + 13);
s150 = *(a939);
a940 = (a824 + 5);
a941 = *(a940);
a942 = _mm_xor_si128(a823, a941);
a943 = (a824 + 13);
a944 = *(a943);
a945 = _mm_xor_si128(a829, a944);
a946 = (a824 + 21);
a947 = *(a946);
a948 = _mm_xor_si128(a835, a947);
a949 = (a824 + 29);
a950 = *(a949);
a951 = _mm_xor_si128(a841, a950);
b115 = _mm_add_epi32(a942, a945);
b116 = _mm_add_epi32(b115, a948);
t49 = _mm_add_epi32(b116, a951);
t50 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t49);
m93 = _mm_add_epi32(s149, t49);
m94 = _mm_add_epi32(s150, t50);
m95 = _mm_add_epi32(s149, t50);
m96 = _mm_add_epi32(s150, t49);
d47 = _mm_cmpgt_epi32(m93, m94);
d48 = _mm_cmpgt_epi32(m95, m96);
a952 = _mm_andnot_si128(d47, m93);
a953 = _mm_and_si128(d47, m94);
s151 = _mm_or_si128(a952, a953);
a954 = _mm_andnot_si128(d48, m95);
a955 = _mm_and_si128(d48, m96);
s152 = _mm_or_si128(a954, a955);
s153 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d47,_mm_setzero_si128()),_mm_packs_epi16(d48,_mm_setzero_si128())),_mm_setzero_si128()));
a956 = (a851 + 5);
*(a956) = s153;
s154 = _mm_unpacklo_epi32(s151, s152);
s155 = _mm_unpackhi_epi32(s151, s152);
a957 = (a852 + 10);
*(a957) = s154;
a958 = (a852 + 11);
*(a958) = s155;
/* group 6: X[6], X[14] -> Y[12], Y[13], decision byte 6 */
a959 = (a818 + 6);
s156 = *(a959);
a960 = (a818 + 14);
s157 = *(a960);
a961 = (a824 + 6);
a962 = *(a961);
a963 = _mm_xor_si128(a823, a962);
a964 = (a824 + 14);
a965 = *(a964);
a966 = _mm_xor_si128(a829, a965);
a967 = (a824 + 22);
a968 = *(a967);
a969 = _mm_xor_si128(a835, a968);
a970 = (a824 + 30);
a971 = *(a970);
a972 = _mm_xor_si128(a841, a971);
b117 = _mm_add_epi32(a963, a966);
b118 = _mm_add_epi32(b117, a969);
t51 = _mm_add_epi32(b118, a972);
t52 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t51);
m97 = _mm_add_epi32(s156, t51);
m98 = _mm_add_epi32(s157, t52);
m99 = _mm_add_epi32(s156, t52);
m100 = _mm_add_epi32(s157, t51);
d49 = _mm_cmpgt_epi32(m97, m98);
d50 = _mm_cmpgt_epi32(m99, m100);
a973 = _mm_andnot_si128(d49, m97);
a974 = _mm_and_si128(d49, m98);
s158 = _mm_or_si128(a973, a974);
a975 = _mm_andnot_si128(d50, m99);
a976 = _mm_and_si128(d50, m100);
s159 = _mm_or_si128(a975, a976);
s160 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d49,_mm_setzero_si128()),_mm_packs_epi16(d50,_mm_setzero_si128())),_mm_setzero_si128()));
a977 = (a851 + 6);
*(a977) = s160;
s161 = _mm_unpacklo_epi32(s158, s159);
s162 = _mm_unpackhi_epi32(s158, s159);
a978 = (a852 + 12);
*(a978) = s161;
a979 = (a852 + 13);
*(a979) = s162;
/* group 7: X[7], X[15] -> Y[14], Y[15], decision byte 7 */
a980 = (a818 + 7);
s163 = *(a980);
a981 = (a818 + 15);
s164 = *(a981);
a982 = (a824 + 7);
a983 = *(a982);
a984 = _mm_xor_si128(a823, a983);
a985 = (a824 + 15);
a986 = *(a985);
a987 = _mm_xor_si128(a829, a986);
a988 = (a824 + 23);
a989 = *(a988);
a990 = _mm_xor_si128(a835, a989);
a991 = (a824 + 31);
a992 = *(a991);
a993 = _mm_xor_si128(a841, a992);
b119 = _mm_add_epi32(a984, a987);
b120 = _mm_add_epi32(b119, a990);
t53 = _mm_add_epi32(b120, a993);
t54 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t53);
m101 = _mm_add_epi32(s163, t53);
m102 = _mm_add_epi32(s164, t54);
m103 = _mm_add_epi32(s163, t54);
m104 = _mm_add_epi32(s164, t53);
d51 = _mm_cmpgt_epi32(m101, m102);
d52 = _mm_cmpgt_epi32(m103, m104);
a994 = _mm_andnot_si128(d51, m101);
a995 = _mm_and_si128(d51, m102);
s165 = _mm_or_si128(a994, a995);
a996 = _mm_andnot_si128(d52, m103);
a997 = _mm_and_si128(d52, m104);
s166 = _mm_or_si128(a996, a997);
s167 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d51,_mm_setzero_si128()),_mm_packs_epi16(d52,_mm_setzero_si128())),_mm_setzero_si128()));
a998 = (a851 + 7);
*(a998) = s167;
s168 = _mm_unpacklo_epi32(s165, s166);
s169 = _mm_unpackhi_epi32(s165, s166);
a999 = (a852 + 14);
*(a999) = s168;
a1000 = (a852 + 15);
*(a1000) = s169;
/* ===== second stage of this pass: metrics are read from Y, written back to X ===== */
/* group 0: Y[0], Y[8] -> X[0], X[1], decision byte 8; the next four
 * symbols (b104 + 4 .. b104 + 7) are broadcast and xor-ed with the
 * Branchtab vectors that were already loaded during the first stage */
s170 = *(a852);
s171 = *(a936);
a1001 = (b104 + 4);
a1002 = *(a1001);
a1003 = _mm_set1_epi32(a1002);
a1004 = _mm_xor_si128(a1003, a825);
a1005 = (b104 + 5);
a1006 = *(a1005);
a1007 = _mm_set1_epi32(a1006);
a1008 = _mm_xor_si128(a1007, a831);
a1009 = (b104 + 6);
a1010 = *(a1009);
a1011 = _mm_set1_epi32(a1010);
a1012 = _mm_xor_si128(a1011, a837);
a1013 = (b104 + 7);
a1014 = *(a1013);
a1015 = _mm_set1_epi32(a1014);
a1016 = _mm_xor_si128(a1015, a843);
b121 = _mm_add_epi32(a1004, a1008);
b122 = _mm_add_epi32(b121, a1012);
t55 = _mm_add_epi32(b122, a1016);
t56 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t55);
m105 = _mm_add_epi32(s170, t55);
m106 = _mm_add_epi32(s171, t56);
m107 = _mm_add_epi32(s170, t56);
m108 = _mm_add_epi32(s171, t55);
d53 = _mm_cmpgt_epi32(m105, m106);
d54 = _mm_cmpgt_epi32(m107, m108);
a1017 = _mm_andnot_si128(d53, m105);
a1018 = _mm_and_si128(d53, m106);
s172 = _mm_or_si128(a1017, a1018);
a1019 = _mm_andnot_si128(d54, m107);
a1020 = _mm_and_si128(d54, m108);
s173 = _mm_or_si128(a1019, a1020);
s174 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d53,_mm_setzero_si128()),_mm_packs_epi16(d54,_mm_setzero_si128())),_mm_setzero_si128()));
a1021 = (a851 + 8);
*(a1021) = s174;
s175 = _mm_unpacklo_epi32(s172, s173);
s176 = _mm_unpackhi_epi32(s172, s173);
*(a818) = s175;
*(a854) = s176;
/* group 1: Y[1], Y[9] -> X[2], X[3], decision byte 9 */
s177 = *(a853);
s178 = *(a937);
a1022 = _mm_xor_si128(a1003, a857);
a1023 = _mm_xor_si128(a1007, a860);
a1024 = _mm_xor_si128(a1011, a863);
a1025 = _mm_xor_si128(a1015, a866);
b123 = _mm_add_epi32(a1022, a1023);
b124 = _mm_add_epi32(b123, a1024);
t57 = _mm_add_epi32(b124, a1025);
t58 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t57);
m109 = _mm_add_epi32(s177, t57);
m110 = _mm_add_epi32(s178, t58);
m111 = _mm_add_epi32(s177, t58);
m112 = _mm_add_epi32(s178, t57);
d55 = _mm_cmpgt_epi32(m109, m110);
d56 = _mm_cmpgt_epi32(m111, m112);
a1026 = _mm_andnot_si128(d55, m109);
a1027 = _mm_and_si128(d55, m110);
s179 = _mm_or_si128(a1026, a1027);
a1028 = _mm_andnot_si128(d56, m111);
a1029 = _mm_and_si128(d56, m112);
s180 = _mm_or_si128(a1028, a1029);
s181 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d55,_mm_setzero_si128()),_mm_packs_epi16(d56,_mm_setzero_si128())),_mm_setzero_si128()));
a1030 = (a851 + 9);
*(a1030) = s181;
s182 = _mm_unpacklo_epi32(s179, s180);
s183 = _mm_unpackhi_epi32(s179, s180);
*(a875) = s182;
*(a896) = s183;
/* group 2: Y[2], Y[10] -> X[4], X[5], decision byte 10 */
s184 = *(a873);
s185 = *(a957);
a1031 = _mm_xor_si128(a1003, a878);
a1032 = _mm_xor_si128(a1007, a881);
a1033 = _mm_xor_si128(a1011, a884);
a1034 = _mm_xor_si128(a1015, a887);
b125 = _mm_add_epi32(a1031, a1032);
b126 = _mm_add_epi32(b125, a1033);
t59 = _mm_add_epi32(b126, a1034);
t60 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t59);
m113 = _mm_add_epi32(s184, t59);
m114 = _mm_add_epi32(s185, t60);
m115 = _mm_add_epi32(s184, t60);
m116 = _mm_add_epi32(s185, t59);
d57 = _mm_cmpgt_epi32(m113, m114);
d58 = _mm_cmpgt_epi32(m115, m116);
a1035 = _mm_andnot_si128(d57, m113);
a1036 = _mm_and_si128(d57, m114);
s186 = _mm_or_si128(a1035, a1036);
a1037 = _mm_andnot_si128(d58, m115);
a1038 = _mm_and_si128(d58, m116);
s187 = _mm_or_si128(a1037, a1038);
s188 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d57,_mm_setzero_si128()),_mm_packs_epi16(d58,_mm_setzero_si128())),_mm_setzero_si128()));
a1039 = (a851 + 10);
*(a1039) = s188;
s189 = _mm_unpacklo_epi32(s186, s187);
s190 = _mm_unpackhi_epi32(s186, s187);
*(a917) = s189;
*(a938) = s190;
/* group 3: Y[3], Y[11] -> X[6], X[7], decision byte 11 */
s191 = *(a874);
s192 = *(a958);
a1040 = _mm_xor_si128(a1003, a899);
a1041 = _mm_xor_si128(a1007, a902);
a1042 = _mm_xor_si128(a1011, a905);
a1043 = _mm_xor_si128(a1015, a908);
b127 = _mm_add_epi32(a1040, a1041);
b128 = _mm_add_epi32(b127, a1042);
t61 = _mm_add_epi32(b128, a1043);
t62 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t61);
m117 = _mm_add_epi32(s191, t61);
m118 = _mm_add_epi32(s192, t62);
m119 = _mm_add_epi32(s191, t62);
m120 = _mm_add_epi32(s192, t61);
d59 = _mm_cmpgt_epi32(m117, m118);
d60 = _mm_cmpgt_epi32(m119, m120);
a1044 = _mm_andnot_si128(d59, m117);
a1045 = _mm_and_si128(d59, m118);
s193 = _mm_or_si128(a1044, a1045);
a1046 = _mm_andnot_si128(d60, m119);
a1047 = _mm_and_si128(d60, m120);
s194 = _mm_or_si128(a1046, a1047);
s195 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d59,_mm_setzero_si128()),_mm_packs_epi16(d60,_mm_setzero_si128())),_mm_setzero_si128()));
a1048 = (a851 + 11);
*(a1048) = s195;
s196 = _mm_unpacklo_epi32(s193, s194);
s197 = _mm_unpackhi_epi32(s193, s194);
*(a959) = s196;
*(a980) = s197;
/* group 4: Y[4], Y[12] -> X[8], X[9], decision byte 12 */
s198 = *(a894);
s199 = *(a978);
a1049 = _mm_xor_si128(a1003, a920);
a1050 = _mm_xor_si128(a1007, a923);
a1051 = _mm_xor_si128(a1011, a926);
a1052 = _mm_xor_si128(a1015, a929);
b129 = _mm_add_epi32(a1049, a1050);
b130 = _mm_add_epi32(b129, a1051);
t63 = _mm_add_epi32(b130, a1052);
t64 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t63);
m121 = _mm_add_epi32(s198, t63);
m122 = _mm_add_epi32(s199, t64);
m123 = _mm_add_epi32(s198, t64);
m124 = _mm_add_epi32(s199, t63);
d61 = _mm_cmpgt_epi32(m121, m122);
d62 = _mm_cmpgt_epi32(m123, m124);
a1053 = _mm_andnot_si128(d61, m121);
a1054 = _mm_and_si128(d61, m122);
s200 = _mm_or_si128(a1053, a1054);
a1055 = _mm_andnot_si128(d62, m123);
a1056 = _mm_and_si128(d62, m124);
s201 = _mm_or_si128(a1055, a1056);
s202 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d61,_mm_setzero_si128()),_mm_packs_epi16(d62,_mm_setzero_si128())),_mm_setzero_si128()));
a1057 = (a851 + 12);
*(a1057) = s202;
s203 = _mm_unpacklo_epi32(s200, s201);
s204 = _mm_unpackhi_epi32(s200, s201);
*(a819) = s203;
*(a855) = s204;
/* group 5: Y[5], Y[13] -> X[10], X[11], decision byte 13 */
s205 = *(a895);
s206 = *(a979);
a1058 = _mm_xor_si128(a1003, a941);
a1059 = _mm_xor_si128(a1007, a944);
a1060 = _mm_xor_si128(a1011, a947);
a1061 = _mm_xor_si128(a1015, a950);
b131 = _mm_add_epi32(a1058, a1059);
b132 = _mm_add_epi32(b131, a1060);
t65 = _mm_add_epi32(b132, a1061);
t66 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t65);
m125 = _mm_add_epi32(s205, t65);
m126 = _mm_add_epi32(s206, t66);
m127 = _mm_add_epi32(s205, t66);
m128 = _mm_add_epi32(s206, t65);
d63 = _mm_cmpgt_epi32(m125, m126);
d64 = _mm_cmpgt_epi32(m127, m128);
a1062 = _mm_andnot_si128(d63, m125);
a1063 = _mm_and_si128(d63, m126);
s207 = _mm_or_si128(a1062, a1063);
a1064 = _mm_andnot_si128(d64, m127);
a1065 = _mm_and_si128(d64, m128);
s208 = _mm_or_si128(a1064, a1065);
s209 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d63,_mm_setzero_si128()),_mm_packs_epi16(d64,_mm_setzero_si128())),_mm_setzero_si128()));
a1066 = (a851 + 13);
*(a1066) = s209;
s210 = _mm_unpacklo_epi32(s207, s208);
s211 = _mm_unpackhi_epi32(s207, s208);
*(a876) = s210;
*(a897) = s211;
/* group 6: Y[6], Y[14] -> X[12], X[13], decision byte 14 */
s212 = *(a915);
s213 = *(a999);
a1067 = _mm_xor_si128(a1003, a962);
a1068 = _mm_xor_si128(a1007, a965);
a1069 = _mm_xor_si128(a1011, a968);
a1070 = _mm_xor_si128(a1015, a971);
b133 = _mm_add_epi32(a1067, a1068);
b134 = _mm_add_epi32(b133, a1069);
t67 = _mm_add_epi32(b134, a1070);
t68 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t67);
m129 = _mm_add_epi32(s212, t67);
m130 = _mm_add_epi32(s213, t68);
m131 = _mm_add_epi32(s212, t68);
m132 = _mm_add_epi32(s213, t67);
d65 = _mm_cmpgt_epi32(m129, m130);
d66 = _mm_cmpgt_epi32(m131, m132);
a1071 = _mm_andnot_si128(d65, m129);
a1072 = _mm_and_si128(d65, m130);
s214 = _mm_or_si128(a1071, a1072);
a1073 = _mm_andnot_si128(d66, m131);
a1074 = _mm_and_si128(d66, m132);
s215 = _mm_or_si128(a1073, a1074);
s216 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d65,_mm_setzero_si128()),_mm_packs_epi16(d66,_mm_setzero_si128())),_mm_setzero_si128()));
a1075 = (a851 + 14);
*(a1075) = s216;
s217 = _mm_unpacklo_epi32(s214, s215);
s218 = _mm_unpackhi_epi32(s214, s215);
*(a918) = s217;
*(a939) = s218;
/* group 7: Y[7], Y[15] -> X[14], X[15], decision byte 15 */
s219 = *(a916);
s220 = *(a1000);
a1076 = _mm_xor_si128(a1003, a983);
a1077 = _mm_xor_si128(a1007, a986);
a1078 = _mm_xor_si128(a1011, a989);
a1079 = _mm_xor_si128(a1015, a992);
b135 = _mm_add_epi32(a1076, a1077);
b136 = _mm_add_epi32(b135, a1078);
t69 = _mm_add_epi32(b136, a1079);
t70 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t69);
m133 = _mm_add_epi32(s219, t69);
m134 = _mm_add_epi32(s220, t70);
m135 = _mm_add_epi32(s219, t70);
m136 = _mm_add_epi32(s220, t69);
d67 = _mm_cmpgt_epi32(m133, m134);
d68 = _mm_cmpgt_epi32(m135, m136);
a1080 = _mm_andnot_si128(d67, m133);
a1081 = _mm_and_si128(d67, m134);
s221 = _mm_or_si128(a1080, a1081);
a1082 = _mm_andnot_si128(d68, m135);
a1083 = _mm_and_si128(d68, m136);
s222 = _mm_or_si128(a1082, a1083);
s223 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d67,_mm_setzero_si128()),_mm_packs_epi16(d68,_mm_setzero_si128())),_mm_setzero_si128()));
a1084 = (a851 + 15);
*(a1084) = s223;
s224 = _mm_unpacklo_epi32(s221, s222);
s225 = _mm_unpackhi_epi32(s221, s222);
*(a960) = s224;
*(a981) = s225;
}
/* skip */
}

View File

@@ -1,36 +0,0 @@
/***************************************************************
This code was generated by Spiral 6.0 beta, www.spiral.net --
Copyright (c) 2005-2008, Carnegie Mellon University.
All rights reserved.
The code is distributed under the GNU General Public License (GPL)
(see http://www.gnu.org/copyleft/gpl.html)
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************/
#include <stdint.h>
/* Code parameters the Spiral kernel was generated for. */
/* constraint length of the convolutional code */
#define K 7
/* number of output symbols per input bit */
#define RATE 4
/* generator polynomials, one per output symbol (decimal notation) */
#define POLYS { 109, 79, 83, 109 }
/* number of trellis states, i.e. 2 ^ (K - 1) */
#define NUMSTATES 64
#define FRAMEBITS 2048
/* element type and bit width of the packed decision words */
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
/* element type of path metrics and symbols */
#define COMPUTETYPE uint32_t
/* EBN0 and TRIALS are presumably left over from the generator's test
 * harness; verify before relying on them */
#define EBN0 3
#define TRIALS 10000
#define __int32 int
#define FUNC FULL_SPIRAL
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 2000000000

View File

@@ -1,380 +0,0 @@
#
/*
* Copyright (C) 2013
* Jan van Katwijk (J.vanKatwijk@gmail.com)
* Lazy Chair Computing
*
* This file is part of the Qt-DAB
* Qt-DAB is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Qt-DAB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Qt-DAB; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* The convolutional decoder for the FIC blocks has fixed sized
* blocks, so we can use pre-generated code - for that specific
* sized blocks - generated by the spiral project
*/
#include <stdio.h>
#include <stdlib.h>
#include "mm_malloc.h"
#include "viterbi-768.h"
#include <cstring>
#ifdef __MINGW32__
#include <intrin.h>
#include <malloc.h>
#include <windows.h>
#endif
//
// It took a while to discover that the polynomials we used
// in our own "straightforward" implementation were bit-reversed!
// The official ones are the active definition below.
// K is the constraint length; K - 1 tail bits are added to each frame.
#define K 7
// Generator polynomials in octal ...
#define POLYS { 0155, 0117, 0123, 0155}
// ... which are the same values as this decimal form:
//#define POLYS {109, 79, 83, 109}
// In the bit-reversed form the polynomials look like this (octal, then decimal):
//#define POLYS { 0133, 0171, 0145, 0133 }
//#define POLYS { 91, 121, 101, 91 }
// Right shifts applied to the branch metric in BFLY; both are 0 here.
#define METRICSHIFT 0
#define PRECISIONSHIFT 0
// Passed to renormalize () by the generic update: once the metric of
// state 0 exceeds this value the minimum metric is subtracted from all.
#define RENORMALIZE_THRESHOLD 137
//
/* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
/* With K == 7 the first branch is taken: ADDSHIFT is 2 and SUBSHIFT is 0. */
#if (K-1<8)
#define ADDSHIFT (8-(K-1))
#define SUBSHIFT 0
#elif (K-1>8)
#define ADDSHIFT 0
#define SUBSHIFT ((K-1)-8)
#else
#define ADDSHIFT 0
#define SUBSHIFT 0
#endif
// 256-entry parity table: Partab [b] is 1 when the byte value b has an
// odd number of set bits and 0 otherwise. Used by viterbi_768::parity.
// The table is deliberately not const: partab_init () can rewrite it.
static uint8_t Partab [] =
{ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0};
//
//	Fills the 256-entry parity table at run time: entry v becomes 1
//	when v has an odd number of set bits and 0 otherwise.
//	The table is statically initialised (precomputed), so calling
//	this function is optional.
void viterbi_768::partab_init (void) {
	for (int16_t value = 0; value < 256; value ++) {
	   int16_t ones = 0;
	   for (int16_t rest = value; rest != 0; rest >>= 1) {
	      if (rest & 1)
	         ones ++;
	   }
	   Partab [value] = ones & 1;
	}
}
//	Returns the parity of x: 1 when x has an odd number of set bits,
//	0 otherwise.
//	The 32 bits are xor-folded into the low byte, which is then looked
//	up in the 256-entry Partab. The fold leaves the upper bits of x
//	untouched, so the index must be masked to the low byte; without
//	the mask any x >= 256 (or a negative x) indexes outside the table.
int viterbi_768::parity (int x){
	/* Fold down to one byte */
	x ^= (x >> 16);
	x ^= (x >> 8);
	/* only the low byte carries the folded parity information */
	return Partab [x & 0xFF];
}
//	Keeps the path metrics bounded: as soon as the metric of state 0
//	exceeds the threshold, the smallest metric found over all states
//	is subtracted from every state. Otherwise X is left untouched.
static inline
void renormalize (COMPUTETYPE* X, COMPUTETYPE threshold){
	if (!(X [0] > threshold))
	   return;

	COMPUTETYPE lowest = X [0];
	for (int32_t state = 1; state < NUMSTATES; state ++) {
	   if (X [state] < lowest)
	      lowest = X [state];
	}

	for (int32_t state = 0; state < NUMSTATES; state ++)
	   X [state] -= lowest;
}
//
//
// The main use of the viterbi decoder is in handling the FIC blocks.
// There are (in mode 1) 3 ofdm blocks, giving 4 FIC blocks.
// These all have a predefined length. In that case we use the
// "fast" (i.e. spiral) code, otherwise we use the generic code.
//	Builds a decoder for frames of "wordlength" data bits.
//	"spiral" selects the generated kernel over the generic C code when
//	deconvolve () runs. The constructor allocates the 16-byte aligned
//	work buffers, fills Branchtab from the generator polynomials and
//	resets the decoder state.
viterbi_768::viterbi_768 (int16_t wordlength, bool spiral) {
int polys [RATE] = POLYS;
//	trellis stages per frame: the data bits plus the K - 1 tail bits
const int stages = wordlength + (K - 1);

	frameBits = wordlength;
	this -> spiral = spiral;
//	The parity table is precomputed, so partab_init () is not called.
//
//	B I G  N O T E
//	The spiral code is supposed to need buffers sized for
//	(wordlength + (K - 1)) entries. With those sizes the application
//	crashes, so something is not OK. Doubling the sizes makes the
//	problem disappear; it has not been solved or investigated further.
//	Under MinGW every buffer is doubled, in the posix branch only
//	the decisions buffer is.
#ifdef __MINGW32__
	uint32_t size;

	size = 2 * (stages / 8 + 1 + 16) & ~0xF;
	data = (uint8_t *)_aligned_malloc (size, 16);

	size = 2 * (RATE * stages * sizeof(COMPUTETYPE) + 16) & ~0xF;
	symbols = (COMPUTETYPE *)_aligned_malloc (size, 16);

	size = 2 * stages * sizeof (decision_t);
	size = (size + 16) & ~0xF;
	vp. decisions = (decision_t *)_aligned_malloc (size, 16);
#else
//	an allocation failure is only reported, construction carries on
	if (posix_memalign ((void**)&data, 16, stages / 8 + 1) != 0)
	   printf("Allocation of data array failed\n");

	if (posix_memalign ((void**)&symbols, 16,
	                    RATE * stages * sizeof(COMPUTETYPE)) != 0)
	   printf("Allocation of symbols array failed\n");

	if (posix_memalign ((void**)&(vp. decisions), 16,
	                    2 * stages * sizeof (decision_t)) != 0)
	   printf ("Allocation of vp decisions failed\n");
#endif

//	Branchtab holds, per polynomial, NUMSTATES / 2 entries that are
//	either 0 or 255, depending on the parity of (2 * state) masked
//	with the polynomial.
	for (int16_t poly = 0; poly < RATE; poly ++) {
	   for (int16_t state = 0; state < NUMSTATES / 2; state ++) {
	      const int bit = (polys [poly] < 0) ^
	                      parity ((2 * state) & abs (polys [poly]));
	      Branchtab [poly * NUMSTATES / 2 + state] = bit ? 255 : 0;
	   }
	}
//
	init_viterbi (&vp, 0);
}
//	Hands the three work buffers back with the deallocator that
//	matches the allocator used in the constructor.
viterbi_768::~viterbi_768 (void) {
#ifndef __MINGW32__
	free (symbols);
	free (data);
	free (vp. decisions);
#else
	_aligned_free (symbols);
	_aligned_free (data);
	_aligned_free (vp. decisions);
#endif
}
//	maskTable [o] selects bit o of a byte, counting from the most
//	significant bit (o == 0) down to the least significant (o == 7).
static int maskTable [] = {128, 64, 32, 16, 8, 4, 2, 1};

//	Returns 1 when bit o (most significant bit first) of v is set,
//	0 otherwise. o must be in the range 0 .. 7.
static inline
uint8_t getbit (uint8_t v, int32_t o) {
	if ((v & maskTable [o]) != 0)
	   return 1;
	return 0;
}
//static
//uint8_t getbit (uint8_t v, int32_t o) {
//uint8_t mask = 1 << (7 - o);
// return (v & mask) ? 1 : 0;
//}
// depends: POLYS, RATE, COMPUTETYPE
// encode was only used for testing purposes
//void encode (/*const*/ unsigned char *bytes, COMPUTETYPE *symbols, int nbits) {
//int i, k;
//int polys [RATE] = POLYS;
//int sr = 0;
//
//// FIXME: this is slowish
//// -- remember about the padding!
// for (i = 0; i < nbits + (K - 1); i++) {
// int b = bytes[i/8];
// int j = i % 8;
// int bit = (b >> (7-j)) & 1;
//
// sr = (sr << 1) | bit;
// for (k = 0; k < RATE; k++)
// *(symbols++) = parity(sr & polys[k]);
// }
//}
//	Decodes one frame.
//	input:	(frameBits + K - 1) * RATE soft bits
//	output:	frameBits decoded bits, one bit (0 or 1) per byte
//
//	Note that our DAB environment maps the softbits to -127 .. 127,
//	we have to map that onto 0 .. 255. Values outside that range are
//	clamped. The offset is added in 32 bits: doing it in an int16_t
//	lets inputs above 32640 wrap to a negative value, which would
//	then be clamped to 0 rather than to 255.
void viterbi_768::deconvolve (int16_t *input, uint8_t *output) {
uint32_t i;

	init_viterbi (&vp, 0);
	for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
	   int32_t temp = (int32_t)input [i] + 127;
	   if (temp < 0) temp = 0;
	   if (temp > 255) temp = 255;
	   symbols [i] = temp;
	}

//	run the add-compare-select over all stages, tail included
	if (!spiral)
	   update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
	else
	   update_viterbi_blk_SPIRAL (&vp, symbols, frameBits + (K - 1));

//	trace back from state 0 into the packed byte buffer "data" ...
	chainback_viterbi (&vp, data, frameBits, 0);

//	... and unpack it, most significant bit first
	for (i = 0; i < (uint16_t)frameBits; i ++)
	   output [i] = getbit (data [i >> 3], i & 07);
}
/* C-language butterfly */
/*
 * Performs one add-compare-select butterfly.
 * i     butterfly index; the caller in this file iterates it over
 *       0 .. NUMSTATES / 2 - 1
 * s     trellis stage; selects the RATE symbols syms [s * RATE ..]
 * syms  received symbols
 * vp    decoder state: old_metrics is read, new_metrics is written
 * d     decision storage; the two decision bits are OR-ed in, so the
 *       caller must have zeroed it beforehand
 */
void viterbi_768::BFLY (int i, int s, COMPUTETYPE * syms,
struct v * vp, decision_t * d) {
int32_t j, decision0, decision1;
COMPUTETYPE metric,m0,m1,m2,m3;
metric =0;
/* branch metric: sum over the RATE symbols of (expected XOR received) */
for (j = 0; j < RATE;j++)
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s*RATE+j]) >>
METRICSHIFT ;
metric = metric >> PRECISIONSHIFT;
/* largest value the branch metric can take; max - metric is used as
 * the metric of the complementary branch */
const COMPUTETYPE max =
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
/* candidates for new state 2 * i (m0, m1) and new state 2 * i + 1 (m2, m3),
 * coming from old states i and i + NUMSTATES / 2 */
m0 = vp->old_metrics->t [i] + metric;
m1 = vp->old_metrics->t [i + NUMSTATES / 2] + (max - metric);
m2 = vp->old_metrics->t [i] + (max - metric);
m3 = vp->old_metrics->t [i + NUMSTATES / 2] + metric;
/* the difference is compared as a signed value; decision == 1 selects
 * the path coming from old state i + NUMSTATES / 2 */
decision0 = ((int32_t)(m0 - m1)) > 0;
decision1 = ((int32_t)(m2 - m3)) > 0;
/* keep the smaller candidate of each pair */
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
/* store both decision bits at bit position 2 * i (modulo 32) of the
 * 32-bit word belonging to stage s */
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
}
/* Update decoder with a block of demodulated symbols
* Note that nbits is the number of decoded data bits, not the number
* of symbols!
*/
void viterbi_768::update_viterbi_blk_GENERIC (struct v *vp,
COMPUTETYPE *syms,
int16_t nbits){
decision_t *d = (decision_t *)vp -> decisions;
int32_t s, i;
for (s = 0; s < nbits; s++)
memset (&d [s], 0, sizeof (decision_t));
for (s = 0; s < nbits; s++){
void *tmp;
for (i = 0; i < NUMSTATES / 2; i++)
BFLY (i, s, syms, vp, vp -> decisions);
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
// Swap pointers to old and new metrics
tmp = vp -> old_metrics;
vp -> old_metrics = vp -> new_metrics;
vp -> new_metrics = (metric_t *)tmp;
}
}
//	Prototype of the "spiral" kernel, declared with C linkage and
//	defined outside this part of the file.
//	Exactly one name is declared, selected at compile time:
//	FULL_SPIRAL_sse when SSE_AVAILABLE is defined, otherwise
//	FULL_SPIRAL_neon when NEON_AVAILABLE is defined, otherwise
//	FULL_SPIRAL_no_sse.
//	update_viterbi_blk_SPIRAL calls it with, in this order: the number
//	of bits, the new metrics (Y), the old metrics (X), the symbols,
//	the decision storage and the branch table.
extern "C" {
#if defined(SSE_AVAILABLE)
void FULL_SPIRAL_sse (int,
#elif defined(NEON_AVAILABLE)
void FULL_SPIRAL_neon (int,
#else
void FULL_SPIRAL_no_sse (int,
#endif
COMPUTETYPE *Y,
COMPUTETYPE *X,
COMPUTETYPE *syms,
DECISIONTYPE *dec,
COMPUTETYPE *Branchtab);
}
void viterbi_768::update_viterbi_blk_SPIRAL (struct v *vp,
COMPUTETYPE *syms,
int16_t nbits){
decision_t *d = (decision_t *)vp -> decisions;
int32_t s;
for (s = 0; s < nbits; s++)
memset (d + s, 0, sizeof(decision_t));
#if defined(SSE_AVAILABLE)
FULL_SPIRAL_sse (nbits,
#elif defined(NEON_AVAILABLE)
FULL_SPIRAL_neon (nbits,
#else
FULL_SPIRAL_no_sse (nbits,
#endif
vp -> new_metrics -> t,
vp -> old_metrics -> t,
syms,
d -> t, Branchtab);
}
//
/* Viterbi chainback */
void viterbi_768::chainback_viterbi (struct v *vp,
uint8_t *data, /* Decoded output data */
int16_t nbits, /* Number of data bits */
uint16_t endstate){ /*Terminal encoder state */
decision_t *d = vp -> decisions;
/* Make room beyond the end of the encoder register so we can
* accumulate a full byte of decoded data
*/
endstate = (endstate % NUMSTATES) << ADDSHIFT;
/* The store into data[] only needs to be done every 8 bits.
* But this avoids a conditional branch, and the writes will
* combine in the cache anyway
*/
d += (K - 1); /* Look past tail */
while (nbits-- != 0){
int k;
// int l = (endstate >> ADDSHIFT) / 32;
// int m = (endstate >> ADDSHIFT) % 32;
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
((endstate>>ADDSHIFT) % 32)) & 1;
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
data [nbits >> 3] = endstate >> SUBSHIFT;
}
}
/*
 *	Initialize the Viterbi decoder for the start of a new frame:
 *	metrics1 becomes the "old" and metrics2 the "new" metric set,
 *	all old metrics are set to 63, apart from the one of the known
 *	starting state, which is biased by setting it to 0.
 */
void viterbi_768::init_viterbi (struct v *p, int16_t starting_state){
    p -> old_metrics = &p -> metrics1;
    p -> new_metrics = &p -> metrics2;

    for (int32_t state = 0; state < NUMSTATES; state++)
        p -> old_metrics -> t [state] = 63;

    /* Bias known start state */
    p -> old_metrics -> t [starting_state & (NUMSTATES - 1)] = 0;
}