mirror of
https://github.com/JvanKatwijk/dabradio
synced 2025-10-06 00:02:49 +02:00
...
This commit is contained in:
@@ -129,7 +129,6 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
|
||||
./includes/backend/data/mot
|
||||
./includes/backend/data/journaline
|
||||
./includes/support
|
||||
./includes/support/viterbi_768
|
||||
./includes/output
|
||||
./devices
|
||||
/usr/include/
|
||||
@@ -175,10 +174,9 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
|
||||
./includes/output/audio-base.h
|
||||
./includes/output/newconverter.h
|
||||
./includes/output/audiosink.h
|
||||
./includes/support/viterbi_768/viterbi-768.h
|
||||
./includes/support/viterbi-handler.h
|
||||
./includes/support/fft-handler.h
|
||||
./includes/support/ringbuffer.h
|
||||
./includes/support/Xtan2.h
|
||||
./includes/support/dab-params.h
|
||||
./includes/support/band-handler.h
|
||||
./includes/support/text-mapper.h
|
||||
@@ -228,13 +226,11 @@ add_definitions (-DTHREADED_DECODING) # uncomment for use for an RPI
|
||||
./src/output/fir-filters.cpp
|
||||
./src/output/audiosink.cpp
|
||||
./src/support/fft-handler.cpp
|
||||
./src/support/Xtan2.cpp
|
||||
./src/support/dab-params.cpp
|
||||
./src/support/band-handler.cpp
|
||||
./src/support/text-mapper.cpp
|
||||
./src/support/dab_tables.cpp
|
||||
./src/support/viterbi_768/viterbi-768.cpp
|
||||
./src/support/viterbi_768/spiral-no-sse.c
|
||||
./src/support/viterbi-handler.cpp
|
||||
)
|
||||
|
||||
set (${objectName}_MOCS
|
||||
|
@@ -133,7 +133,6 @@ endif ()
|
||||
./includes/backend/data/journaline
|
||||
./includes/backend/data/mot
|
||||
./includes/support
|
||||
./includes/support/viterbi_768
|
||||
./includes/output
|
||||
./devices
|
||||
/usr/include/
|
||||
@@ -178,10 +177,9 @@ endif ()
|
||||
./includes/output/fir-filters.h
|
||||
./includes/output/audio-base.h
|
||||
./includes/output/newconverter.h
|
||||
./includes/support/viterbi_768/viterbi-768.h
|
||||
./includes/support/viterbi-handler.h
|
||||
./includes/support/fft-handler.h
|
||||
./includes/support/ringbuffer.h
|
||||
./includes/support/Xtan2.h
|
||||
./includes/support/dab-params.h
|
||||
./includes/support/band-handler.h
|
||||
./includes/support/text-mapper.h
|
||||
@@ -230,13 +228,11 @@ endif ()
|
||||
./src/output/newconverter.cpp
|
||||
./src/output/fir-filters.cpp
|
||||
./src/support/fft-handler.cpp
|
||||
./src/support/Xtan2.cpp
|
||||
./src/support/dab-params.cpp
|
||||
./src/support/band-handler.cpp
|
||||
./src/support/text-mapper.cpp
|
||||
./src/support/dab_tables.cpp
|
||||
./src/support/viterbi_768/viterbi-768.cpp
|
||||
./src/support/viterbi_768/spiral-no-sse.c
|
||||
./src/support/viterbi-handler.cpp
|
||||
)
|
||||
|
||||
set (${objectName}_UIS
|
||||
|
38
README.md
38
README.md
@@ -3,7 +3,38 @@
|
||||
|
||||
dabradio is software for Windows, Linux and Raspberry Pi for listening to terrestrial Digital Audio Broadcasting (DAB and DAB+). It is the little brother of Qt-DAB.
|
||||
|
||||
NEW: the software should be able to run with the mirics SDRplay-duo
|
||||
------------------------------------------------------------------------
|
||||
NEW: a script to build an executable on an rpi
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
In order to ease building an executable on a Raspberry 2 or 3, a
|
||||
script, "script-rpi.sh" is available that will do all
|
||||
installations of required libraries, and build an executable
|
||||
named "dabradio-1.0", configured for DABsticks and sdrPlay
|
||||
devices.
|
||||
|
||||
The script will load sources for a DABstick handler and build
|
||||
and install the library.
|
||||
An installer for the sdrPlay devices can be obtained from "www.sdrplay.com".
|
||||
|
||||
The script assumes an installation of Stretch on the RPI device,
|
||||
and that "git" is installed to fetch the source package.
|
||||
|
||||
sudo apt-get install git
|
||||
git clone https://github.com/JvanKatwijk/dabradio
|
||||
cd dabradio
|
||||
chmod 777 script-rpi.sh
|
||||
./script-rpi.sh
|
||||
|
||||
Note that the installer for the rtlsdr handler will create a "udev" file
|
||||
(in "/etc/udev/rules.d"), that will be active only after a restart (or a restart of the udev subsystem).
|
||||
|
||||
During installations of the various packages, several times
|
||||
one has to confirm when asked, so while it takes a few minutes
|
||||
it is wise to stay around.
|
||||
|
||||
The executable will be installed in the subdirectory "linux-bin".
|
||||
|
||||
|
||||
------------------------------------------------------------------
|
||||
Table of Contents
|
||||
@@ -93,8 +124,9 @@ some gaps in the audio output.
|
||||
Windows
|
||||
------------------------------------------------------------------
|
||||
|
||||
Windows releases can be found at https://github.com/JvanKatwijk/dabradio/releases. The zipped folder found there contains
|
||||
the executable for this and some other Windows programs, as well as the libraries required.
|
||||
For Windows, an *installer* is available in the releases
|
||||
section. The installer will also install - if not installed
|
||||
already - the library for SDRplay support.
|
||||
|
||||
If you want to compile it by yourself, please install Qt
|
||||
through its online installer, see https://www.qt.io/
|
||||
|
@@ -68,7 +68,7 @@ int32_t i;
|
||||
this -> nrBlocks = params. get_L ();
|
||||
this -> carriers = params. get_carriers ();
|
||||
this -> carrierDiff = params. get_carrierDiff ();
|
||||
this -> giveSignal = false;
|
||||
this -> scanMode = false;
|
||||
|
||||
ofdmBuffer. resize (2 * T_s);
|
||||
ofdmBufferIndex = 0;
|
||||
@@ -92,9 +92,10 @@ int32_t i;
|
||||
}
|
||||
}
|
||||
|
||||
void dabProcessor::start (int frequency, bool giveSignal) {
|
||||
this -> frequency = frequency;
|
||||
this -> giveSignal = giveSignal;
|
||||
void dabProcessor::start (int frequency, bool scanMode) {
|
||||
this -> frequency = frequency;
|
||||
this -> scanMode = scanMode;
|
||||
startFailures = 0;
|
||||
this -> QThread::start ();
|
||||
}
|
||||
|
||||
@@ -137,7 +138,7 @@ notSynced:
|
||||
break; // yes, we are ready
|
||||
|
||||
case NO_DIP_FOUND:
|
||||
if (giveSignal && (++ attempts >= 5)) {
|
||||
if (scanMode && (++ attempts >= 5)) {
|
||||
emit (No_Signal_Found ());
|
||||
attempts = 0;
|
||||
}
|
||||
@@ -174,9 +175,15 @@ SyncOnPhase:
|
||||
if (!correctionNeeded) {
|
||||
setSyncLost ();
|
||||
}
|
||||
startFailures ++;
|
||||
if (scanMode && (startFailures > 3)) {
|
||||
emit (No_Signal_Found ());
|
||||
startFailures = 0;
|
||||
}
|
||||
goto notSynced;
|
||||
}
|
||||
|
||||
startFailures = 0;
|
||||
/**
|
||||
* Once here, we are synchronized, we need to copy the data we
|
||||
* used for synchronization for block 0
|
||||
@@ -252,6 +259,21 @@ NewOffset:
|
||||
*/
|
||||
myReader. getSamples (ofdmBuffer. data (),
|
||||
T_null, coarseOffset);
|
||||
|
||||
float sum = 0;
|
||||
for (i = 0; i < T_null; i ++)
|
||||
sum += abs (ofdmBuffer [i]);
|
||||
sum /= T_null;
|
||||
|
||||
static float snr = 0;
|
||||
snr = 0.9 * snr +
|
||||
0.1 * 20 * log10 ((myReader. get_sLevel () + 0.005) / sum);
|
||||
static int ccc = 0;
|
||||
if (++ccc > 10) {
|
||||
ccc = 0;
|
||||
show_snr ((int)snr);
|
||||
}
|
||||
|
||||
/**
|
||||
* The first sample to be found for the next frame should be T_g
|
||||
* samples ahead. Before going for the next frame, we
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB program
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#
|
||||
@@ -92,7 +92,8 @@ private:
|
||||
mscHandler my_mscHandler;
|
||||
int32_t frequency;
|
||||
int16_t attempts;
|
||||
bool giveSignal;
|
||||
bool scanMode;
|
||||
int startFailures;
|
||||
int32_t T_null;
|
||||
int32_t T_u;
|
||||
int32_t T_s;
|
||||
@@ -112,7 +113,6 @@ private:
|
||||
uint32_t ofdmSymbolCount;
|
||||
phaseReference phaseSynchronizer;
|
||||
ofdmDecoder my_ofdmDecoder;
|
||||
bool wasSecond (int16_t, dabParams *);
|
||||
virtual void run (void);
|
||||
bool isReset;
|
||||
signals:
|
||||
@@ -120,8 +120,8 @@ signals:
|
||||
void No_Signal_Found (void);
|
||||
void setSyncLost (void);
|
||||
void showCoordinates (int, int);
|
||||
// void showCoordinates (float, float);
|
||||
void show_Spectrum (int);
|
||||
void show_snr (int);
|
||||
};
|
||||
#endif
|
||||
|
||||
|
69
dabradio-installer.iss
Normal file
69
dabradio-installer.iss
Normal file
@@ -0,0 +1,69 @@
|
||||
|
||||
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
|
||||
|
||||
#define MyAppName "dabradio"
|
||||
#define MyAppVersion "1.0"
|
||||
#define MyAppPublisher "Lazy Chair Computing"
|
||||
#define MyAppURL "https://github.com/JvanKatwijk/dabradio"
|
||||
#define MyAppExeName "dabradio-1.0.exe";
|
||||
|
||||
[Setup]
|
||||
; NOTE: The value of AppId uniquely identifies this application.
|
||||
; Do not use the same AppId value in installers for other applications.
|
||||
; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
|
||||
AppId= {{B4C322AE-1C29-47E8-BF74-ED434065488D}
|
||||
AppName={#MyAppName}
|
||||
AppVersion={#MyAppVersion}
|
||||
;AppVerName={#MyAppName} {#MyAppVersion}
|
||||
AppPublisher={#MyAppPublisher}
|
||||
AppPublisherURL={#MyAppURL}
|
||||
AppSupportURL={#MyAppURL}
|
||||
AppUpdatesURL={#MyAppURL}
|
||||
DefaultDirName={pf}\{#MyAppName}
|
||||
DisableProgramGroupPage=yes
|
||||
LicenseFile=E:\sdr-j-development\windows-qt-dab\COPYRIGHT.this_software
|
||||
InfoBeforeFile=E:\sdr-j-development\windows-dabradio\preamble.txt
|
||||
OutputBaseFilename=setup-dabradio
|
||||
Compression=lzma
|
||||
SolidCompression=yes
|
||||
|
||||
[Languages]
|
||||
Name: "english"; MessagesFile: "compiler:Default.isl"
|
||||
|
||||
[Tasks]
|
||||
Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
|
||||
|
||||
[Files]
|
||||
Source: "E:\sdr-j-development\windows-dabradio\dabradio-1.0.exe"; DestDir: "{app}"; Flags: ignoreversion
|
||||
Source: "E:\sdr-j-development\windows-dabradio\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
|
||||
Source: "E:\sdr-j-development\SDRplay_RSP_API-Windows-2.13.1.exe"; DestDir: "{app}"; AfterInstall : install_sdrplayApi
|
||||
|
||||
[Icons]
|
||||
Name: "{commonprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"
|
||||
Name: "{commondesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon
|
||||
|
||||
[Run]
|
||||
Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
|
||||
|
||||
[code]
|
||||
procedure install_sdrplayApi;
|
||||
var
|
||||
resultCode : Integer;
|
||||
Names : TArrayOfString;
|
||||
I : Integer;
|
||||
found : Boolean;
|
||||
|
||||
begin
|
||||
|
||||
RegGetSubkeyNames(HKEY_LOCAL_MACHINE, 'SOFTWARE\MiricsSDR', Names);
|
||||
for I := 0 to GetArrayLength(Names)-1 do
|
||||
if Names [I] = 'API' then found := true;
|
||||
|
||||
if not found
|
||||
then
|
||||
begin
|
||||
MsgBox ('Software\MiricsSDR\API not found', mbInformation, MB_OK);
|
||||
Exec (ExpandConstant('{app}\SDRplay_RSP_API-Windows-2.13.1.exe'), '', '', SW_SHOWNORMAL,
|
||||
ewWaitUntilTerminated, ResultCode);
|
||||
end
|
||||
end;
|
53
dabradio.pro
53
dabradio.pro
@@ -33,7 +33,6 @@ DEPENDPATH += . \
|
||||
./src/backend/data/mot \
|
||||
./src/output \
|
||||
./src/support \
|
||||
./src/support/viterbi_768 \
|
||||
./devices \
|
||||
./devices/rawfiles \
|
||||
./devices/wavfiles \
|
||||
@@ -44,7 +43,6 @@ DEPENDPATH += . \
|
||||
./includes/backend/data/mot \
|
||||
./includes/output \
|
||||
./includes/support \
|
||||
./includes/support/viterbi_768
|
||||
|
||||
INCLUDEPATH += . \
|
||||
./ \
|
||||
@@ -59,7 +57,6 @@ INCLUDEPATH += . \
|
||||
./includes/backend/data/mot \
|
||||
./includes/output \
|
||||
./includes/support \
|
||||
./includes/support/viterbi_768 \
|
||||
./devices \
|
||||
./devices/rawfiles \
|
||||
./devices/wavfiles
|
||||
@@ -106,10 +103,9 @@ HEADERS += ./radio.h \
|
||||
./includes/output/audio-base.h \
|
||||
./includes/output/newconverter.h \
|
||||
./includes/output/audiosink.h \
|
||||
./includes/support/viterbi_768/viterbi-768.h \
|
||||
./includes/support/viterbi-handler.h \
|
||||
./includes/support/fft-handler.h \
|
||||
./includes/support/ringbuffer.h \
|
||||
./includes/support/Xtan2.h \
|
||||
./includes/support/dab-params.h \
|
||||
./includes/support/band-handler.h \
|
||||
./includes/support/text-mapper.h \
|
||||
@@ -158,9 +154,8 @@ SOURCES += ./main.cpp \
|
||||
./src/output/audio-base.cpp \
|
||||
./src/output/newconverter.cpp \
|
||||
./src/output/audiosink.cpp \
|
||||
./src/support/viterbi_768/viterbi-768.cpp \
|
||||
./src/support/viterbi-handler.cpp \
|
||||
./src/support/fft-handler.cpp \
|
||||
./src/support/Xtan2.cpp \
|
||||
./src/support/dab-params.cpp \
|
||||
./src/support/band-handler.cpp \
|
||||
./src/support/text-mapper.cpp \
|
||||
@@ -200,8 +195,8 @@ LIBS += -lfaad
|
||||
# (you obviously have libraries installed for the selected ones)
|
||||
CONFIG += dabstick
|
||||
CONFIG += sdrplay
|
||||
CONFIG += airspy
|
||||
CONFIG += hackrf
|
||||
#CONFIG += airspy
|
||||
#CONFIG += hackrf
|
||||
#if you want to listen remote, uncomment
|
||||
#CONFIG += tcp-streamer # use for remote listening
|
||||
#otherwise, if you want to use the default qt way of sound out
|
||||
@@ -214,13 +209,6 @@ DEFINES += __THREADED_BACKEND
|
||||
|
||||
#and this one is experimental
|
||||
DEFINES += PRESET_NAME
|
||||
|
||||
#and this one is just experimental,
|
||||
#NO_SSE is always safe
|
||||
#CONFIG += NEON_RPI2
|
||||
#CONFIG += NEON_RPI3
|
||||
CONFIG += SSE
|
||||
#CONFIG += NO_SSE
|
||||
}
|
||||
#
|
||||
# an attempt to have it run under W32 through cross compilation
|
||||
@@ -262,9 +250,7 @@ FORMS += ./forms/dabradio.ui
|
||||
CONFIG += airspy
|
||||
CONFIG += dabstick
|
||||
CONFIG += sdrplay
|
||||
CONFIG += hackrf
|
||||
CONFIG += NO_SSE
|
||||
|
||||
#CONFIG += hackrf
|
||||
|
||||
#for the raspberry you definitely want this one
|
||||
#when this one is enabled, load is spread over different threads
|
||||
@@ -341,32 +327,3 @@ qt-audio {
|
||||
./src/output/Qt-audiodevice.cpp
|
||||
}
|
||||
|
||||
# for RPI2 use:
|
||||
NEON_RPI2 {
|
||||
DEFINES += NEON_AVAILABLE
|
||||
QMAKE_CFLAGS += -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4
|
||||
QMAKE_CXXFLAGS += -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4
|
||||
HEADERS += ./src/support/viterbi_768/spiral-neon.h
|
||||
SOURCES += ./src/support/viterbi_768/spiral-neon.c
|
||||
}
|
||||
|
||||
# for RPI3 use:
|
||||
NEON_RPI3 {
|
||||
DEFINES += NEON_AVAILABLE
|
||||
# QMAKE_CFLAGS += -mcpu=cortex-a53 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mneon-for-64bits
|
||||
# QMAKE_CXXFLAGS += -mcpu=cortex-a53 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mneon-for-64bits
|
||||
HEADERS += ./src/support/viterbi_768/spiral-neon.h
|
||||
SOURCES += ./src/support/viterbi_768/spiral-neon.c
|
||||
}
|
||||
|
||||
SSE {
|
||||
DEFINES += SSE_AVAILABLE
|
||||
HEADERS += ./src/support/viterbi_768/spiral-sse.h
|
||||
SOURCES += ./src/support/viterbi_768/spiral-sse.c
|
||||
}
|
||||
|
||||
NO_SSE {
|
||||
HEADERS += ./src/support/viterbi_768/spiral-no-sse.h
|
||||
SOURCES += ./src/support/viterbi_768/spiral-no-sse.c
|
||||
}
|
||||
|
||||
|
@@ -167,12 +167,12 @@ int i, k;
|
||||
gains = new int [gainsCount];
|
||||
gainsCount = rtlsdr_get_tuner_gains (device, gains);
|
||||
for (i = gainsCount; i > 0; i--) {
|
||||
fprintf(stderr, "%.1f ", gains [i - 1] / 10.0);
|
||||
fprintf (stderr, "%.1f ", gains [i - 1] / 10.0);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
rtlsdr_set_tuner_gain_mode (device, 1);
|
||||
|
||||
rtlsdr_set_agc_mode (device, 0);
|
||||
_I_Buffer = new RingBuffer<uint8_t>(8 * 1024 * 1024);
|
||||
//
|
||||
// See what the saved values are and restore the GUI settings
|
||||
@@ -212,7 +212,8 @@ int i, k;
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
//
|
||||
// library was open
|
||||
stopReader ();
|
||||
|
||||
this -> rtlsdr_close (device);
|
||||
@@ -233,7 +234,7 @@ int i, k;
|
||||
}
|
||||
//
|
||||
//
|
||||
bool rtlsdrHandler::restartReader (void) {
|
||||
bool rtlsdrHandler::restartReader (int32_t frequency) {
|
||||
int32_t r;
|
||||
|
||||
if (workerHandle != NULL)
|
||||
@@ -244,7 +245,7 @@ int32_t r;
|
||||
if (r < 0)
|
||||
return false;
|
||||
|
||||
this -> rtlsdr_set_center_freq (device, lastFrequency);
|
||||
this -> rtlsdr_set_center_freq (device, frequency);
|
||||
workerHandle = new dll_driver (this);
|
||||
rtlsdr_set_agc_mode (device, agcControl -> isChecked ());
|
||||
rtlsdr_set_tuner_gain (device,
|
||||
@@ -268,8 +269,10 @@ void rtlsdrHandler::stopReader (void) {
|
||||
//
|
||||
// when selecting the gain from a table, use the table value
|
||||
void rtlsdrHandler::set_ifgain (int gain) {
|
||||
fprintf (stderr, "gain will be set %d to %d\n",
|
||||
gain, gains [gain * gainsCount / 100]);
|
||||
rtlsdr_set_tuner_gain (device,
|
||||
gain * gainsCount / 100);
|
||||
gains [gain * gainsCount / 100]);
|
||||
}
|
||||
//
|
||||
void rtlsdrHandler::set_agcControl (int dummy) {
|
||||
@@ -277,21 +280,42 @@ void rtlsdrHandler::set_agcControl (int dummy) {
|
||||
rtlsdr_set_tuner_gain (device,
|
||||
gains [(int)(ifgainSelector -> value () * gainsCount / 100)]);
|
||||
}
|
||||
|
||||
//
|
||||
// we only have 8 bits, so rather than doing a float division to get
|
||||
// the float value we want, we precompute the possibilities
|
||||
static
|
||||
float convTable [] = {
|
||||
-128 / 128.0 , -127 / 128.0 , -126 / 128.0 , -125 / 128.0 , -124 / 128.0 , -123 / 128.0 , -122 / 128.0 , -121 / 128.0 , -120 / 128.0 , -119 / 128.0 , -118 / 128.0 , -117 / 128.0 , -116 / 128.0 , -115 / 128.0 , -114 / 128.0 , -113 / 128.0
|
||||
, -112 / 128.0 , -111 / 128.0 , -110 / 128.0 , -109 / 128.0 , -108 / 128.0 , -107 / 128.0 , -106 / 128.0 , -105 / 128.0 , -104 / 128.0 , -103 / 128.0 , -102 / 128.0 , -101 / 128.0 , -100 / 128.0 , -99 / 128.0 , -98 / 128.0 , -97 / 128.0
|
||||
, -96 / 128.0 , -95 / 128.0 , -94 / 128.0 , -93 / 128.0 , -92 / 128.0 , -91 / 128.0 , -90 / 128.0 , -89 / 128.0 , -88 / 128.0 , -87 / 128.0 , -86 / 128.0 , -85 / 128.0 , -84 / 128.0 , -83 / 128.0 , -82 / 128.0 , -81 / 128.0
|
||||
, -80 / 128.0 , -79 / 128.0 , -78 / 128.0 , -77 / 128.0 , -76 / 128.0 , -75 / 128.0 , -74 / 128.0 , -73 / 128.0 , -72 / 128.0 , -71 / 128.0 , -70 / 128.0 , -69 / 128.0 , -68 / 128.0 , -67 / 128.0 , -66 / 128.0 , -65 / 128.0
|
||||
, -64 / 128.0 , -63 / 128.0 , -62 / 128.0 , -61 / 128.0 , -60 / 128.0 , -59 / 128.0 , -58 / 128.0 , -57 / 128.0 , -56 / 128.0 , -55 / 128.0 , -54 / 128.0 , -53 / 128.0 , -52 / 128.0 , -51 / 128.0 , -50 / 128.0 , -49 / 128.0
|
||||
, -48 / 128.0 , -47 / 128.0 , -46 / 128.0 , -45 / 128.0 , -44 / 128.0 , -43 / 128.0 , -42 / 128.0 , -41 / 128.0 , -40 / 128.0 , -39 / 128.0 , -38 / 128.0 , -37 / 128.0 , -36 / 128.0 , -35 / 128.0 , -34 / 128.0 , -33 / 128.0
|
||||
, -32 / 128.0 , -31 / 128.0 , -30 / 128.0 , -29 / 128.0 , -28 / 128.0 , -27 / 128.0 , -26 / 128.0 , -25 / 128.0 , -24 / 128.0 , -23 / 128.0 , -22 / 128.0 , -21 / 128.0 , -20 / 128.0 , -19 / 128.0 , -18 / 128.0 , -17 / 128.0
|
||||
, -16 / 128.0 , -15 / 128.0 , -14 / 128.0 , -13 / 128.0 , -12 / 128.0 , -11 / 128.0 , -10 / 128.0 , -9 / 128.0 , -8 / 128.0 , -7 / 128.0 , -6 / 128.0 , -5 / 128.0 , -4 / 128.0 , -3 / 128.0 , -2 / 128.0 , -1 / 128.0
|
||||
, 0 / 128.0 , 1 / 128.0 , 2 / 128.0 , 3 / 128.0 , 4 / 128.0 , 5 / 128.0 , 6 / 128.0 , 7 / 128.0 , 8 / 128.0 , 9 / 128.0 , 10 / 128.0 , 11 / 128.0 , 12 / 128.0 , 13 / 128.0 , 14 / 128.0 , 15 / 128.0
|
||||
, 16 / 128.0 , 17 / 128.0 , 18 / 128.0 , 19 / 128.0 , 20 / 128.0 , 21 / 128.0 , 22 / 128.0 , 23 / 128.0 , 24 / 128.0 , 25 / 128.0 , 26 / 128.0 , 27 / 128.0 , 28 / 128.0 , 29 / 128.0 , 30 / 128.0 , 31 / 128.0
|
||||
, 32 / 128.0 , 33 / 128.0 , 34 / 128.0 , 35 / 128.0 , 36 / 128.0 , 37 / 128.0 , 38 / 128.0 , 39 / 128.0 , 40 / 128.0 , 41 / 128.0 , 42 / 128.0 , 43 / 128.0 , 44 / 128.0 , 45 / 128.0 , 46 / 128.0 , 47 / 128.0
|
||||
, 48 / 128.0 , 49 / 128.0 , 50 / 128.0 , 51 / 128.0 , 52 / 128.0 , 53 / 128.0 , 54 / 128.0 , 55 / 128.0 , 56 / 128.0 , 57 / 128.0 , 58 / 128.0 , 59 / 128.0 , 60 / 128.0 , 61 / 128.0 , 62 / 128.0 , 63 / 128.0
|
||||
, 64 / 128.0 , 65 / 128.0 , 66 / 128.0 , 67 / 128.0 , 68 / 128.0 , 69 / 128.0 , 70 / 128.0 , 71 / 128.0 , 72 / 128.0 , 73 / 128.0 , 74 / 128.0 , 75 / 128.0 , 76 / 128.0 , 77 / 128.0 , 78 / 128.0 , 79 / 128.0
|
||||
, 80 / 128.0 , 81 / 128.0 , 82 / 128.0 , 83 / 128.0 , 84 / 128.0 , 85 / 128.0 , 86 / 128.0 , 87 / 128.0 , 88 / 128.0 , 89 / 128.0 , 90 / 128.0 , 91 / 128.0 , 92 / 128.0 , 93 / 128.0 , 94 / 128.0 , 95 / 128.0
|
||||
, 96 / 128.0 , 97 / 128.0 , 98 / 128.0 , 99 / 128.0 , 100 / 128.0 , 101 / 128.0 , 102 / 128.0 , 103 / 128.0 , 104 / 128.0 , 105 / 128.0 , 106 / 128.0 , 107 / 128.0 , 108 / 128.0 , 109 / 128.0 , 110 / 128.0 , 111 / 128.0
|
||||
, 112 / 128.0 , 113 / 128.0 , 114 / 128.0 , 115 / 128.0 , 116 / 128.0 , 117 / 128.0 , 118 / 128.0 , 119 / 128.0 , 120 / 128.0 , 121 / 128.0 , 122 / 128.0 , 123 / 128.0 , 124 / 128.0 , 125 / 128.0 , 126 / 128.0 , 127 / 128.0 };
|
||||
|
||||
//
|
||||
// The brave old getSamples. For the dab stick, we get
|
||||
// size samples: still in I/Q pairs, but we have to convert the data from
|
||||
// uint8_t to std::complex<float> *
|
||||
// uint8_t to DSPCOMPLEX *
|
||||
int32_t rtlsdrHandler::getSamples (std::complex<float> *V, int32_t size) {
|
||||
int32_t amount, i;
|
||||
uint8_t *tempBuffer = (uint8_t *)alloca (2 * size * sizeof (uint8_t));
|
||||
//
|
||||
amount = _I_Buffer -> getDataFromBuffer (tempBuffer, 2 * size);
|
||||
|
||||
for (i = 0; i < amount / 2; i ++)
|
||||
V [i] = std::complex<float>
|
||||
((float (tempBuffer [2 * i] - 128)) / 128.0,
|
||||
(float (tempBuffer [2 * i + 1] - 128)) / 128.0);
|
||||
(convTable [tempBuffer [2 * i]],
|
||||
convTable [tempBuffer [2 * i + 1]]);;
|
||||
return amount / 2;
|
||||
}
|
||||
|
||||
|
@@ -72,7 +72,7 @@ public:
|
||||
QCheckBox *);
|
||||
~rtlsdrHandler (void);
|
||||
// interface to the reader
|
||||
bool restartReader (void);
|
||||
bool restartReader (int32_t frequency);
|
||||
void stopReader (void);
|
||||
int32_t getSamples (std::complex<float> *, int32_t);
|
||||
int32_t Samples (void);
|
||||
|
@@ -168,7 +168,7 @@ ULONG APIkeyValue_length = 255;
|
||||
break;
|
||||
}
|
||||
|
||||
sdrplaySettings -> beginGroup ("sdrplaySettings");
|
||||
sdrplaySettings -> beginGroup ("sdrplaySettings");
|
||||
int lnaState = sdrplaySettings -> value ("lnaState", 3). toInt ();
|
||||
lnaGainSetting -> setValue (lnaState);
|
||||
|
||||
@@ -211,6 +211,11 @@ ULONG APIkeyValue_length = 255;
|
||||
if (!libraryLoaded) // should not happen
|
||||
return;
|
||||
stopReader ();
|
||||
|
||||
sdrplaySettings -> beginGroup ("sdrplaySettings");
|
||||
sdrplaySettings -> value ("lnaState", lnaGainSetting -> value ());
|
||||
sdrplaySettings -> value ("GRdB", GRdBSelector -> value ());
|
||||
sdrplaySettings -> endGroup ();
|
||||
|
||||
if (_I_Buffer != NULL)
|
||||
delete _I_Buffer;
|
||||
@@ -228,16 +233,22 @@ int GRdB = GRdBSelector -> value ();
|
||||
int lnaState = lnaGainSetting -> value ();
|
||||
|
||||
(void)newGRdB;
|
||||
if (!running. load ())
|
||||
return;
|
||||
|
||||
err = my_mir_sdr_RSP_SetGr (GRdB, lnaState, 1, 0);
|
||||
if (err != mir_sdr_Success)
|
||||
fprintf (stderr, "Error at set_ifgain %s\n",
|
||||
errorCodes (err). toLatin1 (). data ());
|
||||
fprintf (stderr, "Error at set_ifgain %s (%d %d)\n",
|
||||
errorCodes (err). toLatin1 (). data (),
|
||||
GRdB, lnaState);
|
||||
}
|
||||
|
||||
void sdrplayHandler::set_lnagainReduction (int lnaState) {
|
||||
mir_sdr_ErrT err;
|
||||
|
||||
if (!running. load ())
|
||||
return;
|
||||
|
||||
if (!agcControl -> isChecked ()) {
|
||||
set_ifgainReduction (0);
|
||||
return;
|
||||
|
@@ -371,9 +371,9 @@
|
||||
<widget class="QProgressBar" name="ficQuality">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>30</x>
|
||||
<y>40</y>
|
||||
<width>151</width>
|
||||
<x>10</x>
|
||||
<y>30</y>
|
||||
<width>121</width>
|
||||
<height>21</height>
|
||||
</rect>
|
||||
</property>
|
||||
@@ -387,9 +387,9 @@
|
||||
<widget class="QProgressBar" name="audioQuality">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>187</x>
|
||||
<y>40</y>
|
||||
<width>171</width>
|
||||
<x>140</x>
|
||||
<y>30</y>
|
||||
<width>151</width>
|
||||
<height>21</height>
|
||||
</rect>
|
||||
</property>
|
||||
@@ -400,6 +400,25 @@
|
||||
<number>24</number>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLCDNumber" name="snrDisplay">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>300</x>
|
||||
<y>30</y>
|
||||
<width>64</width>
|
||||
<height>23</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="frameShape">
|
||||
<enum>QFrame::NoFrame</enum>
|
||||
</property>
|
||||
<property name="digitCount">
|
||||
<number>3</number>
|
||||
</property>
|
||||
<property name="segmentStyle">
|
||||
<enum>QLCDNumber::Flat</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusbar"/>
|
||||
</widget>
|
||||
|
@@ -29,7 +29,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include "viterbi-768.h"
|
||||
#include "viterbi-handler.h"
|
||||
#include <QObject>
|
||||
#include "fib-processor.h"
|
||||
#include "dab-params.h"
|
||||
@@ -46,7 +46,7 @@ public:
|
||||
void stop (void);
|
||||
void reset (void);
|
||||
private:
|
||||
viterbi_768 myViterbi;
|
||||
viterbiHandler myViterbi;
|
||||
dabParams params;
|
||||
uint8_t bitBuffer_out [768];
|
||||
int16_t ofdm_input [2304];
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB program
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#ifndef __OFDM_DECODER__
|
||||
@@ -55,7 +55,6 @@ public:
|
||||
void decode (std::vector<std::complex<float> >,
|
||||
int32_t n, int16_t *);
|
||||
|
||||
int16_t get_snr (std::complex<float> *);
|
||||
void stop (void);
|
||||
void reset (void);
|
||||
private:
|
||||
@@ -71,17 +70,13 @@ private:
|
||||
int32_t T_g;
|
||||
int32_t nrBlocks;
|
||||
int32_t carriers;
|
||||
int16_t getMiddle (void);
|
||||
std::vector<complex<float>> phaseReference;
|
||||
std::vector<int16_t> ibits;
|
||||
std::complex<float> *fft_buffer;
|
||||
phaseTable *phasetable;
|
||||
int32_t blockIndex;
|
||||
int16_t snrCount;
|
||||
int16_t snr;
|
||||
int16_t maxSignal;
|
||||
signals:
|
||||
void show_snr (int);
|
||||
void showIQ (int);
|
||||
void showQuality (float);
|
||||
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB.
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB.
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*
|
||||
@@ -27,11 +27,11 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include "viterbi-768.h"
|
||||
#include "viterbi-handler.h"
|
||||
|
||||
extern uint8_t PI_X [];
|
||||
|
||||
class protection: public viterbi_768 {
|
||||
class protection: public viterbiHandler {
|
||||
public:
|
||||
protection (int16_t, int16_t);
|
||||
virtual ~protection (void);
|
||||
@@ -39,8 +39,8 @@ virtual bool deconvolve (int16_t *, int32_t, uint8_t *);
|
||||
protected:
|
||||
int16_t bitRate;
|
||||
int32_t outSize;
|
||||
std::vector<int16_t> viterbiBlock;
|
||||
std::vector<uint8_t> indexTable;
|
||||
std::vector<int16_t> viterbiBlock;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB program
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#
|
||||
|
@@ -1,35 +0,0 @@
|
||||
#
|
||||
//
|
||||
// This LUT implementation of atan2 is a C++ translation of
|
||||
// a Java discussion on the net
|
||||
// http://www.java-gaming.org/index.php?topic=14647.0
|
||||
|
||||
#ifndef __COMP_ATAN
|
||||
#define __COMP_ATAN
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
#include "dab-constants.h"
|
||||
#
|
||||
//
//	Table driven approximation of atan2. The constructor fills eight
//	lookup tables; atan2 selects one of them from the signs and the
//	relative magnitudes of its arguments.
class compAtan {
public:
            compAtan    ();
            ~compAtan   ();
//	table driven arctangent of y / x
    float   atan2       (float y, float x);
//	argument of the complex value v, i.e. atan2 (imag (v), real (v))
    float   argX        (std::complex<float> v);

private:
//	The lookup tables, allocated in the constructor
    float   *ATAN2_TABLE_PPY;
    float   *ATAN2_TABLE_PPX;
    float   *ATAN2_TABLE_PNY;
    float   *ATAN2_TABLE_PNX;
    float   *ATAN2_TABLE_NPY;
    float   *ATAN2_TABLE_NPX;
    float   *ATAN2_TABLE_NNY;
    float   *ATAN2_TABLE_NNX;
//	scale of the table values, the constructor sets it to M_PI
    float   Stretch;
};
|
||||
|
||||
#endif
|
47
includes/support/viterbi-handler.h
Normal file
47
includes/support/viterbi-handler.h
Normal file
@@ -0,0 +1,47 @@
|
||||
#
|
||||
/*
|
||||
* Copyright (C) 2014 .. 2017
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of dabradio
|
||||
*
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef __VITERBI_HANDLER__
|
||||
#define __VITERBI_HANDLER__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
//
//	Viterbi decoder working on blocks of "blockLength" bits.
//	Each trellis step consumes four soft bits.
class viterbiHandler {
public:
            viterbiHandler  (int blockLength);
            ~viterbiHandler ();
//	sym: the soft bits of one block, bitBuffer: receives blockLength
//	decoded bits
    void    deconvolve      (int16_t *sym, uint8_t *bitBuffer);

private:
//	fills costTable for the four soft bits of one trellis step
    void    computeCostTable (int16_t sym_0, int16_t sym_1,
                              int16_t sym_2, int16_t sym_3);
//	output bit of the encoder for a given register state,
//	polynomial and input bit
    uint8_t bitFor          (int state, int poly, int bit);

//	branch costs for the 16 possible combinations of four output bits
    int     costTable [16];
    int     blockLength;
//	the state chosen for each column during trace back
    int     *stateSequence;
//	per column, per state: accumulated costs and selected predecessor
    int     **transCosts;
    int     **history;
};
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -1,63 +0,0 @@
|
||||
#
|
||||
#ifndef __VITERBI__
|
||||
#define __VITERBI__
|
||||
/*
|
||||
* Viterbi.h according to the SPIRAL project
|
||||
*/
|
||||
#include "dab-constants.h"
|
||||
|
||||
// For our particular viterbi decoder, we have
|
||||
//
//	Viterbi decoder "according to the SPIRAL project" (see the header
//	comment of this file). Only the declarations are visible here,
//	the notes below are restricted to what the declarations show.
class viterbi {
#define RATE            4
#define NUMSTATES       64
#define BITS_PER_BYTE   8

private:
//	decision_t is a BIT vector: NUMSTATES bits, accessible with
//	different word sizes
    typedef union {
       uint8_t  t [NUMSTATES / BITS_PER_BYTE];
       uint32_t w [NUMSTATES / 32];
       uint16_t s [NUMSTATES / 16];
       uint8_t  c [NUMSTATES / 8];
    } decision_t __attribute__ ((aligned (16)));

//	one int16_t metric for each of the NUMSTATES states
    typedef union {
       int16_t  t [NUMSTATES];
    } metric_t __attribute__ ((aligned (16)));

//	State info for an instance of the Viterbi decoder
    struct v {
//	the two path metric buffers
       __attribute__ ((aligned (16))) metric_t metrics1;
       __attribute__ ((aligned (16))) metric_t metrics2;
//	Pointers to path metrics, swapped on every bit
       metric_t     *old_metrics;
       metric_t     *new_metrics;
//	the decisions
       decision_t   *decisions;
    };

public:
            viterbi     (int16_t);
            ~viterbi    ();
    void    deconvolve  (int16_t *, uint8_t *);

private:
    struct v    vp;
    int16_t     Branchtab [NUMSTATES / 2 * RATE] __attribute__ ((aligned (16)));

    int16_t     parity      (int16_t);
    void        init_viterbi (struct v *, int16_t);
    void        update_viterbi_blk_GENERIC (struct v *, int16_t *, int16_t);
    void        chainback_viterbi (struct v *, uint8_t *, int16_t, uint16_t);
    void        BFLY        (int32_t, int, int16_t *,
                             struct v *, decision_t *);

    uint8_t     *data;
    int16_t     *symbols;
    int16_t     frameBits;
};
|
||||
|
||||
#endif
|
||||
|
@@ -1,73 +0,0 @@
|
||||
#
|
||||
#ifndef __VITERBI_768__
|
||||
#define __VITERBI_768__
|
||||
/*
|
||||
* Viterbi.h according to the SPIRAL project
|
||||
*/
|
||||
#include "dab-constants.h"
|
||||
|
||||
// For our particular viterbi decoder, we have
|
||||
#define RATE 4
|
||||
#define NUMSTATES 64
|
||||
#define DECISIONTYPE uint32_t
|
||||
//#define DECISIONTYPE uint8_t
|
||||
//#define DECISIONTYPE_BITSIZE 8
|
||||
#define DECISIONTYPE_BITSIZE 32
|
||||
#define COMPUTETYPE uint32_t
|
||||
|
||||
//decision_t is a BIT vector
|
||||
//	Bit vector of NUMSTATES bits. All members overlay the same storage
//	and differ only in the word size used to access it. The type is
//	aligned on 16 bytes.
typedef union {
|
||||
DECISIONTYPE t[NUMSTATES/DECISIONTYPE_BITSIZE];
|
||||
uint32_t w[NUMSTATES/32];
|
||||
uint16_t s[NUMSTATES/16];
|
||||
uint8_t c[NUMSTATES/8];
|
||||
} decision_t __attribute__ ((aligned (16)));
|
||||
|
||||
//	One COMPUTETYPE value for each of the NUMSTATES states, aligned
//	on 16 bytes.
typedef union {
|
||||
COMPUTETYPE t[NUMSTATES];
|
||||
} metric_t __attribute__ ((aligned (16)));
|
||||
|
||||
/* State info for instance of Viterbi decoder
|
||||
*/
|
||||
|
||||
struct v {
|
||||
/* path metric buffer 1 */
|
||||
__attribute__ ((aligned (16))) metric_t metrics1;
|
||||
/* path metric buffer 2 */
|
||||
__attribute__ ((aligned (16))) metric_t metrics2;
|
||||
/* Pointers to path metrics, swapped on every bit */
|
||||
metric_t *old_metrics,*new_metrics;
|
||||
decision_t *decisions; /* decisions */
|
||||
};
|
||||
|
||||
class viterbi_768 {
|
||||
public:
|
||||
viterbi_768 (int16_t, bool spiral = false);
|
||||
~viterbi_768 (void);
|
||||
void deconvolve (int16_t *, uint8_t *);
|
||||
private:
|
||||
|
||||
bool spiral;
|
||||
struct v vp;
|
||||
COMPUTETYPE Branchtab [NUMSTATES / 2 * RATE] __attribute__ ((aligned (16)));
|
||||
// int parityb (uint8_t);
|
||||
int parity (int);
|
||||
void partab_init (void);
|
||||
// uint8_t Partab [256];
|
||||
void init_viterbi (struct v *, int16_t);
|
||||
void update_viterbi_blk_GENERIC (struct v *, COMPUTETYPE *,
|
||||
int16_t);
|
||||
void update_viterbi_blk_SPIRAL (struct v *, COMPUTETYPE *,
|
||||
int16_t);
|
||||
void chainback_viterbi (struct v *, uint8_t *, int16_t, uint16_t);
|
||||
struct v *viterbi_alloc (int32_t);
|
||||
void BFLY (int32_t, int, COMPUTETYPE *,
|
||||
struct v *, decision_t *);
|
||||
// uint8_t *bits;
|
||||
uint8_t *data;
|
||||
COMPUTETYPE *symbols;
|
||||
int16_t frameBits;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
2
main.cpp
2
main.cpp
@@ -127,7 +127,7 @@ int opt;
|
||||
fflush (stdout);
|
||||
fflush (stderr);
|
||||
qDebug ("It is done\n");
|
||||
// delete MyRadioInterface;
|
||||
delete MyRadioInterface;
|
||||
delete dabSettings;
|
||||
}
|
||||
|
||||
|
12
radio.cpp
12
radio.cpp
@@ -170,6 +170,8 @@ QString h;
|
||||
picturesPath);
|
||||
connect (my_dabProcessor, SIGNAL (setSynced (char)),
|
||||
this, SLOT (setSynced (char)));
|
||||
connect (my_dabProcessor, SIGNAL (show_snr (int)),
|
||||
this, SLOT (show_snr (int)));
|
||||
//
|
||||
serviceCharacteristics = NULL;
|
||||
secondsTimer. setInterval (1000);
|
||||
@@ -283,6 +285,7 @@ void RadioInterface::nextChannel (void) {
|
||||
|
||||
void RadioInterface::reset (void) {
|
||||
my_dabProcessor -> stop ();
|
||||
|
||||
disconnect (ensembleDisplay,
|
||||
SIGNAL (clicked (QModelIndex)),
|
||||
this, SLOT (selectService (QModelIndex)));
|
||||
@@ -394,7 +397,7 @@ void RadioInterface::showQuality (float f) {
|
||||
}
|
||||
|
||||
void RadioInterface::show_snr (int s) {
|
||||
(void)s;
|
||||
snrDisplay -> display (s);
|
||||
}
|
||||
|
||||
void RadioInterface::set_CorrectorDisplay (int c) {
|
||||
@@ -531,15 +534,16 @@ void RadioInterface::TerminateProcess (void) {
|
||||
displayTimer. stop ();
|
||||
signalTimer. stop ();
|
||||
|
||||
inputDevice -> stopReader ();
|
||||
my_dabProcessor -> stop (); // definitely concurrent
|
||||
soundOut -> stop ();
|
||||
// everything should be halted by now
|
||||
delete soundOut;
|
||||
if (inputDevice != NULL)
|
||||
delete inputDevice;
|
||||
fprintf (stderr, "going to delete dabProcessor\n");
|
||||
delete my_dabProcessor;
|
||||
fprintf (stderr, "deleted dabProcessor\n");
|
||||
delete soundOut;
|
||||
if (inputDevice != NULL)
|
||||
delete inputDevice;
|
||||
if (ensembleDisplay != NULL)
|
||||
delete ensembleDisplay;
|
||||
if (serviceDescription != NULL)
|
||||
|
22
script-rpi.sh
Normal file
22
script-rpi.sh
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
# Installs the build dependencies, builds and installs the rtl-sdr library
# from source and finally runs qmake-qt4 and make in the directory the
# script was started from.
#
# Stop at the first failing command: without this a failed download,
# unpack or "cd" would let the remaining steps run in the wrong directory.
set -e

sudo apt-get update
sudo apt-get install qt4-qmake build-essential g++
sudo apt-get install libsndfile1-dev qt4-default libfftw3-dev portaudio19-dev
sudo apt-get install libfaad-dev zlib1g-dev libusb-1.0-0-dev mesa-common-dev
sudo apt-get install libgl1-mesa-dev libqt4-opengl-dev libsamplerate-dev libqwt-dev

# fetch, build and install the rtl-sdr library
wget http://sm5bsz.com/linuxdsp/hware/rtlsdr/rtl-sdr-linrad4.tbz
tar xvfj rtl-sdr-linrad4.tbz
cd rtl-sdr-linrad4
# -p: do not fail when the script is run a second time
mkdir -p build
cd build
cmake .. -DDETACH_KERNEL_DRIVER=ON -DINSTALL_UDEV_RULES=ON
make
sudo make install
sudo ldconfig
# back to the directory the script was started from
cd ../..

qmake-qt4
make
|
||||
|
@@ -46,7 +46,7 @@
|
||||
uint8_t dabMode):
|
||||
params (dabMode),
|
||||
fib_processor (mr),
|
||||
myViterbi (768, true) {
|
||||
myViterbi (768) {
|
||||
int16_t i, j, k;
|
||||
int local = 0;
|
||||
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of Qt-DAB
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* Once the bits are "in", interpretation and manipulation
|
||||
@@ -68,12 +68,6 @@ int16_t i;
|
||||
this -> T_g = T_s - T_u;
|
||||
fft_buffer = my_fftHandler. getVector ();
|
||||
phaseReference .resize (T_u);
|
||||
|
||||
connect (this, SIGNAL (show_snr (int)),
|
||||
mr, SLOT (show_snr (int)));
|
||||
|
||||
snrCount = 0;
|
||||
snr = 0;
|
||||
}
|
||||
|
||||
ofdmDecoder::~ofdmDecoder (void) {
|
||||
@@ -93,17 +87,6 @@ void ofdmDecoder::processBlock_0 (std::vector <std::complex<float> > buffer) {
|
||||
T_u * sizeof (std::complex<float>));
|
||||
|
||||
my_fftHandler. do_FFT ();
|
||||
/**
|
||||
* The SNR is determined by looking at a segment of bins
|
||||
 *	within the signal region and bins outside.
|
||||
* It is just an indication
|
||||
*/
|
||||
|
||||
if (++snrCount > 10) {
|
||||
snr = 0.8 * snr + 0.2 * get_snr (fft_buffer);
|
||||
// show_snr (snr);
|
||||
snrCount = 0;
|
||||
}
|
||||
/**
|
||||
* we are now in the frequency domain, and we keep the carriers
|
||||
* as coming from the FFT as phase reference.
|
||||
@@ -205,26 +188,4 @@ toBitsLabel:
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* for the snr we have a full T_u wide vector, with in the middle
|
||||
* K carriers.
|
||||
* Just get the strength from the selected carriers compared
|
||||
* to the strength of the carriers outside that region
|
||||
*/
|
||||
int16_t ofdmDecoder::get_snr (std::complex<float> *v) {
|
||||
int16_t i;
|
||||
float noise = 0;
|
||||
float signal = 0;
|
||||
|
||||
for (i = -100; i < 100; i ++)
|
||||
noise += abs (v [(T_u / 2 + i)]);
|
||||
|
||||
noise /= 200;
|
||||
for (i = - carriers / 4; i < carriers / 4; i ++)
|
||||
signal += abs (v [(T_u + i) % T_u]);
|
||||
signal /= (carriers / 2);
|
||||
|
||||
return 20 * log10 ((signal + 0.005) / (noise + 0.005));
|
||||
}
|
||||
|
||||
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB.
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The eep handling
|
||||
@@ -159,7 +159,7 @@ int16_t inputCounter = 0;
|
||||
if (indexTable [i])
|
||||
viterbiBlock [i] = v [inputCounter ++];
|
||||
|
||||
viterbi_768::deconvolve (viterbiBlock. data (), outBuffer);
|
||||
viterbiHandler::deconvolve (viterbiBlock. data (), outBuffer);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB.
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*
|
||||
@@ -26,11 +26,12 @@
|
||||
#include "protection.h"
|
||||
|
||||
protection::protection (int16_t bitRate, int16_t protLevel):
|
||||
viterbi_768 (24 * bitRate, false),
|
||||
viterbiHandler (24 * bitRate),
|
||||
outSize (24 * bitRate),
|
||||
indexTable (outSize * 4 + 24),
|
||||
viterbiBlock (outSize * 4 + 24){
|
||||
this -> bitRate = bitRate;
|
||||
(void)protLevel;
|
||||
}
|
||||
protection::~protection (void) {}
|
||||
bool protection::deconvolve (int16_t *a,
|
||||
|
@@ -4,19 +4,19 @@
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB program
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* This file is part of the dabradio
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The deconvolution for uep
|
||||
@@ -231,6 +231,8 @@ bool uep_protection::deconvolve (int16_t *v,
|
||||
uint8_t *outBuffer) {
|
||||
int16_t i;
|
||||
int16_t inputCounter = 0;
|
||||
|
||||
(void)size;
|
||||
// clear the bits in the viterbiBlock,
|
||||
// only the non-punctured ones are set
|
||||
memset (viterbiBlock. data (), 0,
|
||||
@@ -240,6 +242,6 @@ int16_t inputCounter = 0;
|
||||
for (i = 0; i < outSize * 4 + 24; i ++)
|
||||
if (indexTable [i])
|
||||
viterbiBlock [i] = v [inputCounter ++];
|
||||
viterbi_768::deconvolve (viterbiBlock. data (), outBuffer);
|
||||
viterbiHandler::deconvolve (viterbiBlock. data (), outBuffer);
|
||||
return true;
|
||||
}
|
||||
|
@@ -1,100 +0,0 @@
|
||||
#
|
||||
//
|
||||
// This LUT implementation of atan2 is a C++ translation of
|
||||
// a Java discussion on the net
|
||||
// http://www.java-gaming.org/index.php?topic=14647.0
|
||||
|
||||
#include "Xtan2.h"
|
||||
|
||||
#define SIZE 8192
|
||||
#define EZIS (-SIZE)
|
||||
|
||||
compAtan::compAtan (void) {
//	The output swings from -Stretch to Stretch, changing Stretch
//	to 1 gives the equivalent of "atan2 (y, x) / M_PI"
    Stretch = M_PI;

//	all eight tables have SIZE + 1 entries
    float **tables [] = {
       &ATAN2_TABLE_PPY, &ATAN2_TABLE_PPX,
       &ATAN2_TABLE_PNY, &ATAN2_TABLE_PNX,
       &ATAN2_TABLE_NPY, &ATAN2_TABLE_NPX,
       &ATAN2_TABLE_NNY, &ATAN2_TABLE_NNX
    };
    for (int t = 0; t < 8; t ++)
       *tables [t] = new float [SIZE + 1];

//	entry i holds the value for the ratio i / SIZE; the other seven
//	tables are derived from the PPY one
    for (int i = 0; i <= SIZE; i ++) {
       const float ratio    = (float)i / SIZE;
       const float base     = atan (ratio) * Stretch / M_PI;
       const float half     = Stretch * 0.5f;

       ATAN2_TABLE_PPY [i]  = base;
       ATAN2_TABLE_PPX [i]  = half - base;
       ATAN2_TABLE_PNY [i]  = -base;
       ATAN2_TABLE_PNX [i]  = base - half;
       ATAN2_TABLE_NPY [i]  = Stretch - base;
       ATAN2_TABLE_NPX [i]  = base + half;
       ATAN2_TABLE_NNY [i]  = base - Stretch;
       ATAN2_TABLE_NNX [i]  = -half - base;
    }
}
|
||||
|
||||
//	The tables are allocated with "new float [...]" in the constructor,
//	so they have to be released with the array form of delete.
compAtan::~compAtan (void) {
    delete [] ATAN2_TABLE_PPY;
    delete [] ATAN2_TABLE_PPX;
    delete [] ATAN2_TABLE_PNY;
    delete [] ATAN2_TABLE_PNX;
    delete [] ATAN2_TABLE_NPY;
    delete [] ATAN2_TABLE_NPX;
    delete [] ATAN2_TABLE_NNY;
    delete [] ATAN2_TABLE_NNX;
}
|
||||
|
||||
/**
|
||||
* ATAN2 : performance degrades due to the many "0" tests
|
||||
*/
|
||||
|
||||
float compAtan::atan2 (float y, float x) {
|
||||
if (x == 0) {
|
||||
if (y == 0) return 0;
|
||||
// return std::numeric_limits<float>::infinity ();
|
||||
else
|
||||
if (y > 0)
|
||||
return M_PI / 2;
|
||||
else // y < 0
|
||||
return - M_PI / 2;
|
||||
}
|
||||
|
||||
if (x > 0) {
|
||||
if (y >= 0) {
|
||||
if (x >= y)
|
||||
return ATAN2_TABLE_PPY[(int)(SIZE * y / x + 0.5)];
|
||||
else
|
||||
return ATAN2_TABLE_PPX[(int)(SIZE * x / y + 0.5)];
|
||||
|
||||
}
|
||||
else {
|
||||
if (x >= -y)
|
||||
return ATAN2_TABLE_PNY[(int)(EZIS * y / x + 0.5)];
|
||||
else
|
||||
return ATAN2_TABLE_PNX[(int)(EZIS * x / y + 0.5)];
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (y >= 0) {
|
||||
if (-x >= y)
|
||||
return ATAN2_TABLE_NPY[(int)(EZIS * y / x + 0.5)];
|
||||
else
|
||||
return ATAN2_TABLE_NPX[(int)(EZIS * x / y + 0.5)];
|
||||
}
|
||||
else {
|
||||
if (x <= y) // (-x >= -y)
|
||||
return ATAN2_TABLE_NNY[(int)(SIZE * y / x + 0.5)];
|
||||
else
|
||||
return ATAN2_TABLE_NNX[(int)(SIZE * x / y + 0.5)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//	the argument of v, computed with the table driven atan2
float   compAtan::argX (std::complex<float> v) {
    const float re  = real (v);
    const float im  = imag (v);
    return atan2 (im, re);
}
|
244
src/support/viterbi-handler.cpp
Normal file
244
src/support/viterbi-handler.cpp
Normal file
@@ -0,0 +1,244 @@
|
||||
#
|
||||
/*
|
||||
* Copyright (C) 2014 .. 2017
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of dabradio
|
||||
*
|
||||
* dabradio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dabradio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dabradio; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include "viterbi-handler.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define K 7
|
||||
#define Poly1 0133
|
||||
#define Poly2 0171
|
||||
#define Poly3 0145
|
||||
#define Poly4 0133
|
||||
#define numofStates (1 << (K - 1))
|
||||
|
||||
static int predecessor_for_0 [numofStates];
|
||||
static int predecessor_for_1 [numofStates];
|
||||
static int16_t indexTable [2 * numofStates];
|
||||
|
||||
//	Allocates the trellis for blocks of "blockLength" bits and fills
//	the file level lookup tables.
viterbiHandler::viterbiHandler (int blockLength) {
const int usedColumns   = blockLength + 6;

    this -> blockLength = blockLength;

//	one column more is allocated than is initialized below
    transCosts      = new int *[usedColumns + 1];
    history         = new int *[usedColumns + 1];
    stateSequence   = new int  [usedColumns + 1];

    for (int column = 0; column < usedColumns; column ++) {
       int *costs       = new int [numofStates];
       int *predecessor = new int [numofStates];
       for (int state = 0; state < numofStates; state ++) {
          costs [state]         = 0;
          predecessor [state]   = 0;
       }
       transCosts [column]      = costs;
       history [column]         = predecessor;
       stateSequence [column]   = 0;
    }

//	indexTable maps (input bit, state) to the index in the cost table
//	that belongs to the four bits generated by the four polynomials:
//	the bit of Poly1 has weight 8, the bit of Poly4 has weight 1.
    for (int bit = 0; bit < 2; bit ++) {
       for (int state = 0; state < numofStates; state ++) {
          int16_t index = 0;
          if (bitFor (state, Poly1, bit) != 0)
             index += 8;
          if (bitFor (state, Poly2, bit) != 0)
             index += 4;
          if (bitFor (state, Poly3, bit) != 0)
             index += 2;
          if (bitFor (state, Poly4, bit) != 0)
             index += 1;
          indexTable [bit * numofStates + state] = index;
       }
    }

//	the two possible predecessors of each state
    for (int state = 0; state < numofStates; state ++) {
       const int shifted = state << 1;
       predecessor_for_0 [state] = shifted & (numofStates - 1);
       predecessor_for_1 [state] = (shifted + 1) & (numofStates - 1);
    }
}
|
||||
|
||||
//	Releases the trellis. Only the first blockLength + 6 columns were
//	allocated by the constructor, so only those are deleted.
viterbiHandler::~viterbiHandler (void) {
    for (int i = 0; i < blockLength + 6; i ++) {
       delete [] transCosts [i];
       delete [] history [i];
    }
    delete [] transCosts;
    delete [] history;
    delete [] stateSequence;
}
|
||||
|
||||
// Note that the soft bits are such that
|
||||
// they are int16_t -255 -> (bit)1, +255 -> (bit)0
|
||||
//	Fills costTable for one trellis step. Entry "index" is the sum of
//	the four values, each one added when the corresponding bit of the
//	index is set and subtracted when it is clear. Bit 3 of the index
//	belongs to sym_0, bit 0 to sym_3.
void    viterbiHandler::computeCostTable (int16_t sym_0,
                                          int16_t sym_1,
                                          int16_t sym_2, int16_t sym_3) {
    for (int index = 0; index < 16; index ++) {
       int cost = 0;
       cost += (index & 8) ? sym_0 : - sym_0;
       cost += (index & 4) ? sym_1 : - sym_1;
       cost += (index & 2) ? sym_2 : - sym_2;
       cost += (index & 1) ? sym_3 : - sym_3;
       costTable [index] = cost;
    }
}
|
||||
|
||||
// block is the sequence of soft bits
|
||||
// its length = 4 * blockLength + 4 * 6
|
||||
void viterbiHandler::deconvolve (int16_t *sym, uint8_t *bitBuffer) {
|
||||
int prev_0, prev_1;
|
||||
int costs_0, costs_1;
|
||||
int i;
|
||||
|
||||
// first step is to "pump" the soft bits into the state machine
|
||||
// and compute the cost matrix.
|
||||
// we assume the overall costs for state 0 are zero
|
||||
// and remain zero
|
||||
|
||||
for (i = 1; i < blockLength + 6; i ++) {
|
||||
int16_t sym_0 = (int16_t)(- sym [4 * (i - 1) + 0]);
|
||||
int16_t sym_1 = (int16_t)(- sym [4 * (i - 1) + 1]);
|
||||
int16_t sym_2 = (int16_t)(- sym [4 * (i - 1) + 2]);
|
||||
int16_t sym_3 = (int16_t)(- sym [4 * (i - 1) + 3]);
|
||||
int *transCosts_i = transCosts [i];
|
||||
int *history_i = history [i];
|
||||
|
||||
computeCostTable (sym_0, sym_1, sym_2, sym_3);
|
||||
for (int cState = 0; cState < numofStates / 2; cState ++) {
|
||||
uint8_t entrybit = 0;
|
||||
prev_0 = predecessor_for_0 [cState];
|
||||
prev_1 = predecessor_for_1 [cState];
|
||||
// we compute the minimal costs, based on the costs of the
|
||||
// prev states, and the additional costs of arriving from
|
||||
// the previous state to the current state with the symbol "sym"
|
||||
//
|
||||
// entrybit = 0, so the index for the cost function is prev_xx
|
||||
costs_0 = transCosts [i - 1] [prev_0] +
|
||||
costTable [indexTable [prev_0]];
|
||||
costs_1 = transCosts [i - 1] [prev_1] +
|
||||
costTable [indexTable [prev_1]];
|
||||
if (costs_0 < costs_1) {
|
||||
transCosts_i [cState] = costs_0;
|
||||
history_i [cState] = prev_0;
|
||||
} else {
|
||||
transCosts_i [cState] = costs_1;
|
||||
history_i [cState] = prev_1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int cState = numofStates / 2;
|
||||
cState < numofStates; cState ++) {
|
||||
uint8_t entrybit = 1;
|
||||
prev_0 = predecessor_for_0 [cState];
|
||||
prev_1 = predecessor_for_1 [cState];
|
||||
|
||||
// we compute the minimal costs, based on the costs of the
|
||||
// prev states, and the additional costs of arriving from
|
||||
// the previous state to the current state with the symbol row "sym"
|
||||
//
|
||||
// entrybit is here "1", so the index is id cost function
|
||||
// is prev_xx + NumofStates
|
||||
costs_0 = transCosts [i - 1] [prev_0] +
|
||||
costTable [indexTable [prev_0 + numofStates]];
|
||||
costs_1 = transCosts [i - 1] [prev_1] +
|
||||
costTable [indexTable [prev_1 + numofStates]];
|
||||
if (costs_0 < costs_1) {
|
||||
transCosts_i [cState] = costs_0;
|
||||
history_i [cState] = prev_0;
|
||||
} else {
|
||||
transCosts_i [cState] = costs_1;
|
||||
history_i [cState] = prev_1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Once all costs are computed, we can look for the minimal cost
|
||||
// Our "end state" is somewhere in column blockLength + 6
|
||||
int minimalCosts = 1000000;
|
||||
int bestState = 0;
|
||||
|
||||
for (i = 0; i < numofStates; i++) {
|
||||
if (transCosts [blockLength + 6 - 1][i] < minimalCosts) {
|
||||
minimalCosts = transCosts [blockLength + 6 - 1][i];
|
||||
bestState = i;
|
||||
}
|
||||
}
|
||||
|
||||
stateSequence [blockLength + 6 - 1] = bestState;
|
||||
/*
|
||||
* Trace backgoes back to state 0, and builds up the
|
||||
* sequence of decoded symbols
|
||||
*/
|
||||
for (i = blockLength + 6 - 1; i > 0; i --)
|
||||
stateSequence [i - 1] = history [i][stateSequence[i]];
|
||||
|
||||
for (i = 1; i <= blockLength; i++)
|
||||
bitBuffer [i - 1] =
|
||||
(uint8_t) ((stateSequence [i] >= numofStates / 2) ? 01 : 00);
|
||||
}
|
||||
|
||||
/*
|
||||
* as an aid, we give a function "bitFor" that, given
|
||||
* the register state, the polynome and the bit to be inserted
|
||||
* returns the bit coming from the engine
|
||||
*/
|
||||
//	Returns the output bit for polynomial "poly", given the register
//	"state" and the "bit" that is shifted in.
//	The result is the parity of the lowest K + 1 bits of
//	(register & poly).
uint8_t viterbiHandler::bitFor (int state, int poly, int bit) {
uint8_t resBit  = 0;
//	the register after shifting "bit" in: the new bit is placed
//	just above the bits of the state
int     theRegister = bit == 0 ? state : (state + numofStates);

    theRegister &= poly;
//	now the exclusive or of the individual bits
    for (int i = 0; i <= K; i ++) {
       resBit ^= (uint8_t)(theRegister & 01);
       theRegister >>= 1;
    }

    return resBit;
}
|
||||
|
@@ -1,296 +0,0 @@
|
||||
#
|
||||
/*
|
||||
* Copyright (C) 2013
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB program
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* This viterbi decoder is used for deconvolving the data segments
|
||||
* for audio and/or data. The code is
|
||||
* as given by the Spiral Project. All rights gratefully acknowledged.
|
||||
* decoder
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mm_malloc.h"
|
||||
#include "viterbi.h"
|
||||
#include <cstring>
|
||||
#ifdef __MINGW32__
|
||||
#include <intrin.h>
|
||||
#include <malloc.h>
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
//
//	It took a while to discover that the polynomes used in a
//	"home made" implementation were bit-reversed!!
//	The official ones are the ones that are active here, both the
//	octal and the (commented out) decimal notation are given.
#define K 7
#define POLYS {0155, 0117, 0123, 0155}
//#define	POLYS	{109, 79, 83, 109}
//	In the bit-reversed form the polynomes look like:
//#define	POLYS	{ 0133, 0171, 0145, 0133 }
//#define	POLYS	{ 91, 121, 101, 91 }

#define METRICSHIFT 0
#define PRECISIONSHIFT 0
#define RENORMALIZE_THRESHOLD 137

//
/*
 *	ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte:
 *	a register of K - 1 bits is shifted up (ADDSHIFT) when it is
 *	narrower than 8 bits and shifted down (SUBSHIFT) when it is wider.
 */
#if ((K - 1) > 8)
#define ADDSHIFT 0
#define SUBSHIFT ((K-1)-8)
#elif ((K - 1) < 8)
#define ADDSHIFT (8-(K-1))
#define SUBSHIFT 0
#else
#define ADDSHIFT 0
#define SUBSHIFT 0
#endif
|
||||
|
||||
//
//	Parity lookup table with 256 entries: Partab [v] is 1 when
//	the byte value v has an odd number of bits set, and 0 otherwise.
//	Each line below holds the 16 entries that start at the index
//	given in front of it.
static uint8_t Partab [] = {
/* 0x00 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0x10 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0x20 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0x30 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0x40 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0x50 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0x60 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0x70 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0x80 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0x90 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0xA0 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0xB0 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0xC0 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
/* 0xD0 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0xE0 */	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
/* 0xF0 */	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
};
|
||||
|
||||
//
|
||||
// One could create the table above, i.e. a 256 entry
|
||||
// odd-parity lookup table by the following function
|
||||
// It is now precomputed
|
||||
static
|
||||
void partab_init (void){
|
||||
int16_t i,cnt,ti;
|
||||
|
||||
for (i = 0; i < 256; i++){
|
||||
cnt = 0;
|
||||
ti = i;
|
||||
while (ti != 0) {
|
||||
if (ti & 1) cnt++;
|
||||
ti >>= 1;
|
||||
}
|
||||
Partab [i] = cnt & 1;
|
||||
}
|
||||
}
|
||||
|
||||
int16_t viterbi::parity (int16_t x){
|
||||
/* Fold down to one byte */
|
||||
x ^= (x >> 8);
|
||||
return Partab [x];
|
||||
}
|
||||
|
||||
static inline
|
||||
void renormalize (int16_t* X, int16_t threshold){
|
||||
int32_t i;
|
||||
|
||||
if (X [0] > threshold){
|
||||
int16_t min = X [0];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
if (min > X[i])
|
||||
min = X[i];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
X[i] -= min;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 *	The constructor records the frame length, allocates the 16 byte
 *	aligned buffers "data", "symbols" and "vp. decisions", fills
 *	Branchtab from the polynomes and initializes the metrics.
 *
 *	wordlength	the number of bits in a frame, stored in frameBits
 *
 *	Note that a failing posix_memalign only leads to a message,
 *	construction continues; the result of _aligned_malloc is
 *	not checked at all.
 */
viterbi::viterbi (int16_t wordlength) {
int	polys [RATE]	= POLYS;
//	all buffer sizes are derived from wordlength + (K - 1)
const int	totalBits	= wordlength + (K - 1);

	frameBits	= wordlength;

#ifdef	__MINGW32__
uint32_t	size;
	size		= (2 * (totalBits / 8 + 1 + 16)) & ~0x0F;
	data		= (uint8_t *)_aligned_malloc (size, 16);
	size		= (2 * (RATE * totalBits * sizeof (int16_t) + 1 + 16)) & ~0x0F;
	symbols		= (int16_t *)_aligned_malloc (size, 16);
	size		= (2 * (totalBits * sizeof (decision_t) + 16)) & ~0x0F;
	vp. decisions	= (decision_t *)_aligned_malloc (size, 16);
#else
	if (posix_memalign ((void **)&data, 16,
	                    totalBits / 8 + 1) != 0)
	   printf("Allocation of data array failed\n");

	if (posix_memalign ((void **)&symbols, 16,
	                    RATE * totalBits * sizeof (int16_t)) != 0)
	   printf("Allocation of symbols array failed\n");

	if (posix_memalign ((void **)&(vp. decisions), 16,
	                    2 * totalBits * sizeof (decision_t)) != 0)
	   printf ("Allocation of vp decisions failed\n");
#endif

//	an entry of Branchtab is 255 when the sign of the polynome and
//	the parity of the state bits it selects differ, and 0 otherwise
	for (int16_t state = 0; state < NUMSTATES / 2; state ++) {
	   for (int16_t p = 0; p < RATE; p ++) {
	      const int flip = (polys [p] < 0) ^
	                        parity ((2 * state) & abs (polys [p]));
	      Branchtab [p * NUMSTATES / 2 + state] = flip ? 255 : 0;
	   }
	}

	init_viterbi (&vp, 0);
}
|
||||
|
||||
|
||||
/*
 *	The destructor releases the three buffers that were allocated
 *	in the constructor, using the release function that belongs to
 *	the allocator of the platform.
 */
viterbi::~viterbi (void) {
#ifndef	__MINGW32__
	free (vp. decisions);
	free (data);
	free (symbols);
#else
	_aligned_free (vp. decisions);
	_aligned_free (data);
	_aligned_free (symbols);
#endif
}
|
||||
|
||||
//	maskTable [o] selects bit o of a byte, counted from the
//	most significant bit (o = 0) to the least significant one (o = 7)
static int maskTable [] = {
	0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
};

//	getbit returns bit o (0 .. 7, most significant bit first)
//	of the byte v as either 0 or 1
static inline
uint8_t	getbit (uint8_t v, int32_t o) {
	if ((v & maskTable [o]) != 0)
	   return 1;
	return 0;
}
|
||||
|
||||
void viterbi::deconvolve (int16_t *input, uint8_t *output) {
|
||||
int16_t i;
|
||||
|
||||
init_viterbi (&vp, 0);
|
||||
for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
|
||||
int16_t temp = input [i] + 127;
|
||||
if (temp < 0) temp = 0;
|
||||
if (temp > 255) temp = 255;
|
||||
symbols [i] = temp;
|
||||
}
|
||||
|
||||
update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
|
||||
chainback_viterbi (&vp, data, frameBits, 0);
|
||||
|
||||
for (i = 0; i < (int16_t)frameBits; i ++)
|
||||
output [i] = getbit (data [i >> 3], i & 07);
|
||||
}
|
||||
|
||||
/* C-language butterfly */
|
||||
void viterbi::BFLY (int i, int s, int16_t * syms,
|
||||
struct v * vp, decision_t * d) {
|
||||
int32_t j, decision0, decision1;
|
||||
int16_t metric, m0, m1, m2, m3;
|
||||
|
||||
metric = 0;
|
||||
for (j = 0; j < RATE;j++)
|
||||
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s * RATE + j]) >>
|
||||
METRICSHIFT ;
|
||||
metric = metric >> PRECISIONSHIFT;
|
||||
const int16_t max =
|
||||
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
|
||||
|
||||
m0 = vp -> old_metrics->t [i] + metric;
|
||||
m1 = vp -> old_metrics->t [i + NUMSTATES / 2] + (max - metric);
|
||||
m2 = vp -> old_metrics->t [i] + (max - metric);
|
||||
m3 = vp -> old_metrics->t [i + NUMSTATES / 2] + metric;
|
||||
|
||||
decision0 = ((int32_t)(m0 - m1)) > 0;
|
||||
decision1 = ((int32_t)(m2 - m3)) > 0;
|
||||
|
||||
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
|
||||
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
|
||||
|
||||
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
|
||||
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Update decoder with a block of demodulated symbols
|
||||
* Note that nbits is the number of decoded data bits, not the number
|
||||
* of symbols!
|
||||
*/
|
||||
void viterbi::update_viterbi_blk_GENERIC (struct v *vp,
|
||||
int16_t *syms, int16_t nbits){
|
||||
decision_t *d = (decision_t *)vp -> decisions;
|
||||
int32_t s, i;
|
||||
|
||||
for (s = 0; s < nbits; s++)
|
||||
memset (&d [s], 0, sizeof (decision_t));
|
||||
|
||||
for (s = 0; s < nbits; s++){
|
||||
void *tmp;
|
||||
for (i = 0; i < NUMSTATES / 2; i++)
|
||||
BFLY (i, s, syms, vp, vp -> decisions);
|
||||
|
||||
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
|
||||
// Swap pointers to old and new metrics
|
||||
tmp = vp -> old_metrics;
|
||||
vp -> old_metrics = vp -> new_metrics;
|
||||
vp -> new_metrics = (metric_t *)tmp;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Viterbi chainback
|
||||
*/
|
||||
void viterbi::chainback_viterbi (struct v *vp,
|
||||
uint8_t *data, /* Decoded output data */
|
||||
int16_t nbits, /* Number of data bits */
|
||||
uint16_t endstate){ /*Terminal encoder state */
|
||||
decision_t *d = vp -> decisions;
|
||||
|
||||
/*
|
||||
* Make room beyond the end of the encoder register so we can
|
||||
* accumulate a full byte of decoded data
|
||||
*/
|
||||
endstate = (endstate % NUMSTATES) << ADDSHIFT;
|
||||
/*
|
||||
* The store into data[] only needs to be done every 8 bits.
|
||||
* But this avoids a conditional branch, and the writes will
|
||||
* combine in the cache anyway
|
||||
*/
|
||||
d += (K - 1); /* Look past tail */
|
||||
while (nbits-- != 0){
|
||||
int k;
|
||||
// int l = (endstate >> ADDSHIFT) / 32;
|
||||
// int m = (endstate >> ADDSHIFT) % 32;
|
||||
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
|
||||
((endstate>>ADDSHIFT) % 32)) & 1;
|
||||
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
|
||||
data [nbits >> 3] = endstate >> SUBSHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize Viterbi decoder for start of new frame */
|
||||
void viterbi::init_viterbi (struct v *p, int16_t starting_state){
|
||||
struct v *vp = p;
|
||||
int32_t i;
|
||||
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
vp -> metrics1.t[i] = 63;
|
||||
|
||||
vp -> old_metrics = &vp -> metrics1;
|
||||
vp -> new_metrics = &vp -> metrics2;
|
||||
/* Bias known start state */
|
||||
vp -> old_metrics-> t [starting_state & (NUMSTATES-1)] = 0;
|
||||
}
|
||||
|
@@ -1,11 +0,0 @@
|
||||
|
||||
The viterbi implementation is copied from the spiral one, all
|
||||
rights gratefully acknowledged.
|
||||
Since we serve more than a single platform, we do not use the SSE
|
||||
implementation.
|
||||
|
||||
The particular spiral implementation (see the file "spiral_no_sse.c")
is generated for the wordsize and the other parameters for FIC blocks.
The implementation therefore has a "switch" that - when set to true -
selects the spiral implementation and - when set to false (the default) -
uses the generic implementation.
|
File diff suppressed because it is too large
Load Diff
@@ -1,35 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
|
||||
/*	parameters of the code the decoder was generated for	*/
#define	K			7
#define	RATE			4
#define	POLYS			{ 109, 79, 83, 109 }
#define	NUMSTATES		64
#define	FRAMEBITS		2048

/*	types used for the decisions and for the computations	*/
#define	DECISIONTYPE		unsigned int
#define	DECISIONTYPE_BITSIZE	32
#define	COMPUTETYPE		unsigned int
#define	__int32			int

/*
 *	further settings as emitted by the generator
 *	NOTE(review): EBN0 and TRIALS look like settings of a test
 *	driver rather than of the decoder itself - to be confirmed
 */
#define	EBN0			3
#define	TRIALS			10000
#define	FUNC			FULL_SPIRAL

/*	scaling and renormalization of the metrics	*/
#define	METRICSHIFT		0
#define	PRECISIONSHIFT		0
#define	RENORMALIZE_THRESHOLD	2000000000
|
@@ -1,698 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
|
||||
//#include <include/mm_malloc.h>
|
||||
//#include <pmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <mmintrin.h>
|
||||
#include "spiral-sse.h"
|
||||
/*
 *	Initialization entry of the generated decoder. The body is
 *	empty, a call has no effect.
 */
void	init_FULL_SPIRAL () {
	return;
}
|
||||
|
||||
void FULL_SPIRAL_sse(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
|
||||
int i9;
|
||||
// for(i9 = 0; i9 <= amount; i9++) {
|
||||
for(i9 = 0; i9 < amount; i9++) {
|
||||
int32_t a1002, a1006, a1010, a1014, a822, a828, a834
|
||||
, a840;
|
||||
int a820, a850;
|
||||
unsigned char s118, s125, s132, s139, s146, s153, s160
|
||||
, s167, s174, s181, s188, s195, s202, s209, s216
|
||||
, s223;
|
||||
int32_t *a1001, *a1005, *a1009, *a1013, *a821, *a827, *a833
|
||||
, *a839, *b104;
|
||||
unsigned char *a1021, *a1030, *a1039, *a1048, *a1057, *a1066, *a1075
|
||||
, *a1084, *a849, *a851, *a872, *a893, *a914, *a935, *a956
|
||||
, *a977, *a998;
|
||||
__m128i *a1000, *a818, *a819, *a824, *a830, *a836, *a842
|
||||
, *a852, *a853, *a854, *a855, *a856, *a859, *a862, *a865
|
||||
, *a873, *a874, *a875, *a876, *a877, *a880, *a883, *a886
|
||||
, *a894, *a895, *a896, *a897, *a898, *a901, *a904, *a907
|
||||
, *a915, *a916, *a917, *a918, *a919, *a922, *a925, *a928
|
||||
, *a936, *a937, *a938, *a939, *a940, *a943, *a946, *a949
|
||||
, *a957, *a958, *a959, *a960, *a961, *a964, *a967, *a970
|
||||
, *a978, *a979, *a980, *a981, *a982, *a985, *a988, *a991
|
||||
, *a999;
|
||||
__m128i a1003, a1004, a1007, a1008, a1011, a1012, a1015
|
||||
, a1016, a1017, a1018, a1019, a1020, a1022, a1023, a1024
|
||||
, a1025, a1026, a1027, a1028, a1029, a1031, a1032, a1033
|
||||
, a1034, a1035, a1036, a1037, a1038, a1040, a1041, a1042
|
||||
, a1043, a1044, a1045, a1046, a1047, a1049, a1050, a1051
|
||||
, a1052, a1053, a1054, a1055, a1056, a1058, a1059, a1060
|
||||
, a1061, a1062, a1063, a1064, a1065, a1067, a1068, a1069
|
||||
, a1070, a1071, a1072, a1073, a1074, a1076, a1077, a1078
|
||||
, a1079, a1080, a1081, a1082, a1083, a823, a825, a826
|
||||
, a829, a831, a832, a835, a837, a838, a841, a843
|
||||
, a844, a845, a846, a847, a848, a857, a858, a860
|
||||
, a861, a863, a864, a866, a867, a868, a869, a870
|
||||
, a871, a878, a879, a881, a882, a884, a885, a887
|
||||
, a888, a889, a890, a891, a892, a899, a900, a902
|
||||
, a903, a905, a906, a908, a909, a910, a911, a912
|
||||
, a913, a920, a921, a923, a924, a926, a927, a929
|
||||
, a930, a931, a932, a933, a934, a941, a942, a944
|
||||
, a945, a947, a948, a950, a951, a952, a953, a954
|
||||
, a955, a962, a963, a965, a966, a968, a969, a971
|
||||
, a972, a973, a974, a975, a976, a983, a984, a986
|
||||
, a987, a989, a990, a992, a993, a994, a995, a996
|
||||
, a997, b105, b106, b107, b108, b109, b110, b111
|
||||
, b112, b113, b114, b115, b116, b117, b118, b119
|
||||
, b120, b121, b122, b123, b124, b125, b126, b127
|
||||
, b128, b129, b130, b131, b132, b133, b134, b135
|
||||
, b136, d37, d38, d39, d40, d41, d42, d43
|
||||
, d44, d45, d46, d47, d48, d49, d50, d51
|
||||
, d52, d53, d54, d55, d56, d57, d58, d59
|
||||
, d60, d61, d62, d63, d64, d65, d66, d67
|
||||
, d68, m100, m101, m102, m103, m104, m105, m106
|
||||
, m107, m108, m109, m110, m111, m112, m113, m114
|
||||
, m115, m116, m117, m118, m119, m120, m121, m122
|
||||
, m123, m124, m125, m126, m127, m128, m129, m130
|
||||
, m131, m132, m133, m134, m135, m136, m73, m74
|
||||
, m75, m76, m77, m78, m79, m80, m81, m82
|
||||
, m83, m84, m85, m86, m87, m88, m89, m90
|
||||
, m91, m92, m93, m94, m95, m96, m97, m98
|
||||
, m99, s114, s115, s116, s117, s119, s120, s121
|
||||
, s122, s123, s124, s126, s127, s128, s129, s130
|
||||
, s131, s133, s134, s135, s136, s137, s138, s140
|
||||
, s141, s142, s143, s144, s145, s147, s148, s149
|
||||
, s150, s151, s152, s154, s155, s156, s157, s158
|
||||
, s159, s161, s162, s163, s164, s165, s166, s168
|
||||
, s169, s170, s171, s172, s173, s175, s176, s177
|
||||
, s178, s179, s180, s182, s183, s184, s185, s186
|
||||
, s187, s189, s190, s191, s192, s193, s194, s196
|
||||
, s197, s198, s199, s200, s201, s203, s204, s205
|
||||
, s206, s207, s208, s210, s211, s212, s213, s214
|
||||
, s215, s217, s218, s219, s220, s221, s222, s224
|
||||
, s225, t39, t40, t41, t42, t43, t44, t45
|
||||
, t46, t47, t48, t49, t50, t51, t52, t53
|
||||
, t54, t55, t56, t57, t58, t59, t60, t61
|
||||
, t62, t63, t64, t65, t66, t67, t68, t69
|
||||
, t70;
|
||||
a818 = ((__m128i *) X);
|
||||
s114 = *(a818);
|
||||
a819 = (a818 + 8);
|
||||
s115 = *(a819);
|
||||
a820 = (8 * i9);
|
||||
a821 = (syms + a820);
|
||||
a822 = *(a821);
|
||||
a823 = _mm_set1_epi32(a822);
|
||||
a824 = ((__m128i *) Branchtab);
|
||||
a825 = *(a824);
|
||||
a826 = _mm_xor_si128(a823, a825);
|
||||
b104 = (a820 + syms);
|
||||
a827 = (b104 + 1);
|
||||
a828 = *(a827);
|
||||
a829 = _mm_set1_epi32(a828);
|
||||
a830 = (a824 + 8);
|
||||
a831 = *(a830);
|
||||
a832 = _mm_xor_si128(a829, a831);
|
||||
a833 = (b104 + 2);
|
||||
a834 = *(a833);
|
||||
a835 = _mm_set1_epi32(a834);
|
||||
a836 = (a824 + 16);
|
||||
a837 = *(a836);
|
||||
a838 = _mm_xor_si128(a835, a837);
|
||||
a839 = (b104 + 3);
|
||||
a840 = *(a839);
|
||||
a841 = _mm_set1_epi32(a840);
|
||||
a842 = (a824 + 24);
|
||||
a843 = *(a842);
|
||||
a844 = _mm_xor_si128(a841, a843);
|
||||
b105 = _mm_add_epi32(a826, a832);
|
||||
b106 = _mm_add_epi32(b105, a838);
|
||||
t39 = _mm_add_epi32(b106, a844);
|
||||
t40 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t39);
|
||||
m73 = _mm_add_epi32(s114, t39);
|
||||
m74 = _mm_add_epi32(s115, t40);
|
||||
m75 = _mm_add_epi32(s114, t40);
|
||||
m76 = _mm_add_epi32(s115, t39);
|
||||
d37 = _mm_cmpgt_epi32(m73, m74);
|
||||
d38 = _mm_cmpgt_epi32(m75, m76);
|
||||
a845 = _mm_andnot_si128(d37, m73);
|
||||
a846 = _mm_and_si128(d37, m74);
|
||||
s116 = _mm_or_si128(a845, a846);
|
||||
a847 = _mm_andnot_si128(d38, m75);
|
||||
a848 = _mm_and_si128(d38, m76);
|
||||
s117 = _mm_or_si128(a847, a848);
|
||||
s118 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d37,_mm_setzero_si128()),_mm_packs_epi16(d38,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a849 = ((unsigned char *) dec);
|
||||
a850 = (16 * i9);
|
||||
a851 = (a849 + a850);
|
||||
*(a851) = s118;
|
||||
s119 = _mm_unpacklo_epi32(s116, s117);
|
||||
s120 = _mm_unpackhi_epi32(s116, s117);
|
||||
a852 = ((__m128i *) Y);
|
||||
*(a852) = s119;
|
||||
a853 = (a852 + 1);
|
||||
*(a853) = s120;
|
||||
a854 = (a818 + 1);
|
||||
s121 = *(a854);
|
||||
a855 = (a818 + 9);
|
||||
s122 = *(a855);
|
||||
a856 = (a824 + 1);
|
||||
a857 = *(a856);
|
||||
a858 = _mm_xor_si128(a823, a857);
|
||||
a859 = (a824 + 9);
|
||||
a860 = *(a859);
|
||||
a861 = _mm_xor_si128(a829, a860);
|
||||
a862 = (a824 + 17);
|
||||
a863 = *(a862);
|
||||
a864 = _mm_xor_si128(a835, a863);
|
||||
a865 = (a824 + 25);
|
||||
a866 = *(a865);
|
||||
a867 = _mm_xor_si128(a841, a866);
|
||||
b107 = _mm_add_epi32(a858, a861);
|
||||
b108 = _mm_add_epi32(b107, a864);
|
||||
t41 = _mm_add_epi32(b108, a867);
|
||||
t42 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t41);
|
||||
m77 = _mm_add_epi32(s121, t41);
|
||||
m78 = _mm_add_epi32(s122, t42);
|
||||
m79 = _mm_add_epi32(s121, t42);
|
||||
m80 = _mm_add_epi32(s122, t41);
|
||||
d39 = _mm_cmpgt_epi32(m77, m78);
|
||||
d40 = _mm_cmpgt_epi32(m79, m80);
|
||||
a868 = _mm_andnot_si128(d39, m77);
|
||||
a869 = _mm_and_si128(d39, m78);
|
||||
s123 = _mm_or_si128(a868, a869);
|
||||
a870 = _mm_andnot_si128(d40, m79);
|
||||
a871 = _mm_and_si128(d40, m80);
|
||||
s124 = _mm_or_si128(a870, a871);
|
||||
s125 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d39,_mm_setzero_si128()),_mm_packs_epi16(d40,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a872 = (a851 + 1);
|
||||
*(a872) = s125;
|
||||
s126 = _mm_unpacklo_epi32(s123, s124);
|
||||
s127 = _mm_unpackhi_epi32(s123, s124);
|
||||
a873 = (a852 + 2);
|
||||
*(a873) = s126;
|
||||
a874 = (a852 + 3);
|
||||
*(a874) = s127;
|
||||
a875 = (a818 + 2);
|
||||
s128 = *(a875);
|
||||
a876 = (a818 + 10);
|
||||
s129 = *(a876);
|
||||
a877 = (a824 + 2);
|
||||
a878 = *(a877);
|
||||
a879 = _mm_xor_si128(a823, a878);
|
||||
a880 = (a824 + 10);
|
||||
a881 = *(a880);
|
||||
a882 = _mm_xor_si128(a829, a881);
|
||||
a883 = (a824 + 18);
|
||||
a884 = *(a883);
|
||||
a885 = _mm_xor_si128(a835, a884);
|
||||
a886 = (a824 + 26);
|
||||
a887 = *(a886);
|
||||
a888 = _mm_xor_si128(a841, a887);
|
||||
b109 = _mm_add_epi32(a879, a882);
|
||||
b110 = _mm_add_epi32(b109, a885);
|
||||
t43 = _mm_add_epi32(b110, a888);
|
||||
t44 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t43);
|
||||
m81 = _mm_add_epi32(s128, t43);
|
||||
m82 = _mm_add_epi32(s129, t44);
|
||||
m83 = _mm_add_epi32(s128, t44);
|
||||
m84 = _mm_add_epi32(s129, t43);
|
||||
d41 = _mm_cmpgt_epi32(m81, m82);
|
||||
d42 = _mm_cmpgt_epi32(m83, m84);
|
||||
a889 = _mm_andnot_si128(d41, m81);
|
||||
a890 = _mm_and_si128(d41, m82);
|
||||
s130 = _mm_or_si128(a889, a890);
|
||||
a891 = _mm_andnot_si128(d42, m83);
|
||||
a892 = _mm_and_si128(d42, m84);
|
||||
s131 = _mm_or_si128(a891, a892);
|
||||
s132 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d41,_mm_setzero_si128()),_mm_packs_epi16(d42,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a893 = (a851 + 2);
|
||||
*(a893) = s132;
|
||||
s133 = _mm_unpacklo_epi32(s130, s131);
|
||||
s134 = _mm_unpackhi_epi32(s130, s131);
|
||||
a894 = (a852 + 4);
|
||||
*(a894) = s133;
|
||||
a895 = (a852 + 5);
|
||||
*(a895) = s134;
|
||||
a896 = (a818 + 3);
|
||||
s135 = *(a896);
|
||||
a897 = (a818 + 11);
|
||||
s136 = *(a897);
|
||||
a898 = (a824 + 3);
|
||||
a899 = *(a898);
|
||||
a900 = _mm_xor_si128(a823, a899);
|
||||
a901 = (a824 + 11);
|
||||
a902 = *(a901);
|
||||
a903 = _mm_xor_si128(a829, a902);
|
||||
a904 = (a824 + 19);
|
||||
a905 = *(a904);
|
||||
a906 = _mm_xor_si128(a835, a905);
|
||||
a907 = (a824 + 27);
|
||||
a908 = *(a907);
|
||||
a909 = _mm_xor_si128(a841, a908);
|
||||
b111 = _mm_add_epi32(a900, a903);
|
||||
b112 = _mm_add_epi32(b111, a906);
|
||||
t45 = _mm_add_epi32(b112, a909);
|
||||
t46 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t45);
|
||||
m85 = _mm_add_epi32(s135, t45);
|
||||
m86 = _mm_add_epi32(s136, t46);
|
||||
m87 = _mm_add_epi32(s135, t46);
|
||||
m88 = _mm_add_epi32(s136, t45);
|
||||
d43 = _mm_cmpgt_epi32(m85, m86);
|
||||
d44 = _mm_cmpgt_epi32(m87, m88);
|
||||
a910 = _mm_andnot_si128(d43, m85);
|
||||
a911 = _mm_and_si128(d43, m86);
|
||||
s137 = _mm_or_si128(a910, a911);
|
||||
a912 = _mm_andnot_si128(d44, m87);
|
||||
a913 = _mm_and_si128(d44, m88);
|
||||
s138 = _mm_or_si128(a912, a913);
|
||||
s139 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d43,_mm_setzero_si128()),_mm_packs_epi16(d44,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a914 = (a851 + 3);
|
||||
*(a914) = s139;
|
||||
s140 = _mm_unpacklo_epi32(s137, s138);
|
||||
s141 = _mm_unpackhi_epi32(s137, s138);
|
||||
a915 = (a852 + 6);
|
||||
*(a915) = s140;
|
||||
a916 = (a852 + 7);
|
||||
*(a916) = s141;
|
||||
a917 = (a818 + 4);
|
||||
s142 = *(a917);
|
||||
a918 = (a818 + 12);
|
||||
s143 = *(a918);
|
||||
a919 = (a824 + 4);
|
||||
a920 = *(a919);
|
||||
a921 = _mm_xor_si128(a823, a920);
|
||||
a922 = (a824 + 12);
|
||||
a923 = *(a922);
|
||||
a924 = _mm_xor_si128(a829, a923);
|
||||
a925 = (a824 + 20);
|
||||
a926 = *(a925);
|
||||
a927 = _mm_xor_si128(a835, a926);
|
||||
a928 = (a824 + 28);
|
||||
a929 = *(a928);
|
||||
a930 = _mm_xor_si128(a841, a929);
|
||||
b113 = _mm_add_epi32(a921, a924);
|
||||
b114 = _mm_add_epi32(b113, a927);
|
||||
t47 = _mm_add_epi32(b114, a930);
|
||||
t48 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t47);
|
||||
m89 = _mm_add_epi32(s142, t47);
|
||||
m90 = _mm_add_epi32(s143, t48);
|
||||
m91 = _mm_add_epi32(s142, t48);
|
||||
m92 = _mm_add_epi32(s143, t47);
|
||||
d45 = _mm_cmpgt_epi32(m89, m90);
|
||||
d46 = _mm_cmpgt_epi32(m91, m92);
|
||||
a931 = _mm_andnot_si128(d45, m89);
|
||||
a932 = _mm_and_si128(d45, m90);
|
||||
s144 = _mm_or_si128(a931, a932);
|
||||
a933 = _mm_andnot_si128(d46, m91);
|
||||
a934 = _mm_and_si128(d46, m92);
|
||||
s145 = _mm_or_si128(a933, a934);
|
||||
s146 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d45,_mm_setzero_si128()),_mm_packs_epi16(d46,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a935 = (a851 + 4);
|
||||
*(a935) = s146;
|
||||
s147 = _mm_unpacklo_epi32(s144, s145);
|
||||
s148 = _mm_unpackhi_epi32(s144, s145);
|
||||
a936 = (a852 + 8);
|
||||
*(a936) = s147;
|
||||
a937 = (a852 + 9);
|
||||
*(a937) = s148;
|
||||
a938 = (a818 + 5);
|
||||
s149 = *(a938);
|
||||
a939 = (a818 + 13);
|
||||
s150 = *(a939);
|
||||
a940 = (a824 + 5);
|
||||
a941 = *(a940);
|
||||
a942 = _mm_xor_si128(a823, a941);
|
||||
a943 = (a824 + 13);
|
||||
a944 = *(a943);
|
||||
a945 = _mm_xor_si128(a829, a944);
|
||||
a946 = (a824 + 21);
|
||||
a947 = *(a946);
|
||||
a948 = _mm_xor_si128(a835, a947);
|
||||
a949 = (a824 + 29);
|
||||
a950 = *(a949);
|
||||
a951 = _mm_xor_si128(a841, a950);
|
||||
b115 = _mm_add_epi32(a942, a945);
|
||||
b116 = _mm_add_epi32(b115, a948);
|
||||
t49 = _mm_add_epi32(b116, a951);
|
||||
t50 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t49);
|
||||
m93 = _mm_add_epi32(s149, t49);
|
||||
m94 = _mm_add_epi32(s150, t50);
|
||||
m95 = _mm_add_epi32(s149, t50);
|
||||
m96 = _mm_add_epi32(s150, t49);
|
||||
d47 = _mm_cmpgt_epi32(m93, m94);
|
||||
d48 = _mm_cmpgt_epi32(m95, m96);
|
||||
a952 = _mm_andnot_si128(d47, m93);
|
||||
a953 = _mm_and_si128(d47, m94);
|
||||
s151 = _mm_or_si128(a952, a953);
|
||||
a954 = _mm_andnot_si128(d48, m95);
|
||||
a955 = _mm_and_si128(d48, m96);
|
||||
s152 = _mm_or_si128(a954, a955);
|
||||
s153 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d47,_mm_setzero_si128()),_mm_packs_epi16(d48,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a956 = (a851 + 5);
|
||||
*(a956) = s153;
|
||||
s154 = _mm_unpacklo_epi32(s151, s152);
|
||||
s155 = _mm_unpackhi_epi32(s151, s152);
|
||||
a957 = (a852 + 10);
|
||||
*(a957) = s154;
|
||||
a958 = (a852 + 11);
|
||||
*(a958) = s155;
|
||||
a959 = (a818 + 6);
|
||||
s156 = *(a959);
|
||||
a960 = (a818 + 14);
|
||||
s157 = *(a960);
|
||||
a961 = (a824 + 6);
|
||||
a962 = *(a961);
|
||||
a963 = _mm_xor_si128(a823, a962);
|
||||
a964 = (a824 + 14);
|
||||
a965 = *(a964);
|
||||
a966 = _mm_xor_si128(a829, a965);
|
||||
a967 = (a824 + 22);
|
||||
a968 = *(a967);
|
||||
a969 = _mm_xor_si128(a835, a968);
|
||||
a970 = (a824 + 30);
|
||||
a971 = *(a970);
|
||||
a972 = _mm_xor_si128(a841, a971);
|
||||
b117 = _mm_add_epi32(a963, a966);
|
||||
b118 = _mm_add_epi32(b117, a969);
|
||||
t51 = _mm_add_epi32(b118, a972);
|
||||
t52 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t51);
|
||||
m97 = _mm_add_epi32(s156, t51);
|
||||
m98 = _mm_add_epi32(s157, t52);
|
||||
m99 = _mm_add_epi32(s156, t52);
|
||||
m100 = _mm_add_epi32(s157, t51);
|
||||
d49 = _mm_cmpgt_epi32(m97, m98);
|
||||
d50 = _mm_cmpgt_epi32(m99, m100);
|
||||
a973 = _mm_andnot_si128(d49, m97);
|
||||
a974 = _mm_and_si128(d49, m98);
|
||||
s158 = _mm_or_si128(a973, a974);
|
||||
a975 = _mm_andnot_si128(d50, m99);
|
||||
a976 = _mm_and_si128(d50, m100);
|
||||
s159 = _mm_or_si128(a975, a976);
|
||||
s160 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d49,_mm_setzero_si128()),_mm_packs_epi16(d50,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a977 = (a851 + 6);
|
||||
*(a977) = s160;
|
||||
s161 = _mm_unpacklo_epi32(s158, s159);
|
||||
s162 = _mm_unpackhi_epi32(s158, s159);
|
||||
a978 = (a852 + 12);
|
||||
*(a978) = s161;
|
||||
a979 = (a852 + 13);
|
||||
*(a979) = s162;
|
||||
a980 = (a818 + 7);
|
||||
s163 = *(a980);
|
||||
a981 = (a818 + 15);
|
||||
s164 = *(a981);
|
||||
a982 = (a824 + 7);
|
||||
a983 = *(a982);
|
||||
a984 = _mm_xor_si128(a823, a983);
|
||||
a985 = (a824 + 15);
|
||||
a986 = *(a985);
|
||||
a987 = _mm_xor_si128(a829, a986);
|
||||
a988 = (a824 + 23);
|
||||
a989 = *(a988);
|
||||
a990 = _mm_xor_si128(a835, a989);
|
||||
a991 = (a824 + 31);
|
||||
a992 = *(a991);
|
||||
a993 = _mm_xor_si128(a841, a992);
|
||||
b119 = _mm_add_epi32(a984, a987);
|
||||
b120 = _mm_add_epi32(b119, a990);
|
||||
t53 = _mm_add_epi32(b120, a993);
|
||||
t54 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t53);
|
||||
m101 = _mm_add_epi32(s163, t53);
|
||||
m102 = _mm_add_epi32(s164, t54);
|
||||
m103 = _mm_add_epi32(s163, t54);
|
||||
m104 = _mm_add_epi32(s164, t53);
|
||||
d51 = _mm_cmpgt_epi32(m101, m102);
|
||||
d52 = _mm_cmpgt_epi32(m103, m104);
|
||||
a994 = _mm_andnot_si128(d51, m101);
|
||||
a995 = _mm_and_si128(d51, m102);
|
||||
s165 = _mm_or_si128(a994, a995);
|
||||
a996 = _mm_andnot_si128(d52, m103);
|
||||
a997 = _mm_and_si128(d52, m104);
|
||||
s166 = _mm_or_si128(a996, a997);
|
||||
s167 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d51,_mm_setzero_si128()),_mm_packs_epi16(d52,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a998 = (a851 + 7);
|
||||
*(a998) = s167;
|
||||
s168 = _mm_unpacklo_epi32(s165, s166);
|
||||
s169 = _mm_unpackhi_epi32(s165, s166);
|
||||
a999 = (a852 + 14);
|
||||
*(a999) = s168;
|
||||
a1000 = (a852 + 15);
|
||||
*(a1000) = s169;
|
||||
s170 = *(a852);
|
||||
s171 = *(a936);
|
||||
a1001 = (b104 + 4);
|
||||
a1002 = *(a1001);
|
||||
a1003 = _mm_set1_epi32(a1002);
|
||||
a1004 = _mm_xor_si128(a1003, a825);
|
||||
a1005 = (b104 + 5);
|
||||
a1006 = *(a1005);
|
||||
a1007 = _mm_set1_epi32(a1006);
|
||||
a1008 = _mm_xor_si128(a1007, a831);
|
||||
a1009 = (b104 + 6);
|
||||
a1010 = *(a1009);
|
||||
a1011 = _mm_set1_epi32(a1010);
|
||||
a1012 = _mm_xor_si128(a1011, a837);
|
||||
a1013 = (b104 + 7);
|
||||
a1014 = *(a1013);
|
||||
a1015 = _mm_set1_epi32(a1014);
|
||||
a1016 = _mm_xor_si128(a1015, a843);
|
||||
b121 = _mm_add_epi32(a1004, a1008);
|
||||
b122 = _mm_add_epi32(b121, a1012);
|
||||
t55 = _mm_add_epi32(b122, a1016);
|
||||
t56 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t55);
|
||||
m105 = _mm_add_epi32(s170, t55);
|
||||
m106 = _mm_add_epi32(s171, t56);
|
||||
m107 = _mm_add_epi32(s170, t56);
|
||||
m108 = _mm_add_epi32(s171, t55);
|
||||
d53 = _mm_cmpgt_epi32(m105, m106);
|
||||
d54 = _mm_cmpgt_epi32(m107, m108);
|
||||
a1017 = _mm_andnot_si128(d53, m105);
|
||||
a1018 = _mm_and_si128(d53, m106);
|
||||
s172 = _mm_or_si128(a1017, a1018);
|
||||
a1019 = _mm_andnot_si128(d54, m107);
|
||||
a1020 = _mm_and_si128(d54, m108);
|
||||
s173 = _mm_or_si128(a1019, a1020);
|
||||
s174 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d53,_mm_setzero_si128()),_mm_packs_epi16(d54,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1021 = (a851 + 8);
|
||||
*(a1021) = s174;
|
||||
s175 = _mm_unpacklo_epi32(s172, s173);
|
||||
s176 = _mm_unpackhi_epi32(s172, s173);
|
||||
*(a818) = s175;
|
||||
*(a854) = s176;
|
||||
s177 = *(a853);
|
||||
s178 = *(a937);
|
||||
a1022 = _mm_xor_si128(a1003, a857);
|
||||
a1023 = _mm_xor_si128(a1007, a860);
|
||||
a1024 = _mm_xor_si128(a1011, a863);
|
||||
a1025 = _mm_xor_si128(a1015, a866);
|
||||
b123 = _mm_add_epi32(a1022, a1023);
|
||||
b124 = _mm_add_epi32(b123, a1024);
|
||||
t57 = _mm_add_epi32(b124, a1025);
|
||||
t58 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t57);
|
||||
m109 = _mm_add_epi32(s177, t57);
|
||||
m110 = _mm_add_epi32(s178, t58);
|
||||
m111 = _mm_add_epi32(s177, t58);
|
||||
m112 = _mm_add_epi32(s178, t57);
|
||||
d55 = _mm_cmpgt_epi32(m109, m110);
|
||||
d56 = _mm_cmpgt_epi32(m111, m112);
|
||||
a1026 = _mm_andnot_si128(d55, m109);
|
||||
a1027 = _mm_and_si128(d55, m110);
|
||||
s179 = _mm_or_si128(a1026, a1027);
|
||||
a1028 = _mm_andnot_si128(d56, m111);
|
||||
a1029 = _mm_and_si128(d56, m112);
|
||||
s180 = _mm_or_si128(a1028, a1029);
|
||||
s181 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d55,_mm_setzero_si128()),_mm_packs_epi16(d56,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1030 = (a851 + 9);
|
||||
*(a1030) = s181;
|
||||
s182 = _mm_unpacklo_epi32(s179, s180);
|
||||
s183 = _mm_unpackhi_epi32(s179, s180);
|
||||
*(a875) = s182;
|
||||
*(a896) = s183;
|
||||
s184 = *(a873);
|
||||
s185 = *(a957);
|
||||
a1031 = _mm_xor_si128(a1003, a878);
|
||||
a1032 = _mm_xor_si128(a1007, a881);
|
||||
a1033 = _mm_xor_si128(a1011, a884);
|
||||
a1034 = _mm_xor_si128(a1015, a887);
|
||||
b125 = _mm_add_epi32(a1031, a1032);
|
||||
b126 = _mm_add_epi32(b125, a1033);
|
||||
t59 = _mm_add_epi32(b126, a1034);
|
||||
t60 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t59);
|
||||
m113 = _mm_add_epi32(s184, t59);
|
||||
m114 = _mm_add_epi32(s185, t60);
|
||||
m115 = _mm_add_epi32(s184, t60);
|
||||
m116 = _mm_add_epi32(s185, t59);
|
||||
d57 = _mm_cmpgt_epi32(m113, m114);
|
||||
d58 = _mm_cmpgt_epi32(m115, m116);
|
||||
a1035 = _mm_andnot_si128(d57, m113);
|
||||
a1036 = _mm_and_si128(d57, m114);
|
||||
s186 = _mm_or_si128(a1035, a1036);
|
||||
a1037 = _mm_andnot_si128(d58, m115);
|
||||
a1038 = _mm_and_si128(d58, m116);
|
||||
s187 = _mm_or_si128(a1037, a1038);
|
||||
s188 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d57,_mm_setzero_si128()),_mm_packs_epi16(d58,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1039 = (a851 + 10);
|
||||
*(a1039) = s188;
|
||||
s189 = _mm_unpacklo_epi32(s186, s187);
|
||||
s190 = _mm_unpackhi_epi32(s186, s187);
|
||||
*(a917) = s189;
|
||||
*(a938) = s190;
|
||||
s191 = *(a874);
|
||||
s192 = *(a958);
|
||||
a1040 = _mm_xor_si128(a1003, a899);
|
||||
a1041 = _mm_xor_si128(a1007, a902);
|
||||
a1042 = _mm_xor_si128(a1011, a905);
|
||||
a1043 = _mm_xor_si128(a1015, a908);
|
||||
b127 = _mm_add_epi32(a1040, a1041);
|
||||
b128 = _mm_add_epi32(b127, a1042);
|
||||
t61 = _mm_add_epi32(b128, a1043);
|
||||
t62 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t61);
|
||||
m117 = _mm_add_epi32(s191, t61);
|
||||
m118 = _mm_add_epi32(s192, t62);
|
||||
m119 = _mm_add_epi32(s191, t62);
|
||||
m120 = _mm_add_epi32(s192, t61);
|
||||
d59 = _mm_cmpgt_epi32(m117, m118);
|
||||
d60 = _mm_cmpgt_epi32(m119, m120);
|
||||
a1044 = _mm_andnot_si128(d59, m117);
|
||||
a1045 = _mm_and_si128(d59, m118);
|
||||
s193 = _mm_or_si128(a1044, a1045);
|
||||
a1046 = _mm_andnot_si128(d60, m119);
|
||||
a1047 = _mm_and_si128(d60, m120);
|
||||
s194 = _mm_or_si128(a1046, a1047);
|
||||
s195 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d59,_mm_setzero_si128()),_mm_packs_epi16(d60,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1048 = (a851 + 11);
|
||||
*(a1048) = s195;
|
||||
s196 = _mm_unpacklo_epi32(s193, s194);
|
||||
s197 = _mm_unpackhi_epi32(s193, s194);
|
||||
*(a959) = s196;
|
||||
*(a980) = s197;
|
||||
s198 = *(a894);
|
||||
s199 = *(a978);
|
||||
a1049 = _mm_xor_si128(a1003, a920);
|
||||
a1050 = _mm_xor_si128(a1007, a923);
|
||||
a1051 = _mm_xor_si128(a1011, a926);
|
||||
a1052 = _mm_xor_si128(a1015, a929);
|
||||
b129 = _mm_add_epi32(a1049, a1050);
|
||||
b130 = _mm_add_epi32(b129, a1051);
|
||||
t63 = _mm_add_epi32(b130, a1052);
|
||||
t64 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t63);
|
||||
m121 = _mm_add_epi32(s198, t63);
|
||||
m122 = _mm_add_epi32(s199, t64);
|
||||
m123 = _mm_add_epi32(s198, t64);
|
||||
m124 = _mm_add_epi32(s199, t63);
|
||||
d61 = _mm_cmpgt_epi32(m121, m122);
|
||||
d62 = _mm_cmpgt_epi32(m123, m124);
|
||||
a1053 = _mm_andnot_si128(d61, m121);
|
||||
a1054 = _mm_and_si128(d61, m122);
|
||||
s200 = _mm_or_si128(a1053, a1054);
|
||||
a1055 = _mm_andnot_si128(d62, m123);
|
||||
a1056 = _mm_and_si128(d62, m124);
|
||||
s201 = _mm_or_si128(a1055, a1056);
|
||||
s202 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d61,_mm_setzero_si128()),_mm_packs_epi16(d62,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1057 = (a851 + 12);
|
||||
*(a1057) = s202;
|
||||
s203 = _mm_unpacklo_epi32(s200, s201);
|
||||
s204 = _mm_unpackhi_epi32(s200, s201);
|
||||
*(a819) = s203;
|
||||
*(a855) = s204;
|
||||
s205 = *(a895);
|
||||
s206 = *(a979);
|
||||
a1058 = _mm_xor_si128(a1003, a941);
|
||||
a1059 = _mm_xor_si128(a1007, a944);
|
||||
a1060 = _mm_xor_si128(a1011, a947);
|
||||
a1061 = _mm_xor_si128(a1015, a950);
|
||||
b131 = _mm_add_epi32(a1058, a1059);
|
||||
b132 = _mm_add_epi32(b131, a1060);
|
||||
t65 = _mm_add_epi32(b132, a1061);
|
||||
t66 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t65);
|
||||
m125 = _mm_add_epi32(s205, t65);
|
||||
m126 = _mm_add_epi32(s206, t66);
|
||||
m127 = _mm_add_epi32(s205, t66);
|
||||
m128 = _mm_add_epi32(s206, t65);
|
||||
d63 = _mm_cmpgt_epi32(m125, m126);
|
||||
d64 = _mm_cmpgt_epi32(m127, m128);
|
||||
a1062 = _mm_andnot_si128(d63, m125);
|
||||
a1063 = _mm_and_si128(d63, m126);
|
||||
s207 = _mm_or_si128(a1062, a1063);
|
||||
a1064 = _mm_andnot_si128(d64, m127);
|
||||
a1065 = _mm_and_si128(d64, m128);
|
||||
s208 = _mm_or_si128(a1064, a1065);
|
||||
s209 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d63,_mm_setzero_si128()),_mm_packs_epi16(d64,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1066 = (a851 + 13);
|
||||
*(a1066) = s209;
|
||||
s210 = _mm_unpacklo_epi32(s207, s208);
|
||||
s211 = _mm_unpackhi_epi32(s207, s208);
|
||||
*(a876) = s210;
|
||||
*(a897) = s211;
|
||||
s212 = *(a915);
|
||||
s213 = *(a999);
|
||||
a1067 = _mm_xor_si128(a1003, a962);
|
||||
a1068 = _mm_xor_si128(a1007, a965);
|
||||
a1069 = _mm_xor_si128(a1011, a968);
|
||||
a1070 = _mm_xor_si128(a1015, a971);
|
||||
b133 = _mm_add_epi32(a1067, a1068);
|
||||
b134 = _mm_add_epi32(b133, a1069);
|
||||
t67 = _mm_add_epi32(b134, a1070);
|
||||
t68 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t67);
|
||||
m129 = _mm_add_epi32(s212, t67);
|
||||
m130 = _mm_add_epi32(s213, t68);
|
||||
m131 = _mm_add_epi32(s212, t68);
|
||||
m132 = _mm_add_epi32(s213, t67);
|
||||
d65 = _mm_cmpgt_epi32(m129, m130);
|
||||
d66 = _mm_cmpgt_epi32(m131, m132);
|
||||
a1071 = _mm_andnot_si128(d65, m129);
|
||||
a1072 = _mm_and_si128(d65, m130);
|
||||
s214 = _mm_or_si128(a1071, a1072);
|
||||
a1073 = _mm_andnot_si128(d66, m131);
|
||||
a1074 = _mm_and_si128(d66, m132);
|
||||
s215 = _mm_or_si128(a1073, a1074);
|
||||
s216 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d65,_mm_setzero_si128()),_mm_packs_epi16(d66,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1075 = (a851 + 14);
|
||||
*(a1075) = s216;
|
||||
s217 = _mm_unpacklo_epi32(s214, s215);
|
||||
s218 = _mm_unpackhi_epi32(s214, s215);
|
||||
*(a918) = s217;
|
||||
*(a939) = s218;
|
||||
s219 = *(a916);
|
||||
s220 = *(a1000);
|
||||
a1076 = _mm_xor_si128(a1003, a983);
|
||||
a1077 = _mm_xor_si128(a1007, a986);
|
||||
a1078 = _mm_xor_si128(a1011, a989);
|
||||
a1079 = _mm_xor_si128(a1015, a992);
|
||||
b135 = _mm_add_epi32(a1076, a1077);
|
||||
b136 = _mm_add_epi32(b135, a1078);
|
||||
t69 = _mm_add_epi32(b136, a1079);
|
||||
t70 = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), t69);
|
||||
m133 = _mm_add_epi32(s219, t69);
|
||||
m134 = _mm_add_epi32(s220, t70);
|
||||
m135 = _mm_add_epi32(s219, t70);
|
||||
m136 = _mm_add_epi32(s220, t69);
|
||||
d67 = _mm_cmpgt_epi32(m133, m134);
|
||||
d68 = _mm_cmpgt_epi32(m135, m136);
|
||||
a1080 = _mm_andnot_si128(d67, m133);
|
||||
a1081 = _mm_and_si128(d67, m134);
|
||||
s221 = _mm_or_si128(a1080, a1081);
|
||||
a1082 = _mm_andnot_si128(d68, m135);
|
||||
a1083 = _mm_and_si128(d68, m136);
|
||||
s222 = _mm_or_si128(a1082, a1083);
|
||||
s223 = _mm_movemask_epi8(_mm_packs_epi16(_mm_unpacklo_epi16(_mm_packs_epi16(d67,_mm_setzero_si128()),_mm_packs_epi16(d68,_mm_setzero_si128())),_mm_setzero_si128()));
|
||||
a1084 = (a851 + 15);
|
||||
*(a1084) = s223;
|
||||
s224 = _mm_unpacklo_epi32(s221, s222);
|
||||
s225 = _mm_unpackhi_epi32(s221, s222);
|
||||
*(a960) = s224;
|
||||
*(a981) = s225;
|
||||
}
|
||||
/* skip */
|
||||
}
|
@@ -1,36 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
#include <stdint.h>
|
||||
|
||||
/* Parameters of the convolutional code.  NUMSTATES equals 1 << (K - 1)
 * and RATE equals the number of entries in POLYS. */
#define K 7
|
||||
#define RATE 4
|
||||
/* Polynomials written in decimal; in octal they read 0155, 0117, 0123, 0155. */
#define POLYS { 109, 79, 83, 109 }
|
||||
#define NUMSTATES 64
|
||||
/* NOTE(review): FRAMEBITS looks like a generator/test-harness setting --
 * confirm whether any decoder source still reads it. */
#define FRAMEBITS 2048
|
||||
/* Element type of the decision output, and the bit width assumed for it. */
#define DECISIONTYPE unsigned int
|
||||
#define DECISIONTYPE_BITSIZE 32
|
||||
/* Integer type named COMPUTETYPE; presumably the metric/symbol type of
 * the decoder -- verify against the including sources. */
#define COMPUTETYPE uint32_t
|
||||
/* NOTE(review): EBN0 and TRIALS look like simulation settings of the
 * generator's test harness -- confirm they are unused here. */
#define EBN0 3
|
||||
#define TRIALS 10000
|
||||
/* Lets code that spells the type as __int32 build with plain int. */
#define __int32 int
|
||||
#define FUNC FULL_SPIRAL
|
||||
/* Right-shift amounts; 0 means no scaling is applied. */
#define METRICSHIFT 0
|
||||
#define PRECISIONSHIFT 0
|
||||
#define RENORMALIZE_THRESHOLD 2000000000
|
@@ -1,370 +0,0 @@
|
||||
#
|
||||
/*
|
||||
* Copyright (C) 201 .. 2017
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of Qt-DAB
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The convolutional decoder for the FIC blocks has fixed sized
|
||||
* blocks, so we can use pre-generated code - for that specific
|
||||
* sized blocks - generated by the spiral project
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mm_malloc.h"
|
||||
#include "viterbi-768.h"
|
||||
#include <cstring>
|
||||
#ifdef __MINGW32__
|
||||
#include <intrin.h>
|
||||
#include <malloc.h>
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
//
|
||||
// It took a while to discover that the polynomials we used
|
||||
// in our own "straightforward" implementation were bit-reversed!!
|
||||
// The official set is the one on top.
|
||||
// K - 1 is the tail length: deconvolve processes frameBits + (K - 1)
// steps and chainback_viterbi skips K - 1 decision entries.
#define K 7
|
||||
// Active polynomial set, written in octal.
#define POLYS { 0155, 0117, 0123, 0155}
|
||||
// The same set written in decimal (0155 == 109, 0117 == 79, 0123 == 83):
//#define POLYS {109, 79, 83, 109}
|
||||
// In the bit-reversed form (7-bit reversal) the polynomials look like:
|
||||
//#define POLYS { 0133, 0171, 0145, 0133 }
|
||||
// ... which is, in decimal:
//#define POLYS { 91, 121, 101, 91 }
|
||||
|
||||
// Right shifts applied in BFLY: METRICSHIFT to every per-symbol term,
// PRECISIONSHIFT to the summed branch metric.  0 leaves them unscaled.
#define METRICSHIFT 0
|
||||
#define PRECISIONSHIFT 0
|
||||
// Passed to renormalize () by update_viterbi_blk_GENERIC: the metrics
// are rebased once the first metric exceeds this value.
#define RENORMALIZE_THRESHOLD 137
|
||||
|
||||
//
|
||||
/* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte.
 * chainback_viterbi keeps the state shifted left by ADDSHIFT and stores
 * it shifted right by SUBSHIFT.  With K == 7 the first branch below is
 * taken, giving ADDSHIFT == 2 and SUBSHIFT == 0. */
|
||||
#if (K-1<8)
|
||||
#define ADDSHIFT (8-(K-1))
|
||||
#define SUBSHIFT 0
|
||||
#elif (K-1>8)
|
||||
#define ADDSHIFT 0
|
||||
#define SUBSHIFT ((K-1)-8)
|
||||
#else
|
||||
#define ADDSHIFT 0
|
||||
#define SUBSHIFT 0
|
||||
#endif
|
||||
|
||||
|
||||
//  256-entry parity table: Partab [v] is 1 when v contains an odd
//  number of 1 bits and 0 when that number is even.  The table is not
//  const because partab_init () is able to rewrite it.
static uint8_t Partab [256] = {
/* 0x00 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0x10 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0x20 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0x30 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0x40 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0x50 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0x60 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0x70 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0x80 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0x90 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0xA0 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0xB0 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0xC0 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0,
/* 0xD0 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0xE0 */ 1,0,0,1, 0,1,1,0, 0,1,1,0, 1,0,0,1,
/* 0xF0 */ 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0
};
|
||||
|
||||
//
|
||||
// One could create the table above, i.e. a 256 entry
|
||||
// odd-parity lookup table by the following function
|
||||
// It is now precomputed
|
||||
void viterbi_768::partab_init (void){
|
||||
int16_t i,cnt,ti;
|
||||
|
||||
for (i = 0; i < 256; i++){
|
||||
cnt = 0;
|
||||
ti = i;
|
||||
while (ti != 0) {
|
||||
if (ti & 1) cnt++;
|
||||
ti >>= 1;
|
||||
}
|
||||
Partab [i] = cnt & 1;
|
||||
}
|
||||
}
|
||||
|
||||
int viterbi_768::parity (int x){
|
||||
/* Fold down to one byte */
|
||||
x ^= (x >> 16);
|
||||
x ^= (x >> 8);
|
||||
return Partab [x];
|
||||
}
|
||||
|
||||
static inline
|
||||
void renormalize (COMPUTETYPE* X, COMPUTETYPE threshold){
|
||||
int32_t i;
|
||||
|
||||
if (X [0] > threshold){
|
||||
COMPUTETYPE min = X [0];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
if (min > X[i])
|
||||
min = X[i];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
X[i] -= min;
|
||||
}
|
||||
}
|
||||
//
|
||||
//
|
||||
// The main use of the viterbi decoder is in handling the FIC blocks.
|
||||
// There are (in mode 1) 3 ofdm blocks, giving 4 FIC blocks.
|
||||
// These all have a predefined length. In that case we use the
|
||||
// "fast" (i.e. spiral) code, otherwise we use the generic code
|
||||
//  Set up a decoder for frames of wordlength data bits.
//    wordlength: number of data bits per frame; every buffer gets
//                K - 1 extra positions on top of that.
//    spiral:     when true deconvolve calls update_viterbi_blk_SPIRAL,
//                otherwise update_viterbi_blk_GENERIC.
//  The constructor allocates three 16-byte aligned buffers (data,
//  symbols, vp. decisions), fills Branchtab from POLYS and resets the
//  path metrics through init_viterbi.
//  A failed allocation is reported on stdout and leaves the matching
//  pointer NULL; it is not otherwise handled here.
viterbi_768::viterbi_768 (int16_t wordlength, bool spiral) {
    int polys [RATE] = POLYS;
    int16_t i, state;
#ifdef __MINGW32__
    uint32_t size;
#endif

    frameBits = wordlength;
    this -> spiral = spiral;
//  partab_init ();     // not needed, Partab is precomputed

//  B I G  N O T E: the spiral code uses (wordLength + (K - 1)) * sizeof ...
//  as buffer size.  With that size the application crashes, so something
//  is not OK.  Doubling the size makes the problem disappear; it is not
//  solved though, and it was not investigated further.
#ifdef __MINGW32__
    size = 2 * ((wordlength + (K - 1)) / 8 + 1 + 16) & ~0xF;
    data = (uint8_t *)_aligned_malloc (size, 16);
    size = 2 * (RATE * (wordlength + (K - 1)) * sizeof(COMPUTETYPE) + 16) & ~0xF;
    symbols = (COMPUTETYPE *)_aligned_malloc (size, 16);
    size = 2 * (wordlength + (K - 1)) * sizeof (decision_t);
    size = (size + 16) & ~0xF;
    vp. decisions = (decision_t *)_aligned_malloc (size, 16);
#else
//  posix_memalign does not promise a usable pointer when it fails, so
//  the member is set to NULL explicitly: free () in the destructor is
//  then well defined instead of acting on an indeterminate value.
    if (posix_memalign ((void**)&data, 16,
                        (wordlength + (K - 1))/ 8 + 1)) {
        printf("Allocation of data array failed\n");
        data = NULL;
    }
    if (posix_memalign ((void**)&symbols, 16,
                        RATE * (wordlength + (K - 1)) * sizeof(COMPUTETYPE))) {
        printf("Allocation of symbols array failed\n");
        symbols = NULL;
    }
    if (posix_memalign ((void**)&(vp. decisions),
                        16,
                        2 * (wordlength + (K - 1)) * sizeof (decision_t))) {
        printf ("Allocation of vp decisions failed\n");
        vp. decisions = NULL;
    }
#endif

//  Branchtab holds, per polynomial i and per state of the lower half,
//  255 or 0 depending on the parity of the state bits selected by the
//  polynomial (inverted for a negative polynomial).
    for (state = 0; state < NUMSTATES / 2; state++) {
        for (i = 0; i < RATE; i++)
            Branchtab [i * NUMSTATES / 2 + state] =
                        (polys[i] < 0) ^
                           parity((2 * state) & abs (polys[i])) ? 255 : 0;
    }
//
    init_viterbi (&vp, 0);
}
|
||||
|
||||
|
||||
//  Give back the three buffers obtained in the constructor, using the
//  release routine that matches the allocator of the platform.
viterbi_768::~viterbi_768 (void) {
#ifndef __MINGW32__
    free (vp. decisions);
    free (data);
    free (symbols);
#else
    _aligned_free (vp. decisions);
    _aligned_free (data);
    _aligned_free (symbols);
#endif
}
|
||||
|
||||
//  maskTable [o] is the mask for bit o of a byte, where o counts from
//  the most significant bit (o == 0 selects 0x80, o == 7 selects 0x01).
static int maskTable [] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};

//  Return bit o (0 .. 7, counted from the most significant side) of v
//  as the value 0 or 1.
static inline
uint8_t getbit (uint8_t v, int32_t o) {
    if ((v & maskTable [o]) != 0)
        return 1;
    return 0;
}
|
||||
|
||||
|
||||
// depends: POLYS, RATE, COMPUTETYPE
|
||||
// encode was only used for testing purposes
|
||||
//void encode (/*const*/ unsigned char *bytes, COMPUTETYPE *symbols, int nbits) {
|
||||
//int i, k;
|
||||
//int polys [RATE] = POLYS;
|
||||
//int sr = 0;
|
||||
//
|
||||
//// FIXME: this is slowish
|
||||
//// -- remember about the padding!
|
||||
// for (i = 0; i < nbits + (K - 1); i++) {
|
||||
// int b = bytes[i/8];
|
||||
// int j = i % 8;
|
||||
// int bit = (b >> (7-j)) & 1;
|
||||
//
|
||||
// sr = (sr << 1) | bit;
|
||||
// for (k = 0; k < RATE; k++)
|
||||
// *(symbols++) = parity(sr & polys[k]);
|
||||
// }
|
||||
//}
|
||||
|
||||
// Note that our DAB environment maps the softbits to -127 .. 127
|
||||
// we have to map that onto 0 .. 255
|
||||
|
||||
void viterbi_768::deconvolve (int16_t *input, uint8_t *output) {
|
||||
uint32_t i;
|
||||
|
||||
init_viterbi (&vp, 0);
|
||||
for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
|
||||
int16_t temp = input [i] + 127;
|
||||
if (temp < 0) temp = 0;
|
||||
if (temp > 255) temp = 255;
|
||||
symbols [i] = temp;
|
||||
}
|
||||
if (!spiral)
|
||||
update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
|
||||
else
|
||||
update_viterbi_blk_SPIRAL (&vp, symbols, frameBits + (K - 1));
|
||||
|
||||
chainback_viterbi (&vp, data, frameBits, 0);
|
||||
|
||||
for (i = 0; i < (uint16_t)frameBits; i ++)
|
||||
output [i] = getbit (data [i >> 3], i & 07);
|
||||
}
|
||||
|
||||
/* C-language butterfly */
|
||||
void viterbi_768::BFLY (int i, int s, COMPUTETYPE * syms,
|
||||
struct v * vp, decision_t * d) {
|
||||
int32_t j, decision0, decision1;
|
||||
COMPUTETYPE metric,m0,m1,m2,m3;
|
||||
|
||||
metric =0;
|
||||
for (j = 0; j < RATE;j++)
|
||||
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s*RATE+j]) >>
|
||||
METRICSHIFT ;
|
||||
metric = metric >> PRECISIONSHIFT;
|
||||
const COMPUTETYPE max =
|
||||
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
|
||||
|
||||
m0 = vp->old_metrics->t [i] + metric;
|
||||
m1 = vp->old_metrics->t [i + NUMSTATES / 2] + (max - metric);
|
||||
m2 = vp->old_metrics->t [i] + (max - metric);
|
||||
m3 = vp->old_metrics->t [i + NUMSTATES / 2] + metric;
|
||||
|
||||
decision0 = ((int32_t)(m0 - m1)) > 0;
|
||||
decision1 = ((int32_t)(m2 - m3)) > 0;
|
||||
|
||||
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
|
||||
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
|
||||
|
||||
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
|
||||
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
|
||||
}
|
||||
|
||||
/* Update decoder with a block of demodulated symbols
|
||||
* Note that nbits is the number of decoded data bits, not the number
|
||||
* of symbols!
|
||||
*/
|
||||
void viterbi_768::update_viterbi_blk_GENERIC (struct v *vp,
|
||||
COMPUTETYPE *syms,
|
||||
int16_t nbits){
|
||||
decision_t *d = (decision_t *)vp -> decisions;
|
||||
int32_t s, i;
|
||||
|
||||
for (s = 0; s < nbits; s++)
|
||||
memset (&d [s], 0, sizeof (decision_t));
|
||||
|
||||
for (s = 0; s < nbits; s++){
|
||||
void *tmp;
|
||||
for (i = 0; i < NUMSTATES / 2; i++)
|
||||
BFLY (i, s, syms, vp, vp -> decisions);
|
||||
|
||||
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
|
||||
// Swap pointers to old and new metrics
|
||||
tmp = vp -> old_metrics;
|
||||
vp -> old_metrics = vp -> new_metrics;
|
||||
vp -> new_metrics = (metric_t *)tmp;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
#ifndef SSE_AVAILABLE
|
||||
void FULL_SPIRAL_no_sse (int,
|
||||
#else
|
||||
void FULL_SPIRAL_sse (int,
|
||||
#endif
|
||||
COMPUTETYPE *Y,
|
||||
COMPUTETYPE *X,
|
||||
COMPUTETYPE *syms,
|
||||
DECISIONTYPE *dec,
|
||||
COMPUTETYPE *Branchtab);
|
||||
}
|
||||
|
||||
void viterbi_768::update_viterbi_blk_SPIRAL (struct v *vp,
|
||||
COMPUTETYPE *syms,
|
||||
int16_t nbits){
|
||||
decision_t *d = (decision_t *)vp -> decisions;
|
||||
int32_t s;
|
||||
|
||||
for (s = 0; s < nbits; s++)
|
||||
memset (d + s, 0, sizeof(decision_t));
|
||||
|
||||
#ifndef SSE_AVAILABLE
|
||||
FULL_SPIRAL_no_sse (nbits,
|
||||
#else
|
||||
FULL_SPIRAL_sse (nbits,
|
||||
#endif
|
||||
vp -> new_metrics -> t,
|
||||
vp -> old_metrics -> t,
|
||||
syms,
|
||||
d -> t, Branchtab);
|
||||
}
|
||||
|
||||
//
|
||||
/* Viterbi chainback */
|
||||
void viterbi_768::chainback_viterbi (struct v *vp,
|
||||
uint8_t *data, /* Decoded output data */
|
||||
int16_t nbits, /* Number of data bits */
|
||||
uint16_t endstate){ /*Terminal encoder state */
|
||||
decision_t *d = vp -> decisions;
|
||||
|
||||
/* Make room beyond the end of the encoder register so we can
|
||||
* accumulate a full byte of decoded data
|
||||
*/
|
||||
endstate = (endstate % NUMSTATES) << ADDSHIFT;
|
||||
/* The store into data[] only needs to be done every 8 bits.
|
||||
* But this avoids a conditional branch, and the writes will
|
||||
* combine in the cache anyway
|
||||
*/
|
||||
d += (K - 1); /* Look past tail */
|
||||
while (nbits-- != 0){
|
||||
int k;
|
||||
// int l = (endstate >> ADDSHIFT) / 32;
|
||||
// int m = (endstate >> ADDSHIFT) % 32;
|
||||
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
|
||||
((endstate>>ADDSHIFT) % 32)) & 1;
|
||||
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
|
||||
data [nbits >> 3] = endstate >> SUBSHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize Viterbi decoder for start of new frame */
|
||||
void viterbi_768::init_viterbi (struct v *p, int16_t starting_state){
|
||||
struct v *vp = p;
|
||||
int32_t i;
|
||||
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
vp -> metrics1.t[i] = 63;
|
||||
|
||||
vp -> old_metrics = &vp -> metrics1;
|
||||
vp -> new_metrics = &vp -> metrics2;
|
||||
/* Bias known start state */
|
||||
vp -> old_metrics-> t[starting_state & (NUMSTATES-1)] = 0;
|
||||
}
|
||||
|
@@ -1,11 +0,0 @@
|
||||
|
||||
The viterbi implementation is copied from the spiral one, all
|
||||
rights gratefully acknowledged.
|
||||
Since we serve more than a single platform, we do not use the SSE
|
||||
implementation.
|
||||
|
||||
The particular spiral implementation (see the file "spiral-no-sse.c")
|
||||
is generated for the word size and the other parameters of the FIC blocks.
|
||||
The implementation therefore has a "switch" that - when set to true -
|
||||
selects the spiral implementation, and - when set to false (the default) -
|
||||
selects the generic implementation.
|
File diff suppressed because it is too large
Load Diff
@@ -1,701 +0,0 @@
|
||||
#ifdef NEON_AVAILABLE
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
|
||||
//#include <include/mm_malloc.h>
|
||||
//#include <pmmintrin.h>
|
||||
//#include <emmintrin.h>
|
||||
//#include <xmmintrin.h>
|
||||
//#include <mmintrin.h>
|
||||
#include "SSE2NEON.h"
|
||||
#include "spiral-neon.h"
|
||||
void init_FULL_SPIRAL() {
|
||||
}
|
||||
|
||||
/*
 * Combines the four per-symbol contributions for one group of four lanes.
 *
 * Each broadcast symbol vector is XORed with the matching branch-table row
 * and the four results are added lane-wise as 32-bit integers.
 *
 * symbol: four vectors, each holding one input symbol in all four lanes.
 * row:    the four branch-table vectors that belong to this group.
 */
static inline __m128i spiral_neon_group_metric(const __m128i *symbol, const __m128i *row) {
    __m128i sum = _mm_add_epi32(_mm_xor_si128(symbol[0], row[0]),
                                _mm_xor_si128(symbol[1], row[1]));
    sum = _mm_add_epi32(sum, _mm_xor_si128(symbol[2], row[2]));
    return _mm_add_epi32(sum, _mm_xor_si128(symbol[3], row[3]));
}

/*
 * Updates one group: builds four candidate metrics from two incoming metric
 * vectors, keeps the smaller candidate of each pair, and records which one
 * was kept.
 *
 * low, high: the two incoming metric vectors.
 * metric:    value returned by spiral_neon_group_metric(); its counterpart
 *            is (1020 - metric) in every lane.
 * decision:  receives one byte; bit 2*i is set when, in lane i, the
 *            candidate coming from `high` replaced (low + metric), and
 *            bit 2*i + 1 likewise for the pair starting with
 *            (low + (1020 - metric)).
 * next:      receives two vectors: the two surviving metric vectors
 *            interleaved lane by lane (lower halves in next[0], upper
 *            halves in next[1]).
 *
 * The decision byte is stored before the two vectors, exactly as in the
 * generated code this replaces.
 */
static inline void spiral_neon_update_group(__m128i low, __m128i high, __m128i metric,
                                            unsigned char *decision, __m128i *next) {
    const __m128i zero = _mm_setzero_si128();
    const __m128i counterpart = _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), metric);

    const __m128i cand_low_a = _mm_add_epi32(low, metric);
    const __m128i cand_high_a = _mm_add_epi32(high, counterpart);
    const __m128i cand_low_b = _mm_add_epi32(low, counterpart);
    const __m128i cand_high_b = _mm_add_epi32(high, metric);

    /* All-ones lanes mark where the `high` candidate is strictly smaller
       (signed compare); on a tie the `low` candidate is kept. */
    const __m128i take_high_a = _mm_cmpgt_epi32(cand_low_a, cand_high_a);
    const __m128i take_high_b = _mm_cmpgt_epi32(cand_low_b, cand_high_b);

    const __m128i best_a = _mm_or_si128(_mm_andnot_si128(take_high_a, cand_low_a),
                                        _mm_and_si128(take_high_a, cand_high_a));
    const __m128i best_b = _mm_or_si128(_mm_andnot_si128(take_high_b, cand_low_b),
                                        _mm_and_si128(take_high_b, cand_high_b));

    /* Narrow both 4x32-bit masks to bytes, interleave them and collect the
       sign bits: eight decision bits in one byte. */
    *decision = (unsigned char) _mm_movemask_epi8(
        _mm_packs_epi16(
            _mm_unpacklo_epi16(_mm_packs_epi16(take_high_a, zero),
                               _mm_packs_epi16(take_high_b, zero)),
            zero));

    next[0] = _mm_unpacklo_epi32(best_a, best_b);
    next[1] = _mm_unpackhi_epi32(best_a, best_b);
}

/*
 * Runs `amount` iterations of the Spiral-generated kernel.
 * NOTE(review): this looks like two trellis steps of a Viterbi
 * add-compare-select update per iteration - confirm against the caller.
 *
 * Per iteration i the function
 *   - consumes eight values syms[8*i .. 8*i+7] (four per phase),
 *   - produces sixteen bytes dec[16*i .. 16*i+15] (eight per phase),
 *   - phase 1: reads the 64 metrics in X and writes 64 metrics to Y,
 *   - phase 2: reads the metrics in Y and writes the result back to X.
 *
 * amount:    number of iterations; values <= 0 do nothing.
 * Y:         scratch buffer of 64 int32_t, overwritten every iteration.
 * X:         metric buffer of 64 int32_t, read and updated in place.
 * syms:      input values, 8 * amount entries are read.
 * dec:       output decision bytes, 16 * amount entries are written.
 * Branchtab: 128 int32_t read as 32 vectors; vector (8*k + g) is combined
 *            with symbol k for group g. It is read once per iteration and
 *            reused for both phases.
 *
 * X, Y and Branchtab are accessed through __m128i pointers and therefore
 * have to satisfy the alignment requirement of that type.
 *
 * The generator's original loop header was `i9 <= amount`; the bound in
 * use is `< amount`, which is what is implemented here. Both inner loops
 * have a fixed trip count of 8, so the compiler is free to unroll them.
 */
void FULL_SPIRAL_neon(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
    __m128i *const metrics_x = (__m128i *) X;
    __m128i *const metrics_y = (__m128i *) Y;
    __m128i *const table = (__m128i *) Branchtab;
    int step;

    for (step = 0; step < amount; ++step) {
        int32_t *const step_syms = syms + (8 * step);
        unsigned char *const step_dec = dec + (16 * step);
        __m128i rows[8][4];   /* branch-table rows, loaded in phase 1, reused in phase 2 */
        __m128i symbol[4];
        int group;
        int k;

        /* Phase 1: X -> Y using symbols 0..3. */
        for (k = 0; k < 4; ++k) {
            symbol[k] = _mm_set1_epi32(step_syms[k]);
        }
        for (group = 0; group < 8; ++group) {
            const __m128i low = metrics_x[group];
            const __m128i high = metrics_x[group + 8];

            for (k = 0; k < 4; ++k) {
                rows[group][k] = table[(8 * k) + group];
            }
            spiral_neon_update_group(low, high,
                                     spiral_neon_group_metric(symbol, rows[group]),
                                     step_dec + group,
                                     metrics_y + (2 * group));
        }

        /* Phase 2: Y -> X using symbols 4..7 and the same table rows. */
        for (k = 0; k < 4; ++k) {
            symbol[k] = _mm_set1_epi32(step_syms[4 + k]);
        }
        for (group = 0; group < 8; ++group) {
            const __m128i low = metrics_y[group];
            const __m128i high = metrics_y[group + 8];

            spiral_neon_update_group(low, high,
                                     spiral_neon_group_metric(symbol, rows[group]),
                                     step_dec + 8 + group,
                                     metrics_x + (2 * group));
        }
    }
}
|
||||
#endif
|
@@ -1,36 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Build-time parameters emitted by the Spiral generator for this kernel.
 * NOTE(review): the meanings given below are inferred from the macro names
 * and values only; confirm against the code that uses them.
 */

/* Presumably the constraint length of the code; NUMSTATES == 1 << (K - 1). */
#define K 7
/* Number of entries in POLYS. */
#define RATE 4
/* Brace-enclosed initialiser with RATE integers (presumably the generator
   polynomials, written in decimal). */
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
#define FRAMEBITS 2048

/* Type used for decision words and its width in bits (assumes a 32-bit
   unsigned int). */
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
/* Type used for metric arithmetic; uint32_t comes from <stdint.h>. */
#define COMPUTETYPE uint32_t

/* Presumably parameters of the generator's test harness (Eb/N0, number of
   trials). */
#define EBN0 3
#define TRIALS 10000

/* Maps the MSVC-style __int32 spelling onto int. Note that identifiers
   containing a double underscore are reserved for the implementation. */
#define __int32 int
/* Base name of the generated kernel function. */
#define FUNC FULL_SPIRAL

#define METRICSHIFT 0
#define PRECISIONSHIFT 0
/* Fits in a signed 32-bit integer (INT32_MAX is 2147483647). */
#define RENORMALIZE_THRESHOLD 2000000000
|
File diff suppressed because it is too large
Load Diff
@@ -1,35 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
|
||||
/*
 * Build-time parameters emitted by the Spiral generator for this kernel.
 * NOTE(review): the meanings given below are inferred from the macro names
 * and values only; confirm against the code that uses them.
 */

/* Presumably the constraint length of the code; NUMSTATES == 1 << (K - 1). */
#define K 7
/* Number of entries in POLYS. */
#define RATE 4
/* Brace-enclosed initialiser with RATE integers (presumably the generator
   polynomials, written in decimal). */
#define POLYS { 109, 79, 83, 109 }
#define NUMSTATES 64
#define FRAMEBITS 2048

/* Type used for decision words and its width in bits (assumes a 32-bit
   unsigned int). */
#define DECISIONTYPE unsigned int
#define DECISIONTYPE_BITSIZE 32
/* Type used for metric arithmetic. */
#define COMPUTETYPE unsigned int

/* Presumably parameters of the generator's test harness (Eb/N0, number of
   trials). */
#define EBN0 3
#define TRIALS 10000

/* Maps the MSVC-style __int32 spelling onto int. Note that identifiers
   containing a double underscore are reserved for the implementation. */
#define __int32 int
/* Base name of the generated kernel function. */
#define FUNC FULL_SPIRAL

#define METRICSHIFT 0
#define PRECISIONSHIFT 0
/* Fits in a signed 32-bit integer (INT32_MAX is 2147483647). */
#define RENORMALIZE_THRESHOLD 2000000000
|
@@ -1,698 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
|
||||
//#include <include/mm_malloc.h>
|
||||
//#include <pmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <mmintrin.h>
|
||||
#include "spiral-sse.h"
|
||||
/*
 * Initialisation entry point that accompanies the generated kernel.
 * The body is empty: calling it performs no work.
 */
void init_FULL_SPIRAL() {
|
||||
}
|
||||
|
||||
/*
 * Sum of the four per-symbol terms of one butterfly: every broadcast symbol
 * is XOR-ed with its branch-table vector and the results are added up
 * (32-bit lanes, wrap-around arithmetic).
 */
static inline __m128i spiral_sse_branch_metric(const __m128i sym[4],
                                               const __m128i table[4]) {
    __m128i total = _mm_add_epi32(_mm_xor_si128(sym[0], table[0]),
                                  _mm_xor_si128(sym[1], table[1]));
    total = _mm_add_epi32(total, _mm_xor_si128(sym[2], table[2]));
    return _mm_add_epi32(total, _mm_xor_si128(sym[3], table[3]));
}

/*
 * One add-compare-select step on four butterflies at once.
 *
 *   low_in, high_in : the two input metric vectors
 *   metric          : branch metric; its complement is 1020 - metric
 *   decision        : receives one byte with the eight comparison results
 *   out             : receives two vectors (out[0], out[1]) holding the
 *                     interleaved surviving metrics
 *
 * For each candidate pair the smaller value (signed compare) survives.
 * The decision byte is written before the two metric vectors.
 */
static inline void spiral_sse_acs(const __m128i *low_in, const __m128i *high_in,
                                  __m128i metric,
                                  unsigned char *decision, __m128i *out) {
    const __m128i zero = _mm_setzero_si128();
    const __m128i low = *low_in;
    const __m128i high = *high_in;
    const __m128i inverse =
        _mm_sub_epi32(_mm_set_epi32(1020, 1020, 1020, 1020), metric);

    const __m128i first_a = _mm_add_epi32(low, metric);
    const __m128i first_b = _mm_add_epi32(high, inverse);
    const __m128i second_a = _mm_add_epi32(low, inverse);
    const __m128i second_b = _mm_add_epi32(high, metric);

    /* All-ones lanes mark the places where the "b" candidate wins. */
    const __m128i pick_first = _mm_cmpgt_epi32(first_a, first_b);
    const __m128i pick_second = _mm_cmpgt_epi32(second_a, second_b);

    const __m128i survivor_first =
        _mm_or_si128(_mm_andnot_si128(pick_first, first_a),
                     _mm_and_si128(pick_first, first_b));
    const __m128i survivor_second =
        _mm_or_si128(_mm_andnot_si128(pick_second, second_a),
                     _mm_and_si128(pick_second, second_b));

    /* Squeeze the two 4-lane masks into eight interleaved decision bits. */
    *decision = (unsigned char) _mm_movemask_epi8(
        _mm_packs_epi16(
            _mm_unpacklo_epi16(_mm_packs_epi16(pick_first, zero),
                               _mm_packs_epi16(pick_second, zero)),
            zero));

    out[0] = _mm_unpacklo_epi32(survivor_first, survivor_second);
    out[1] = _mm_unpackhi_epi32(survivor_first, survivor_second);
}

/*
 * Runs `amount` iterations of the SSE add-compare-select kernel.
 *
 * Each iteration consumes eight values from syms and emits sixteen decision
 * bytes into dec. It works in two halves:
 *   1. metrics are read from X and written to Y, using syms[0..3];
 *   2. metrics are read from Y and written back to X, using syms[4..7].
 * Both halves use the same 32 branch-table vectors; they are fetched once in
 * the first half and kept in a local array for the second.
 *
 * X, Y and Branchtab are accessed as arrays of __m128i (16, 16 and 32
 * vectors respectively) through plain dereferences.
 * NOTE(review): this presumably requires 16-byte alignment of those three
 * buffers - confirm against the allocation in the caller.
 */
void FULL_SPIRAL_sse(int amount, int32_t *Y, int32_t *X, int32_t *syms, unsigned char *dec, int32_t *Branchtab) {
    __m128i *const metrics_x = (__m128i *) X;
    __m128i *const metrics_y = (__m128i *) Y;
    const __m128i *const table = (const __m128i *) Branchtab;
    int step;

    for (step = 0; step < amount; step++) {
        const int32_t *const symbols = syms + (8 * step);
        unsigned char *const decisions = ((unsigned char *) dec) + (16 * step);
        __m128i cached[8][4];   /* branch-table vectors, indexed [butterfly][symbol] */
        __m128i spread[4];      /* the four current symbols, one per vector */
        int j, k;

        /* First half: X -> Y with symbols 0..3. */
        for (k = 0; k < 4; k++)
            spread[k] = _mm_set1_epi32(symbols[k]);
        for (j = 0; j < 8; j++) {
            for (k = 0; k < 4; k++)
                cached[j][k] = table[8 * k + j];
            spiral_sse_acs(metrics_x + j, metrics_x + j + 8,
                           spiral_sse_branch_metric(spread, cached[j]),
                           decisions + j, metrics_y + 2 * j);
        }

        /* Second half: Y -> X with symbols 4..7, table values reused. */
        for (k = 0; k < 4; k++)
            spread[k] = _mm_set1_epi32(symbols[4 + k]);
        for (j = 0; j < 8; j++)
            spiral_sse_acs(metrics_y + j, metrics_y + j + 8,
                           spiral_sse_branch_metric(spread, cached[j]),
                           decisions + 8 + j, metrics_x + 2 * j);
    }
}
|
@@ -1,36 +0,0 @@
|
||||
/***************************************************************
|
||||
This code was generated by Spiral 6.0 beta, www.spiral.net --
|
||||
Copyright (c) 2005-2008, Carnegie Mellon University.
|
||||
All rights reserved.
|
||||
The code is distributed under the GNU General Public License (GPL)
|
||||
(see http://www.gnu.org/copyleft/gpl.html)
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
******************************************************************/
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Compile-time parameters that accompany the Spiral-generated decoder code.
 * POLYS lists RATE (4) polynomials in decimal notation, and NUMSTATES (64)
 * equals 2^(K-1) for K = 7. COMPUTETYPE is the fixed-width uint32_t, which
 * is why <stdint.h> is included above.
 * NOTE(review): EBN0 and TRIALS look like settings of the generator's own
 * test bench rather than of the decoder - confirm before relying on them.
 */
#define K 7
|
||||
#define RATE 4
|
||||
#define POLYS { 109, 79, 83, 109 }
|
||||
#define NUMSTATES 64
|
||||
#define FRAMEBITS 2048
|
||||
#define DECISIONTYPE unsigned int
|
||||
#define DECISIONTYPE_BITSIZE 32
|
||||
#define COMPUTETYPE uint32_t
|
||||
#define EBN0 3
|
||||
#define TRIALS 10000
|
||||
#define __int32 int
|
||||
#define FUNC FULL_SPIRAL
|
||||
#define METRICSHIFT 0
|
||||
#define PRECISIONSHIFT 0
|
||||
#define RENORMALIZE_THRESHOLD 2000000000
|
@@ -1,380 +0,0 @@
|
||||
#
|
||||
/*
|
||||
* Copyright (C) 2013
|
||||
* Jan van Katwijk (J.vanKatwijk@gmail.com)
|
||||
* Lazy Chair Computing
|
||||
*
|
||||
* This file is part of the Qt-DAB
|
||||
* Qt-DAB is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Qt-DAB is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Qt-DAB; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The convolutional decoder for the FIC blocks has fixed sized
|
||||
* blocks, so we can use pre-generated code - for that specific
|
||||
* sized blocks - generated by the spiral project
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mm_malloc.h"
|
||||
#include "viterbi-768.h"
|
||||
#include <cstring>
|
||||
#ifdef __MINGW32__
|
||||
#include <intrin.h>
|
||||
#include <malloc.h>
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
//
|
||||
//	It took a while to discover that the polynomials we used
|
||||
//	in our own "straightforward" implementation were bit-reversed!!
|
||||
//	The official ones are on top (octal notation).
|
||||
#define K 7
|
||||
#define POLYS { 0155, 0117, 0123, 0155}
|
||||
//	The same four values written in decimal:
//#define POLYS {109, 79, 83, 109}
|
||||
//	In bit-reversed form the polynomials are (octal, then decimal):
|
||||
//#define POLYS { 0133, 0171, 0145, 0133 }
|
||||
//#define POLYS { 91, 121, 101, 91 }
|
||||
|
||||
#define METRICSHIFT 0
|
||||
#define PRECISIONSHIFT 0
|
||||
#define RENORMALIZE_THRESHOLD 137
|
||||
|
||||
//
|
||||
/* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
/* Exactly one of the two is non-zero unless K-1 equals 8, and with   */
/* K = 7 the first branch applies: ADDSHIFT is 2 and SUBSHIFT is 0.   */
|
||||
#if (K-1<8)
|
||||
#define ADDSHIFT (8-(K-1))
|
||||
#define SUBSHIFT 0
|
||||
#elif (K-1>8)
|
||||
#define ADDSHIFT 0
|
||||
#define SUBSHIFT ((K-1)-8)
|
||||
#else
|
||||
#define ADDSHIFT 0
|
||||
#define SUBSHIFT 0
|
||||
#endif
|
||||
|
||||
|
||||
//	256 entry parity lookup table: Partab [v] is 1 when v has an odd
//	number of bits set and 0 otherwise.
//	Every group of 16 consecutive entries is one of two patterns:
//	the parity of the low nibble (upper nibble with even parity)
//	or its complement (upper nibble with odd parity).
#define	PARTAB_ROW_EVEN	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
#define	PARTAB_ROW_ODD	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1

static uint8_t Partab [] = {
	PARTAB_ROW_EVEN, PARTAB_ROW_ODD,  PARTAB_ROW_ODD,  PARTAB_ROW_EVEN,
	PARTAB_ROW_ODD,  PARTAB_ROW_EVEN, PARTAB_ROW_EVEN, PARTAB_ROW_ODD,
	PARTAB_ROW_ODD,  PARTAB_ROW_EVEN, PARTAB_ROW_EVEN, PARTAB_ROW_ODD,
	PARTAB_ROW_EVEN, PARTAB_ROW_ODD,  PARTAB_ROW_ODD,  PARTAB_ROW_EVEN
};

#undef	PARTAB_ROW_EVEN
#undef	PARTAB_ROW_ODD
|
||||
|
||||
//
|
||||
// One could create the table above, i.e. a 256 entry
|
||||
// odd-parity lookup table by the following function
|
||||
// It is now precomputed
|
||||
void viterbi_768::partab_init (void){
|
||||
int16_t i,cnt,ti;
|
||||
|
||||
for (i = 0; i < 256; i++){
|
||||
cnt = 0;
|
||||
ti = i;
|
||||
while (ti != 0) {
|
||||
if (ti & 1) cnt++;
|
||||
ti >>= 1;
|
||||
}
|
||||
Partab [i] = cnt & 1;
|
||||
}
|
||||
}
|
||||
|
||||
int viterbi_768::parity (int x){
|
||||
/* Fold down to one byte */
|
||||
x ^= (x >> 16);
|
||||
x ^= (x >> 8);
|
||||
return Partab [x];
|
||||
// return parityb(x);
|
||||
}
|
||||
|
||||
static inline
|
||||
void renormalize (COMPUTETYPE* X, COMPUTETYPE threshold){
|
||||
int32_t i;
|
||||
|
||||
if (X [0] > threshold){
|
||||
COMPUTETYPE min = X [0];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
if (min > X[i])
|
||||
min = X[i];
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
X[i] -= min;
|
||||
}
|
||||
}
|
||||
//
|
||||
//
|
||||
//	The main use of the viterbi decoder is in handling the FIC blocks.
//	There are (in mode 1) 3 ofdm blocks, giving 4 FIC blocks.
//	They all have a predefined length. In that case we use the
//	"fast" (i.e. spiral) code, otherwise we use the generic code.
|
||||
//	Set up a decoder for blocks of "wordlength" data bits.
//
//	wordlength	number of data bits per block, stored in frameBits
//	spiral		when true, deconvolve uses update_viterbi_blk_SPIRAL,
//			otherwise update_viterbi_blk_GENERIC
//
//	The constructor allocates the 16 byte aligned work buffers
//	(data, symbols and vp. decisions), fills Branchtab from POLYS
//	and initializes the path metrics.
//	An allocation failure is reported on stdout; the pointer involved
//	is then set to NULL, so the destructor never frees an
//	indeterminate pointer.
viterbi_768::viterbi_768 (int16_t wordlength, bool spiral) {
int	polys [RATE]	= POLYS;
int16_t	i, state;
#ifdef __MINGW32__
uint32_t	size;
#endif

	frameBits	= wordlength;
	this -> spiral	= spiral;
//	partab_init ();

//	B I G  N O T E	The spiral code uses (wordLength + (K - 1)) * sizeof ...
//	for the decisions. However, the application then crashes,
//	so something is not OK.
//	By doubling the size, the problem disappears. It is not solved though
//	and it has not been investigated further.
#ifdef __MINGW32__
	size	= (2 * ((wordlength + (K - 1)) / 8 + 1 + 16)) & ~0xF;
	data	= (uint8_t *)_aligned_malloc (size, 16);
	size	= (2 * (RATE * (wordlength + (K - 1)) *
	                             sizeof (COMPUTETYPE) + 16)) & ~0xF;
	symbols	= (COMPUTETYPE *)_aligned_malloc (size, 16);
	size	= 2 * (wordlength + (K - 1)) * sizeof (decision_t);
	size	= (size + 16) & ~0xF;
	vp. decisions = (decision_t *)_aligned_malloc (size, 16);
#else
//	posix_memalign returns non-zero on failure and is not required
//	to store anything in the pointer in that case
	if (posix_memalign ((void**)&data, 16,
	                    (wordlength + (K - 1)) / 8 + 1)) {
	   printf("Allocation of data array failed\n");
	   data	= NULL;
	}
	if (posix_memalign ((void**)&symbols, 16,
	                    RATE * (wordlength + (K - 1)) *
	                                      sizeof (COMPUTETYPE))) {
	   printf("Allocation of symbols array failed\n");
	   symbols	= NULL;
	}
	if (posix_memalign ((void**)&(vp. decisions), 16,
	                    2 * (wordlength + (K - 1)) *
	                                      sizeof (decision_t))) {
	   printf ("Allocation of vp decisions failed\n");
	   vp. decisions = NULL;
	}
#endif

//	An entry of Branchtab is 255 when exactly one of the following
//	holds: the polynomial is negative, or the state bits selected
//	by the polynomial have odd parity. Otherwise it is 0.
	for (state = 0; state < NUMSTATES / 2; state ++) {
	   for (i = 0; i < RATE; i ++) {
	      const int tapParity = parity ((2 * state) & abs (polys [i]));
	      Branchtab [i * NUMSTATES / 2 + state] =
	                     ((polys [i] < 0) ^ tapParity) ? 255 : 0;
	   }
	}
//
	init_viterbi (&vp, 0);
}
|
||||
|
||||
|
||||
//	Release the three work buffers obtained in the constructor, using
//	the release function that belongs to the allocator used there.
viterbi_768::~viterbi_768 (void) {
#ifndef __MINGW32__
	free (symbols);
	free (data);
	free (vp. decisions);
#else
	_aligned_free (symbols);
	_aligned_free (data);
	_aligned_free (vp. decisions);
#endif
}
|
||||
|
||||
//	maskTable [o] selects bit o of a byte, bit 0 being the most
//	significant one.
static int maskTable [] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};

//	Return bit o (0 .. 7, counted from the most significant bit)
//	of v as 0 or 1.
static inline
uint8_t	getbit (uint8_t v, int32_t o) {
	const bool isSet = (v & maskTable [o]) != 0;
	return isSet ? 1 : 0;
}
|
||||
|
||||
//static
|
||||
//uint8_t getbit (uint8_t v, int32_t o) {
|
||||
//uint8_t mask = 1 << (7 - o);
|
||||
// return (v & mask) ? 1 : 0;
|
||||
//}
|
||||
|
||||
// depends: POLYS, RATE, COMPUTETYPE
|
||||
// encode was only used for testing purposes
|
||||
//void encode (/*const*/ unsigned char *bytes, COMPUTETYPE *symbols, int nbits) {
|
||||
//int i, k;
|
||||
//int polys [RATE] = POLYS;
|
||||
//int sr = 0;
|
||||
//
|
||||
//// FIXME: this is slowish
|
||||
//// -- remember about the padding!
|
||||
// for (i = 0; i < nbits + (K - 1); i++) {
|
||||
// int b = bytes[i/8];
|
||||
// int j = i % 8;
|
||||
// int bit = (b >> (7-j)) & 1;
|
||||
//
|
||||
// sr = (sr << 1) | bit;
|
||||
// for (k = 0; k < RATE; k++)
|
||||
// *(symbols++) = parity(sr & polys[k]);
|
||||
// }
|
||||
//}
|
||||
|
||||
// Note that our DAB environment maps the softbits to -127 .. 127
|
||||
// we have to map that onto 0 .. 255
|
||||
|
||||
void viterbi_768::deconvolve (int16_t *input, uint8_t *output) {
|
||||
uint32_t i;
|
||||
|
||||
init_viterbi (&vp, 0);
|
||||
for (i = 0; i < (uint16_t)(frameBits + (K - 1)) * RATE; i ++) {
|
||||
int16_t temp = input [i] + 127;
|
||||
if (temp < 0) temp = 0;
|
||||
if (temp > 255) temp = 255;
|
||||
symbols [i] = temp;
|
||||
}
|
||||
if (!spiral)
|
||||
update_viterbi_blk_GENERIC (&vp, symbols, frameBits + (K - 1));
|
||||
else
|
||||
update_viterbi_blk_SPIRAL (&vp, symbols, frameBits + (K - 1));
|
||||
|
||||
chainback_viterbi (&vp, data, frameBits, 0);
|
||||
|
||||
for (i = 0; i < (uint16_t)frameBits; i ++)
|
||||
output [i] = getbit (data [i >> 3], i & 07);
|
||||
}
|
||||
|
||||
/* C-language butterfly */
|
||||
void viterbi_768::BFLY (int i, int s, COMPUTETYPE * syms,
|
||||
struct v * vp, decision_t * d) {
|
||||
int32_t j, decision0, decision1;
|
||||
COMPUTETYPE metric,m0,m1,m2,m3;
|
||||
|
||||
metric =0;
|
||||
for (j = 0; j < RATE;j++)
|
||||
metric += (Branchtab [i + j * NUMSTATES/2] ^ syms[s*RATE+j]) >>
|
||||
METRICSHIFT ;
|
||||
metric = metric >> PRECISIONSHIFT;
|
||||
const COMPUTETYPE max =
|
||||
((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
|
||||
|
||||
m0 = vp->old_metrics->t [i] + metric;
|
||||
m1 = vp->old_metrics->t [i + NUMSTATES / 2] + (max - metric);
|
||||
m2 = vp->old_metrics->t [i] + (max - metric);
|
||||
m3 = vp->old_metrics->t [i + NUMSTATES / 2] + metric;
|
||||
|
||||
decision0 = ((int32_t)(m0 - m1)) > 0;
|
||||
decision1 = ((int32_t)(m2 - m3)) > 0;
|
||||
|
||||
vp -> new_metrics-> t[2 * i] = decision0 ? m1 : m0;
|
||||
vp -> new_metrics-> t[2 * i + 1] = decision1 ? m3 : m2;
|
||||
|
||||
d -> w[i/(sizeof(uint32_t)*8/2)+s*(sizeof(decision_t)/sizeof(uint32_t))] |=
|
||||
(decision0|decision1<<1) << ((2*i)&(sizeof(uint32_t)*8-1));
|
||||
}
|
||||
|
||||
/* Update decoder with a block of demodulated symbols
|
||||
* Note that nbits is the number of decoded data bits, not the number
|
||||
* of symbols!
|
||||
*/
|
||||
void viterbi_768::update_viterbi_blk_GENERIC (struct v *vp,
|
||||
COMPUTETYPE *syms,
|
||||
int16_t nbits){
|
||||
decision_t *d = (decision_t *)vp -> decisions;
|
||||
int32_t s, i;
|
||||
|
||||
for (s = 0; s < nbits; s++)
|
||||
memset (&d [s], 0, sizeof (decision_t));
|
||||
|
||||
for (s = 0; s < nbits; s++){
|
||||
void *tmp;
|
||||
for (i = 0; i < NUMSTATES / 2; i++)
|
||||
BFLY (i, s, syms, vp, vp -> decisions);
|
||||
|
||||
renormalize (vp -> new_metrics -> t, RENORMALIZE_THRESHOLD);
|
||||
// Swap pointers to old and new metrics
|
||||
tmp = vp -> old_metrics;
|
||||
vp -> old_metrics = vp -> new_metrics;
|
||||
vp -> new_metrics = (metric_t *)tmp;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
#if defined(SSE_AVAILABLE)
|
||||
void FULL_SPIRAL_sse (int,
|
||||
#elif defined(NEON_AVAILABLE)
|
||||
void FULL_SPIRAL_neon (int,
|
||||
#else
|
||||
void FULL_SPIRAL_no_sse (int,
|
||||
#endif
|
||||
COMPUTETYPE *Y,
|
||||
COMPUTETYPE *X,
|
||||
COMPUTETYPE *syms,
|
||||
DECISIONTYPE *dec,
|
||||
COMPUTETYPE *Branchtab);
|
||||
}
|
||||
|
||||
void viterbi_768::update_viterbi_blk_SPIRAL (struct v *vp,
|
||||
COMPUTETYPE *syms,
|
||||
int16_t nbits){
|
||||
decision_t *d = (decision_t *)vp -> decisions;
|
||||
int32_t s;
|
||||
|
||||
for (s = 0; s < nbits; s++)
|
||||
memset (d + s, 0, sizeof(decision_t));
|
||||
|
||||
#if defined(SSE_AVAILABLE)
|
||||
FULL_SPIRAL_sse (nbits,
|
||||
#elif defined(NEON_AVAILABLE)
|
||||
FULL_SPIRAL_neon (nbits,
|
||||
#else
|
||||
FULL_SPIRAL_no_sse (nbits,
|
||||
#endif
|
||||
vp -> new_metrics -> t,
|
||||
vp -> old_metrics -> t,
|
||||
syms,
|
||||
d -> t, Branchtab);
|
||||
}
|
||||
|
||||
//
|
||||
/* Viterbi chainback */
|
||||
void viterbi_768::chainback_viterbi (struct v *vp,
|
||||
uint8_t *data, /* Decoded output data */
|
||||
int16_t nbits, /* Number of data bits */
|
||||
uint16_t endstate){ /*Terminal encoder state */
|
||||
decision_t *d = vp -> decisions;
|
||||
|
||||
/* Make room beyond the end of the encoder register so we can
|
||||
* accumulate a full byte of decoded data
|
||||
*/
|
||||
endstate = (endstate % NUMSTATES) << ADDSHIFT;
|
||||
/* The store into data[] only needs to be done every 8 bits.
|
||||
* But this avoids a conditional branch, and the writes will
|
||||
* combine in the cache anyway
|
||||
*/
|
||||
d += (K - 1); /* Look past tail */
|
||||
while (nbits-- != 0){
|
||||
int k;
|
||||
// int l = (endstate >> ADDSHIFT) / 32;
|
||||
// int m = (endstate >> ADDSHIFT) % 32;
|
||||
k = (d [nbits].w [(endstate >> ADDSHIFT) / 32] >>
|
||||
((endstate>>ADDSHIFT) % 32)) & 1;
|
||||
endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
|
||||
data [nbits >> 3] = endstate >> SUBSHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize Viterbi decoder for start of new frame */
|
||||
void viterbi_768::init_viterbi (struct v *p, int16_t starting_state){
|
||||
struct v *vp = p;
|
||||
int32_t i;
|
||||
|
||||
for (i = 0; i < NUMSTATES; i++)
|
||||
vp -> metrics1.t[i] = 63;
|
||||
|
||||
vp -> old_metrics = &vp -> metrics1;
|
||||
vp -> new_metrics = &vp -> metrics2;
|
||||
/* Bias known start state */
|
||||
vp -> old_metrics-> t[starting_state & (NUMSTATES-1)] = 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user