First working implementation of the wavelet-based codec

This commit is contained in:
csoler 2015-08-16 22:59:49 -04:00
parent 49c6c8a1fa
commit d28c1898fd
3 changed files with 519 additions and 13 deletions

DaubechyWavelets.h View File

@@ -0,0 +1,267 @@
#pragma once
#include <vector>
#include <string.h>
#include <stdlib.h>
#include <stdexcept>
#include <math.h>
#ifdef USE_SSE_INSTRUCTIONS
#include <Math/sse_block.h>
#endif
template<class FLOAT> class DaubechyWavelets
{
public:
typedef enum { DWT_DAUB02=2, DWT_DAUB04=4, DWT_DAUB12=12, DWT_DAUB20=20 } WaveletType ;
typedef enum { DWT_FORWARD=1, DWT_BACKWARD=0 } TransformType ;
static void DWT2D(FLOAT *data,unsigned long int W,unsigned long int H,WaveletType type,TransformType tr)
{
// W and H must be powers of two: pwt() wraps indices with the bit mask n-1.
// The -1 offsets switch to the unit-offset (1-based) indexing convention
// used by wtn()/pwt(), which are adapted from the Numerical Recipes routines.
unsigned long int nn[2] = {W,H} ;
wtn(&data[-1], &nn[-1],2, tr, waveletFilter(type), pwt) ;
}
static void DWT1D(FLOAT *data,unsigned long int W,WaveletType type,TransformType tr)
{
// W must be a power of two (see DWT2D).
unsigned long int nn[1] = {W} ;
wtn(&data[-1], &nn[-1],1, tr, waveletFilter(type), pwt) ;
}
private:
class wavefilt
{
public:
wavefilt(int n)
{
int k;
FLOAT sig = -1.0;
static const FLOAT c2[5]={ 0.0, sqrt(2.0)/2.0, sqrt(2.0)/2.0, 0.0, 0.0 };
static const FLOAT c4[5]={ 0.0, 0.4829629131445341, 0.8365163037378079, 0.2241438680420134,-0.1294095225512604 };
static const FLOAT c12[13]={0.0,0.111540743350, 0.494623890398, 0.751133908021,
0.315250351709,-0.226264693965,-0.129766867567,
0.097501605587, 0.027522865530,-0.031582039318,
0.000553842201, 0.004777257511,-0.001077301085};
static const FLOAT c20[21]={0.0,0.026670057901, 0.188176800078, 0.527201188932,
0.688459039454, 0.281172343661,-0.249846424327,
-0.195946274377, 0.127369340336, 0.093057364604,
-0.071394147166,-0.029457536822, 0.033212674059,
0.003606553567,-0.010733175483, 0.001395351747,
0.001992405295,-0.000685856695,-0.000116466855,
0.000093588670,-0.000013264203 };
ncof= (n==2)?4:n;
const FLOAT *tmpcc ;
cc.resize(ncof+1) ;
cr.resize(ncof+1) ;
if (n == 2)
{
tmpcc=c2;
cc[1] = tmpcc[1] ;
cc[2] = tmpcc[2] ;
cc[3] = 0.0f ;
cc[4] = 0.0f ;
cr[1] = tmpcc[1] ;
cr[2] =-tmpcc[2] ;
cr[3] = 0.0f ;
cr[4] = 0.0f ;
ioff = joff = -1 ;
}
else
{
if (n == 4)
tmpcc=c4;
else if (n == 12)
tmpcc=c12;
else if (n == 20)
tmpcc=c20;
else
throw std::runtime_error("unimplemented value n in pwtset");
for (k=1;k<=n;k++)
{
cc[k] = tmpcc[k] ;
cr[ncof+1-k]=sig*tmpcc[k];
sig = -sig;
}
ioff = joff = -(n >> 1);
}
}
~wavefilt() {}
int ncof,ioff,joff;
std::vector<FLOAT> cc;
std::vector<FLOAT> cr;
} ;
static const wavefilt& waveletFilter(WaveletType type)
{
static wavefilt *daub02filt = NULL ;
static wavefilt *daub04filt = NULL ;
static wavefilt *daub12filt = NULL ;
static wavefilt *daub20filt = NULL ;
switch(type)
{
case DWT_DAUB02: if(daub02filt == NULL)
daub02filt = new wavefilt(2) ;
return *daub02filt ;
case DWT_DAUB04: if(daub04filt == NULL)
daub04filt = new wavefilt(4) ;
return *daub04filt ;
case DWT_DAUB12: if(daub12filt == NULL)
daub12filt = new wavefilt(12) ;
return *daub12filt ;
case DWT_DAUB20: if(daub20filt == NULL)
daub20filt = new wavefilt(20) ;
return *daub20filt ;
default:
throw std::runtime_error("Unknown wavelet type.") ;
}
}
static void pwt(FLOAT a[], unsigned long n, int isign,const wavefilt& wfilt)
{
/********************** BEGIN SIGNED PART *************************/
/** md5sum = 2b9e1e38ac690f50806873cdb4a061ea **/
/** Validation date = 08/10/10 **/
/******************************************************************/
unsigned long i,ii,ni,nj ;
if (n < 4)
return;
FLOAT *wksp=new FLOAT[n+1];//vector(1,n);
FLOAT ai,ai1 ;
unsigned long int nmod=wfilt.ncof*n;
unsigned long int n1=n-1;
unsigned long int nh=n >> 1;
memset(wksp,0,(n+1)*sizeof(FLOAT)) ;
if (isign == DWT_FORWARD)
for (ii=1,i=1;i<=n;i+=2,ii++)
{
ni=i+nmod+wfilt.ioff;
nj=i+nmod+wfilt.joff;
#ifdef USE_SSE_INSTRUCTIONS
#warning Using SSE2 Instruction set for wavelet internal loops
for (int k=1;k<=wfilt.ncof;k+=4)
{
int jf=ni+k;
int jr=nj+k;
sse_block w1(wfilt.cc[k],wfilt.cc[k+1],wfilt.cc[k+2],wfilt.cc[k+3]) ;
sse_block w2(wfilt.cr[k],wfilt.cr[k+1],wfilt.cr[k+2],wfilt.cr[k+3]) ;
sse_block a1( a[1+((jf+0)&n1)], a[1+((jf+1)&n1)], a[1+((jf+2)&n1)], a[1+((jf+3)&n1)]) ;
sse_block a2( a[1+((jr+0)&n1)], a[1+((jr+1)&n1)], a[1+((jr+2)&n1)], a[1+((jr+3)&n1)]) ;
sse_block wk1( w1*a1 ) ;
sse_block wk2( w2*a2 ) ;
wksp[ii ] += wk1.sum() ;
wksp[ii+nh] += wk2.sum() ;
}
#else
for (int k=1;k<=wfilt.ncof;k++)
{
int jf=n1 & (ni+k);
int jr=n1 & (nj+k);
wksp[ii] += wfilt.cc[k]*a[jf+1];
wksp[ii+nh] += wfilt.cr[k]*a[jr+1];
}
#endif
}
else
for (ii=1,i=1;i<=n;i+=2,ii++)
{
ai=a[ii];
ai1=a[ii+nh];
ni=i+nmod+wfilt.ioff;
nj=i+nmod+wfilt.joff;
#ifdef USE_SSE_INSTRUCTIONS
sse_block ai_sse( ai,ai,ai,ai ) ;
sse_block ai1_sse( ai1,ai1,ai1,ai1 ) ;
for (int k=1;k<=wfilt.ncof;k+=4)
{
int jf=ni+k ;
int jr=nj+k ; // in fact we have jf==jr, so the code is simpler.
sse_block w1(wksp[1+((jf+0) & n1)],wksp[1+((jf+1) & n1)],wksp[1+((jf+2) & n1)],wksp[1+((jf+3) & n1)]) ;
w1 += sse_block(wfilt.cc[k+0],wfilt.cc[k+1],wfilt.cc[k+2],wfilt.cc[k+3]) * ai_sse ;
w1 += sse_block(wfilt.cr[k+0],wfilt.cr[k+1],wfilt.cr[k+2],wfilt.cr[k+3]) * ai1_sse ;
wksp[1+((jr+0) & n1)] = w1[0] ;
wksp[1+((jr+1) & n1)] = w1[1] ;
wksp[1+((jr+2) & n1)] = w1[2] ;
wksp[1+((jr+3) & n1)] = w1[3] ;
}
#else
for (int k=1;k<=wfilt.ncof;++k)
{
wksp[(n1 & (ni+k))+1] += wfilt.cc[k]*ai;
wksp[(n1 & (nj+k))+1] += wfilt.cr[k]*ai1;
}
#endif
}
for (unsigned long j=1;j<=n;j++)
a[j]=wksp[j];
delete[] wksp ;//free_vector(wksp,1,n);
/********************** END SIGNED PART *************************/
}
static void wtn(FLOAT a[], unsigned long nn[], int ndim, int isign, const wavefilt& w,void (*wtstep)(FLOAT [], unsigned long, int,const wavefilt&))
{
unsigned long i1,i2,i3,k,n,nnew,nprev=1,nt,ntot=1;
int idim;
FLOAT *wksp;
for (idim=1;idim<=ndim;idim++)
ntot *= nn[idim];
wksp=new FLOAT[ntot+1] ; //vector(1,ntot);
for (idim=1;idim<=ndim;idim++)
{
n=nn[idim];
nnew=n*nprev;
if (n > 4)
for (i2=0;i2<ntot;i2+=nnew)
for (i1=1;i1<=nprev;i1++)
{
for (i3=i1+i2,k=1;k<=n;k++,i3+=nprev) wksp[k]=a[i3];
if(isign == DWT_FORWARD)
for(nt=n;nt>=4;nt >>= 1)
(*wtstep)(wksp,nt,isign,w);
else
for(nt=4;nt<=n;nt <<= 1)
(*wtstep)(wksp,nt,isign,w);
for (i3=i1+i2,k=1;k<=n;k++,i3+=nprev) a[i3]=wksp[k];
}
nprev=nnew;
}
delete[] wksp ;//free_vector(wksp,1,ntot);
}
};
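For context, here is a minimal usage sketch (illustration only, not part of the commit) showing how the codec below drives this class: a forward 2D transform of a 128x128 float buffer followed by the inverse transform, which recovers the input up to floating-point rounding. The buffer size and the DWT_DAUB04 filter match what WaveletVideo::encodeData()/decodeData() use; both dimensions must be powers of two.

#include <vector>
#include "DaubechyWavelets.h"

void dwt_round_trip_example()                    // hypothetical test function
{
    const unsigned long W = 128, H = 128;        // power-of-two dimensions required
    std::vector<float> img(W*H, 0.0f);
    img[64*W + 64] = 1.0f;                       // arbitrary test content

    // Forward transform: img now holds the wavelet coefficients.
    DaubechyWavelets<float>::DWT2D(&img[0], W, H,
                                   DaubechyWavelets<float>::DWT_DAUB04,
                                   DaubechyWavelets<float>::DWT_FORWARD);

    // Inverse transform: recovers the original buffer (up to rounding).
    DaubechyWavelets<float>::DWT2D(&img[0], W, H,
                                   DaubechyWavelets<float>::DWT_DAUB04,
                                   DaubechyWavelets<float>::DWT_BACKWARD);
}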

VideoProcessor.cpp View File

@@ -7,12 +7,14 @@
#include "VideoProcessor.h"
#include "QVideoDevice.h"
#include "DaubechyWavelets.h"
VideoProcessor::VideoProcessor()
:_encoded_frame_size(128,128)
{
_decoded_output_device = NULL ;
_encoding_current_codec = VIDEO_PROCESSOR_CODEC_ID_JPEG_VIDEO;
//_encoding_current_codec = VIDEO_PROCESSOR_CODEC_ID_JPEG_VIDEO;
_encoding_current_codec = VIDEO_PROCESSOR_CODEC_ID_DDWT_VIDEO;
}
bool VideoProcessor::processImage(const QImage& img,uint32_t size_hint,uint32_t& encoded_size)
@@ -31,7 +33,6 @@ bool VideoProcessor::processImage(const QImage& img,uint32_t size_hint,uint32_t&
// std::cerr << "reducing to " << _frame_size.width() << " x " << _frame_size.height() << std::endl;
void *data = NULL;
encoded_size = 0 ;
if(codec)
@@ -111,6 +112,8 @@ void VideoProcessor::receiveEncodedData(const RsVOIPDataChunk& chunk)
if(codec != NULL)
codec->decodeData(chunk,img) ;
else
std::cerr << "Unknown decoding codec: " << codid << std::endl;
if(_decoded_output_device)
_decoded_output_device->showFrame(img) ;
@@ -159,7 +162,7 @@ bool JPEGVideo::decodeData(const RsVOIPDataChunk& chunk,QImage& image)
QImage res = _decoded_reference_frame ;
for(uint32_t i=0;i<image.byteCount();++i)
for(int i=0;i<image.byteCount();++i)
{
int new_val = (int)res.bits()[i] + ((int)image.bits()[i] - 128) ;
@@ -186,7 +189,7 @@ bool JPEGVideo::encodeData(const QImage& image,uint32_t /* size_hint */,RsVOIPDa
// compute difference with reference frame.
encoded_frame = image ;
for(uint32_t i=0;i<image.byteCount();++i)
for(int i=0;i<image.byteCount();++i)
{
// We cannot use basic modulo 256 arithmetic, because the decompressed JPEG frames do not follow the same rules (values are clamped)
// and cause color blotches when perturbed by a differential frame.
@@ -222,7 +225,7 @@ bool JPEGVideo::encodeData(const QImage& image,uint32_t /* size_hint */,RsVOIPDa
((unsigned char *)voip_chunk.data)[2] = flags & 0xff ;
((unsigned char *)voip_chunk.data)[3] = (flags >> 8) & 0xff ;
memcpy(voip_chunk.data+HEADER_SIZE,qb.data(),qb.size()) ;
memcpy(&((unsigned char*)voip_chunk.data)[HEADER_SIZE],qb.data(),qb.size()) ;
voip_chunk.size = HEADER_SIZE + qb.size() ;
voip_chunk.type = RsVOIPDataChunk::RS_VOIP_DATA_TYPE_VIDEO ;
@@ -230,3 +233,230 @@ bool JPEGVideo::encodeData(const QImage& image,uint32_t /* size_hint */,RsVOIPDa
return true ;
}
bool WaveletVideo::encodeData(const QImage& image,uint32_t size_hint,RsVOIPDataChunk& voip_chunk)
{
static const int WAVELET_IMG_SIZE = 128 ;
static const float W_THRESHOLD = 0.005 ; // low quality
//static const float W_THRESHOLD = 0.0001; // high quality
//static const float W_THRESHOLD = 0.0005; // medium quality
static const int W2 = WAVELET_IMG_SIZE ;
static const int H2 = WAVELET_IMG_SIZE ;
assert(image.width() == W2) ;
assert(image.height() == H2) ;
float *temp = new float[W2*H2] ;
std::cerr << " codec type: wavelets." << std::endl;
// We should perform some interpolation here ;-)
//
// QImage::Format_RGB32 stores each pixel as bytes B,G,R,0xff (on little-endian machines), so read bytes 0..2; byte 3 is the alpha/padding byte.
for(int i=0;i<W2*H2;++i)
temp[i] = (0.11*image.constBits()[4*i+0] + 0.59*image.constBits()[4*i+1] + 0.3*image.constBits()[4*i+2]) / 255.0 ;
std::cerr << " resized image to B&W " << W2 << "x" << H2 << std::endl;
DaubechyWavelets<float>::DWT2D(temp,W2,H2,DaubechyWavelets<float>::DWT_DAUB04,DaubechyWavelets<float>::DWT_FORWARD) ;
// Now estimate the max energy in the W coefs, and only keep the largest.
float mx = 0.0f ;
for(int i=0;i<W2*H2;++i)
if(mx < fabsf(temp[i]))
mx = fabs(temp[i]) ;
mx *= 1.1; // This avoids quantisation problems with wavelet coefs when they get too close to mx.
std::cerr << " max wavelet coef : " << mx << std::endl;
std::vector<uint16_t> compressed_values ;
compressed_values.reserve(W2*H2) ;
for(int i=0;i<W2*H2;++i)
if(fabs(temp[i]) >= W_THRESHOLD*mx) // This needs to be improved. Wavelets do not all have the same visual impact.
{
// add one value, using 16 bits for coordinates and 16 bits for the value.
compressed_values.push_back((uint16_t)i) ;
compressed_values.push_back(quantize_16b(temp[i],mx)) ;
//float f2 = from_quantized_16b(quantize_16b(temp[i],mx),mx) ;
//if(fabs(f2 - temp[i]) >= 0.01*(fabs(temp[i])+fabs(f2)))
//std::cerr << " before: " << temp[i] << ", quantised=" << quantize_16b(temp[i],mx)<< ", after: " << f2 << std::endl;
}
delete[] temp ;
// Serialise all values into a memory buffer. This needs to be taken care of because of endian issues.
int compressed_size = 4 + compressed_values.size()*2 ;
std::cerr << " threshold : " << W_THRESHOLD << std::endl;
std::cerr << " values kept: " << compressed_values.size()/2 << std::endl;
std::cerr << " compression: " << compressed_size/float(W2*H2*3)*100 << " %" << std::endl;
voip_chunk.data = malloc(HEADER_SIZE + compressed_size) ;
// build header
uint32_t flags = 0 ;
((unsigned char *)voip_chunk.data)[0] = VideoProcessor::VIDEO_PROCESSOR_CODEC_ID_DDWT_VIDEO & 0xff ;
((unsigned char *)voip_chunk.data)[1] = (VideoProcessor::VIDEO_PROCESSOR_CODEC_ID_DDWT_VIDEO >> 8) & 0xff ;
((unsigned char *)voip_chunk.data)[2] = flags & 0xff ;
((unsigned char *)voip_chunk.data)[3] = (flags >> 8) & 0xff ;
unsigned char *compressed_mem = &((unsigned char *)voip_chunk.data)[HEADER_SIZE] ;
serialise_ufloat(compressed_mem,mx) ;
for(uint32_t i=0;i<compressed_values.size();++i)
{
compressed_mem[4 + 2*i+0] = compressed_values[i] & 0xff ;
compressed_mem[4 + 2*i+1] = compressed_values[i] >> 8 ;
}
voip_chunk.type = RsVOIPDataChunk::RS_VOIP_DATA_TYPE_VIDEO ;
voip_chunk.size = HEADER_SIZE + compressed_size ;
return true ;
}
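For reference, the chunk assembled above has the following byte layout (inferred from the writes above; decodeData() below reads it back in the same order):

// offset 0..1 : codec id, little endian (VIDEO_PROCESSOR_CODEC_ID_DDWT_VIDEO)
// offset 2..3 : flags, little endian (currently always 0)
// offset 4..7 : maximum wavelet coefficient M, packed by serialise_ufloat()
// offset 8..  : one (index, quantised value) pair of little-endian uint16_t per
//               retained coefficient; the index is the position in the 128x128
//               coefficient array, the value is decoded with from_quantized_16b().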
bool WaveletVideo::decodeData(const RsVOIPDataChunk& chunk,QImage& image)
{
static const int WAVELET_IMG_SIZE = 128 ;
static const int W2 = WAVELET_IMG_SIZE ;
static const int H2 = WAVELET_IMG_SIZE ;
float *temp = new float[W2*H2] ;
const unsigned char *compressed_mem = &static_cast<const unsigned char *>(chunk.data)[HEADER_SIZE] ;
int compressed_size = chunk.size - HEADER_SIZE;
memset(temp,0,W2*H2*sizeof(float)) ;
float M = deserialise_ufloat(compressed_mem);
#ifdef VOIP_CODEC_DEBUG
std::cerr << " codec type: wavelets." << std::endl;
std::cerr << " max coef: " << M << std::endl;
#endif
for(int i=4;i<compressed_size;i+=4)
{
// read all values. first 2 bytes: image coordinates.
// next two bytes: value.
//
uint16_t indx = compressed_mem[i+0] + (compressed_mem[i+1] << 8) ;
uint16_t encv = compressed_mem[i+2] + (compressed_mem[i+3] << 8) ;
float f = from_quantized_16b(encv,M) ;
temp[indx] = f ;
}
#ifdef VOIP_CODEC_DEBUG
std::cerr << " values read: " << compressed_size/4-1 << std::endl;
#endif
DaubechyWavelets<float>::DWT2D(temp,W2,H2,DaubechyWavelets<float>::DWT_DAUB04,DaubechyWavelets<float>::DWT_BACKWARD) ;
#ifdef VOIP_CODEC_DEBUG
std::cerr << " resizing image to: " << w << "x" << h << std::endl;
#endif
image = QImage(W2,H2,QImage::Format_RGB32) ;
int indx = 0 ;
for(int j=0;j<H2;++j)
for(int i=0;i<W2;++i,++indx)
{
uint32_t val = std::min(255,std::max(0,(int)(255*temp[indx]))) ;
QRgb rgb = (0xff << 24) + (val << 16) + (val << 8) + val ;
image.setPixel(i,j,rgb);
}
delete[] temp ;
return true ;
}
uint16_t WaveletVideo::quantize_16b(float x,float M)
{
// Quantize x as
//     x ~= sign * M * m * 2^{-p}
//
// where m is coded on MANTISSE_BITS=9 bits (0->511), p is coded on EXPONENT_BITS=6 bits (0->63),
// and the sign takes the remaining bit, all packed into a single uint16_t. M is the maximum
// coefficient over the quantization process.
//
// Values smaller than 1e-8*M are quantized to 0, so the represented magnitudes range from
// about 1e-8*M up to M.
//
// All calculations are performed on x/M, scaled so that the mantissa uses its full MANTISSE_BITS bits.
//
static const float LOG2 = log(2.0f) ;
int m,p ;
if(fabs(x) < 1e-8*M)
{
m = 0 ;
p = 0 ;
}
else
{
float log2f = log(fabsf(x)/M)/LOG2 ;
int mexp = (int)floor(MANTISSE_BITS - log2f) ;
m = (int)floor(pow(2.0f,mexp+log2f)) ;
p = mexp ;
if(p > (1<<EXPONENT_BITS)-1)
m=0 ;
}
return (uint16_t)(p & ((1<<EXPONENT_BITS)-1)) + (uint16_t)((m & ((1<<MANTISSE_BITS)-1)) << EXPONENT_BITS) + ((x<0.0)?32768:0);
}
float WaveletVideo::from_quantized_16b(uint16_t n,float M)
{
M *= (n&32768)?-1:1 ;
n &= 32767 ;
uint32_t p = n & ((1<<EXPONENT_BITS)-1) ;
uint32_t m = (n & (((1<<MANTISSE_BITS)-1) << EXPONENT_BITS)) >> EXPONENT_BITS ;
// Both branches evaluate M * m * 2^{-p}; the split keeps the shift amount small enough for a 32-bit int.
if(p > 10)
return M * m / 1024.0f / (float)(1 << (p-10)) ;
else
return M * m / (float)(1 << p) ;
}
void WaveletVideo::serialise_ufloat(unsigned char *mem, float f)
{
if(f < 0.0f)
{
std::cerr << "(EE) Cannot serialise invalid negative float value " << f << " in " << __PRETTY_FUNCTION__ << std::endl;
return ;
}
// This serialisation is quite accurate: the maximum relative error is about
// 0.01%, and most of the time it is below 1e-05%. The error is also well
// distributed over the range of values.
//
uint32_t n = (f < 1e-7)?(~(uint32_t)0): ((uint32_t)( (1.0f/(1.0f+f) * (~(uint32_t)0)))) ;
mem[0] = n & 0xff ; n >>= 8 ;
mem[1] = n & 0xff ; n >>= 8 ;
mem[2] = n & 0xff ; n >>= 8 ;
mem[3] = n & 0xff ;
}
float WaveletVideo::deserialise_ufloat(const unsigned char *mem)
{
uint32_t n = mem[3] ;
n = (n << 8) + mem[2] ;
n = (n << 8) + mem[1] ;
n = (n << 8) + mem[0] ;
return 1.0f/ ( n/(float)(~(uint32_t)0)) - 1.0f ;
}
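To make the coefficient packing and the header serialisation above concrete, here is a short self-contained sketch (illustration only; decode16() is a hypothetical stand-in that mirrors from_quantized_16b() with MANTISSE_BITS = 9 and EXPONENT_BITS = 6, and the second part reproduces the serialise_ufloat()/deserialise_ufloat() mapping):

#include <cmath>
#include <cstdio>
#include <stdint.h>

// Layout of the 16-bit word: bit 15 = sign, bits 14..6 = mantissa m (0..511),
// bits 5..0 = exponent p (0..63); the value is sign * M * m * 2^{-p}.
static float decode16(uint16_t n, float M)
{
    const float sign = (n & 0x8000) ? -1.0f : 1.0f;
    const unsigned p = n & 0x3f;
    const unsigned m = (n >> 6) & 0x1ff;
    return sign * M * std::ldexp((float)m, -(int)p);   // ldexp avoids shifting 1<<p for large p
}

int main()
{
    const float M = 2.0f;                                   // max coefficient, sent in the chunk header
    const uint16_t word = (uint16_t)((384u << 6) | 9u);     // m=384, p=9, positive sign
    std::printf("decoded coefficient: %f\n", decode16(word, M));   // 2.0 * 384 / 512 = 1.5

    // Unsigned-float packing used for M itself: f -> n = (2^32-1) / (1+f), and back.
    const float f = 3.0f;
    const uint32_t n = (uint32_t)(1.0f/(1.0f + f) * (float)~(uint32_t)0);
    const float back = 1.0f/(n/(float)~(uint32_t)0) - 1.0f;
    std::printf("ufloat round trip: %f -> %u -> %f\n", f, (unsigned)n, back);  // ~3.0
    return 0;
}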

VideoProcessor.h View File

@@ -11,6 +11,9 @@ class VideoCodec
public:
virtual bool encodeData(const QImage& Image, uint32_t size_hint, RsVOIPDataChunk& chunk) = 0;
virtual bool decodeData(const RsVOIPDataChunk& chunk,QImage& image) = 0;
protected:
static const uint32_t HEADER_SIZE = 0x04 ;
};
// Now derive various image encoding/decoding algorithms.
@@ -25,7 +28,6 @@ protected:
virtual bool encodeData(const QImage& Image, uint32_t size_hint, RsVOIPDataChunk& chunk) ;
virtual bool decodeData(const RsVOIPDataChunk& chunk,QImage& image) ;
static const uint32_t HEADER_SIZE = 0x04 ;
static const uint32_t JPEG_VIDEO_FLAGS_DIFFERENTIAL_FRAME = 0x0001 ;
private:
QImage _decoded_reference_frame ;
@@ -35,17 +37,24 @@ private:
uint32_t _encoded_ref_frame_count ;
};
class DifferentialWaveletVideo: public VideoCodec
class WaveletVideo: public VideoCodec
{
public:
DifferentialWaveletVideo() {}
WaveletVideo() {}
protected:
virtual bool encodeData(const QImage& Image, uint32_t size_hint, RsVOIPDataChunk& chunk) { return true ; }
virtual bool decodeData(const RsVOIPDataChunk& chunk,QImage& image) { return true ; }
virtual bool encodeData(const QImage& Image, uint32_t size_hint, RsVOIPDataChunk& chunk) ;
virtual bool decodeData(const RsVOIPDataChunk& chunk,QImage& image) ;
private:
QImage _last_reference_frame ;
static const int MANTISSE_BITS = 9 ;
static const int EXPONENT_BITS = 6 ;
static void serialise_ufloat(unsigned char *mem, float f);
static float deserialise_ufloat(const unsigned char *mem);
static float from_quantized_16b(uint16_t n, float M);
static uint16_t quantize_16b(float x, float M);
};
// This class decodes video from a stream. It keeps a queue of
@@ -106,7 +115,7 @@ class VideoProcessor
// =====================================================================================
JPEGVideo _jpeg_video_codec ;
DifferentialWaveletVideo _ddwt_video_codec ;
WaveletVideo _ddwt_video_codec ;
uint16_t _encoding_current_codec ;
};