/* OPTICAL FLOW ROUTINE after Liu et al.
 *
 * implemented by Uli Mezger Juni,2000
 * 
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "liu.h"

#include "../FW/labImage.h"
#include "../FW/labImgDraw.h"

#include "mathmi.h"

//Maximum frame buffer size (number of slots in the static picture buffer).
//The guard now tests the macro that is actually defined, so a value
//supplied before this point (header or compiler flag) is respected.
#ifndef MAX_S_LIU
#define MAX_S_LIU 25
#endif

//Compile-time switches
//SHOW_FILTER: when non-zero the filter stages dump their planes with
//make_pic_and_save() and extract_optic_flow() saves its output and exits.
//NOTE(review): make_pic_and_save() is only compiled when DEBUG is non-zero.
#define SHOW_FILTER 0
//FAST: non-zero selects the generate_*_fast() filter routines in I_ijk()
#define FAST 1
//PICTURE: non-zero copies the centre frame into the output picture
#define PICTURE 1
//DEBUG: non-zero compiles write_matrices() and make_pic_and_save()
#define DEBUG 0
//Save matrices on HD, or not? (only evaluated inside '#if DEBUG' code)
#define WRITE_MATRIX 0

//Static picture buffer (ring buffer of the most recent input frames)
static Picture *p_store_pic[MAX_S_LIU];

//Output pictures of the Gauss / Hermite filter operation.
//Prefix t_/x_/y_ = plane after the temporal / x / y filter pass;
//the three digits are the indices (i,j,k) used by I_ijk().
static float *floatpic_t_001,                  *floatpic_y_001;//
static float                  *floatpic_x_011, *floatpic_y_011;//
static float *floatpic_t_100, *floatpic_x_100, *floatpic_y_100;//
static float                  *floatpic_x_101, *floatpic_y_101;//
static float                                   *floatpic_y_020;//
static float                                   *floatpic_y_110;//
static float                  *floatpic_x_200, *floatpic_y_200;//
static float                  *floatpic_x_010, *floatpic_y_010;//

//Sampled Gauss and Hermite filters; stored as look-up tables.
//hermite_xy / hermite_t are indexed [order 0..2][tap].
//NOTE(review): gausixy / gausit are only allocated and resized in this
//file, never read.
static float *gausixy, *gausit;
static float **hermite_xy, **hermite_t;

//Motion field components (result of the last solve_equation() call)
float u_liu,v_liu;

//Matrices and vectors for the QR decomposition (all 1-based, see init_math())
static float **A,**Q,**R;
static float **P1,**P2,**P1xA;
static float *b,*Qxb;

//Flag to compute new filter pictures (set by extract_optic_flow(),
//cleared by I_ijk() once the planes have been generated)
int start_generate;
//Index of the buffer slot that receives the next frame
static int ringbuffer_liu;

//indicate first optic flow calculation
//=>all the necessary 'init stuff' happens then
int initi=1;
//Set to 1 (presumably by another module - it is never set to 1 in this
//file) to request re-allocation of the filter look-up tables
int change=0;

//Save matrices only once on HD, just for debug
int matrix_stop=0;

//Counts how many pictures are currently in the buffer (defined elsewhere)
extern int count_pic_liu;

/////////////////
//Optical Flow //
/////////////////
//Feed one frame into the optic flow pipeline.
//
//While fewer than W_t frames are buffered, the call only stores a copy of
//'p_src' (made with copyImage()) in the static picture buffer and returns 0.
//Once the buffer is full, every call stores the new frame in the current
//ring-buffer slot, copies the centre frame into 'p_out' (if PICTURE is
//enabled), runs extract_optic_flow() on the centre frame and returns 1.
//
// p_src         newest input frame
// W_x, W_t      spatial / temporal filter window sizes; W_t is clamped to
//               MAX_S_LIU. W_y is accepted but not used.
// x0,y0,x1,y1   rectangle in which flow vectors are computed
// stepx, stepy  distance between two computed vectors
// lampda_min, residual_min, cond
//               confidence thresholds, forwarded in this order as
//               par1, par2, par3 of extract_optic_flow()
// liu_field     receives the accepted flow vectors
// p_out         output picture
//
//Returns 1 if optic flow was computed, 0 otherwise (buffer still filling,
//W_t < 1, or the frame could not be copied).
//
//NOTE(review): validate_vector() compares par2 against the condition number
//and par3 against the residual, i.e. 'residual_min' and 'cond' appear to be
//used in swapped roles. Left unchanged because callers may have tuned their
//values to the current behaviour - confirm.
int calculate_optic_flow_liu(Picture *p_src, 
			     int W_x, int  W_y, int W_t, 
			     int x0, int y0, int x1, int y1,
			     int stepx, int stepy,
			     float lampda_min, 
			     float residual_min, 
			     float cond,
			     vec_field *liu_field, 
			     Picture *p_out) 
{  
  //Index of the centre frame (= 't') the flow is computed for
  int teo_tmp;
  float sigmaxy;
  float sigmat;
  Picture *p_new;
  
  (void) W_y;
  
  //Without at least one frame there is nothing to do; this also keeps the
  //modulo operations below away from a division by zero
  if(W_t < 1)
    return 0;
  
  //Only MAX_S_LIU pictures fit in the buffer
  if(W_t > MAX_S_LIU)
    {
      W_t=MAX_S_LIU;
      fprintf(stdout,"W_t>MAX_S_LIU => No way!!\n");
    }
  
  //Derive the filter widths from the (clamped) window sizes.
  //NOTE(review): both divisions are integer divisions, so e.g. W_x=5 gives
  //1 and any window smaller than 4 gives 0 - confirm this is intended.
  sigmaxy=W_x/4;
  sigmat=W_t/4;
  
  // Fill picture buffer with pictures 
  if( count_pic_liu < W_t )
    {
      //Countdown buffer
      printf("Buffer: %d\n",(W_t-count_pic_liu-1));
      
      //Copy new picture
      p_new=copyImage(p_src);
      
      //Copy fails?
      if(p_new==NULL || p_new->data==NULL)
	{
	  fprintf(stdout,"Doh! -> p_store_pic\n");
	  if(p_new!=NULL)
	    freeImage(p_new);
	}
      else
	{
	  //A frame left over from an earlier run must not leak
	  if(p_store_pic[count_pic_liu]!=NULL)
	    freeImage(p_store_pic[count_pic_liu]);
	  
	  //New picture added to buffer => increase the counter!
	  p_store_pic[count_pic_liu]=p_new;
	  count_pic_liu++;
	}
      
      //The first processed frame goes into the last slot
      ringbuffer_liu=(W_t-1);
      
      //Indicate start of routine
      initi=1;
      change=0;
      
      return 0;
    }
  
  //////////////////////////////////////////
  //Buffer full !!! => Start processing !!//
  //////////////////////////////////////////
  
  //Copy actual picture in frame
  p_new=copyImage(p_src);
  if(p_new==NULL || p_new->data==NULL)
    {
      fprintf(stdout,"Doh! -> p_store_pic\n");
      if(p_new!=NULL)
	freeImage(p_new);
      return 0;
    }
  
  //On the first processing call the slot still holds the last frame of the
  //fill phase; afterwards it is empty (released at the end of the
  //previous call)
  if(p_store_pic[ringbuffer_liu]!=NULL)
    freeImage(p_store_pic[ringbuffer_liu]);
  p_store_pic[ringbuffer_liu]=p_new;
  
  //Centre frame to operate on  
  teo_tmp=(ringbuffer_liu+((W_t-1)>>1)+1)%(W_t);

#if PICTURE 
  if(p_out->format==pix_grey)
    { //Give centre frame as overlay pic
      memcpy( (unsigned char *)p_out->data,
	      (unsigned char *)p_store_pic[teo_tmp]->data,
	      (size_t) p_store_pic[teo_tmp]->datasize);
    }
  else
    {//Implicit conversion from grey to rgb: every grey value is written
      //three times
      BYTE *ptr2=p_out->data,*ptr1=p_store_pic[teo_tmp]->data;
      
      for(int i=0; i<p_store_pic[teo_tmp]->datasize; i++)
	{
	  *ptr2++=*ptr1;
	  *ptr2++=*ptr1;
	  *ptr2++=*ptr1++;
	}
    }
#endif /* PICTURE */
  
  /////////////////////////
  //OPTICAL FLOW routine //
  ////////////////////////
  extract_optic_flow(x0, y0, x1, y1, teo_tmp, // Area
		     W_x, W_t, stepx, stepy,
		     sigmaxy, sigmat, //Dimensions&sigma 
		     lampda_min, 
		     residual_min, 
		     cond, 
		     0, //Parameters to play with
		     p_out, //Output picture => drawn vector field
		     liu_field); 
  
  //Increase ring buffer-counter MODULO size of ring buffer
  //->no pictures need to be recopied!
  ringbuffer_liu = (ringbuffer_liu+1)%(W_t);
  
  //Make room for next picture; clear the slot so it is never released twice
  freeImage(p_store_pic[ringbuffer_liu]);
  p_store_pic[ringbuffer_liu]=NULL;
  
  return 1;
}

/////////////////////////////////////////////////////////////////////////////
//Optic flow routine; Spatio-temporal filtering with 3D Hermite Polynomials//
////////////////////////////////////////////////////////////////////////////
//Compute flow vectors for frame 't' of the picture buffer.
//
// x0,y0,x1,y1   requested rectangle; it is shrunk so that the spatial
//               filter (half width (wxy-1)/2) stays inside the picture
// t             index of the frame in the picture buffer
// wxy, wt       spatial / temporal filter window sizes
// stepx, stepy  distance between computed vectors (values < 1 become 1)
// sigmaxy, sigmat  widths handed to build_lut_gaus_herm()
// par1..par3    thresholds handed to validate_vector()
// par6          unused
// p_out         only used when SHOW_FILTER is enabled
// motion_field  receives every accepted vector
//
//On the first call after the buffer was filled (initi==1) all work buffers
//and look-up tables are allocated; when 'change' is 1 the look-up tables
//are resized and rebuilt.
void extract_optic_flow(int x0, int y0, int x1, int y1, int t,
			unsigned int wxy, unsigned int wt, 
			int stepx, int stepy,
			float sigmaxy, float sigmat,
			float par1, float par2, float par3, float par6, 
			Picture *p_out, 
			vec_field *motion_field)
{
  int x,y;
  //Half widths of the spatial and the temporal filter
  int a=(wxy-1)>>1;
  int at=(wt-1)>>1;
  
  (void) par6;
  
  //The look-up tables are only allocated for wxy >= wt (see
  //init_gaus_hermite() / realloc_gaus_hermite()); filling them for any
  //other combination would write through missing or too small tables
  if(wt < 1 || wxy < wt)
    {
      fprintf(stderr,
	      "extract_optic_flow: need wxy >= wt >= 1 (wxy=%u, wt=%u)\n",
	      wxy,wt);
      return;
    }
  
  //Initialize procedure (memory allocation, building look-up-tables, etc.)
  if(initi==1)
    {
      //Memory alloc for filtered pictures
      init_floatpics(t);
      
      //Memory alloc for matrices & vectors
      init_math();
      
      //Memory alloc for Gaus/Hermite LUT 
      init_gaus_hermite(wxy,wt);
      
      //Sample Gaus / Hermite filter for look up table
      build_lut_gaus_herm(a, at, sigmaxy, sigmat);
      
      //Init phase done and no change happened
      initi=0; change=0;
    }
  
  //Anything changed => new memory has to be allocated
  //=> it all happens here!!
  if(change==1)
    {
      //Memory alloc for Gaus/Hermite LUT 
      realloc_gaus_hermite(wxy,wt);
      
      //Sample Gaus / Hermite filter for new size and sigma 
      build_lut_gaus_herm(a,at,sigmaxy,sigmat);
      
      //Change happened
      change=0;
    }
  
  //Set start flag for building filtered version of input sequence
  //=>(floatpic100, ...) and that new optic flow has to  be calculated
  start_generate=1;
  
  //Calculate optic flow vectors in rectangle defined by (x0,y0)-(x1,y1)
  //for every 'stepy' & 'stepx'
  if(stepx < 1)
    stepx=1;
     
  if(stepy < 1)
    stepy=1;
  
  //Keep a border of 'a' pixels so the filter never leaves the picture
  if(y0<a)
    y0=a;
  
  if(x0<a)
    x0=a;
  
  if((y1+a)>p_store_pic[t]->height)
    y1=p_store_pic[t]->height-a;
  
  if((x1+a) > p_store_pic[t]->width)
    x1=p_store_pic[t]->width-a;

  for(y=y0; y < y1; y+=stepy)
    {
      for(x=x0; x < x1; x+=stepx)
        {
	  //Build the equation system for the optic flow vector
	  //on position 'x','y' in picture 't' with the filter
	  //dimensions 'wxy' for spatial and 'wt' for temporal
	  build_eqation(x, y, t, wxy, wt, x0, y0, x1, y1);
	  
	  //Solve equation system for u,v = Optic flow vectors
	  solve_equation();
	  
	  //Incorporate confidence measurements.
	  //NOTE(review): with '||' a vector passes as soon as ONE component
	  //is smaller than (at-1); '&&' may have been intended - confirm.
	  if( ( FABS(u_liu) < (at-1) || FABS(v_liu) < (at-1)) &&
	      validate_vector(par1, par2, par3) ) 
	    {
#if DEBUG
	      //Save matrices to HD  
	      if(!matrix_stop && WRITE_MATRIX)
		{
		  write_matrices("QR.txt",x,y);
		}
#endif   
	      write_in_motion_field_liu(x, y, u_liu, v_liu,
					motion_field);
	      
	    }
	}
    }
  
  //No need to save the next matrices equations
  matrix_stop=1;
  
#if SHOW_FILTER
  saveImage("000_vectorfield.pgm",p_out);
  exit(1);
#endif  
  
}

/////////////////////////////
//Build the equation system//
////////////////////////////
//Fills the 3x2 matrix A and the vector b (both 1-based) with the filter
//responses I_ijk at position (x,y) of frame 't'. The first-order row is
//weighted with 1, the two second-order rows with 4.
void build_eqation(int x, int y, int t, 
		   unsigned  int wx, unsigned int wt,
		   int x0, int y0, int x1, int y1)
{
  const float weight_first=1.0;
  const float weight_second=4.0;

  //Shorthand for the filter response of order (i,j,k) at this position
#define LIU_RESPONSE(i,j,k) \
  I_ijk((i),(j),(k), x, y, t, wx, wt, x0, y0, x1, y1)

  //Row 1: first order derivatives
  A[1][1]=weight_first*LIU_RESPONSE(1,0,0);
  A[1][2]=weight_first*LIU_RESPONSE(0,1,0);

  //Rows 2 and 3: second order derivatives; the mixed term is shared
  A[2][1]=weight_second*LIU_RESPONSE(2,0,0);
  A[2][2]=weight_second*LIU_RESPONSE(1,1,0);
  A[3][1]=A[2][2];
  A[3][2]=weight_second*LIU_RESPONSE(0,2,0);

  //Right hand side: the responses that include the temporal derivative
  b[1]=weight_first*LIU_RESPONSE(0,0,1);
  b[2]=weight_second*LIU_RESPONSE(1,0,1);
  b[3]=weight_second*LIU_RESPONSE(0,1,1);

#undef LIU_RESPONSE
}

////////////////////////////////////////////////////////
//Finally solve the equation sytem by QR-decomposition//
///////////////////////////////////////////////////////
//Triangularises A with two reflections (P1, P2), stores R = P2*P1*A and
//Q^T*b (in Qxb) and back-substitutes for the flow components, which are
//written to the globals u_liu / v_liu. A component whose pivot in R is not
//larger than EPSILON is set to INVALID.
void solve_equation()
{
  //First reflection: zero the first column of A below its first element
  calc_P1(A[1][1],A[2][1],A[3][1],P1);
  matrix_mult_3x3_3x2(P1,A,P1xA);
  
  //Second reflection: zero the second column below its second element
  calc_P2(P1xA[2][2],P1xA[3][2],P2);
  
  //R = upper triangle matrix
  matrix_mult_3x3_3x2(P2,P1xA,R);
  
  //P2xP1xA=R => Q=P1xP2; Q is transposed in place before it is applied
  //to b, so Qxb[3] holds the residual
  matrix_mult_3x3(P1,P2,Q);
  transpose_3x3(Q);
  matrix_mult_3x3_3x1(Q,b,Qxb);

  //Back substitution, second unknown first (u_liu depends on v_liu)
  v_liu = (R[2][2] > EPSILON) ? (-Qxb[2] / R[2][2]) : INVALID;
  
  u_liu = (R[1][1] > EPSILON)
    ? ((-Qxb[1] - v_liu * R[1][2]) / R[1][1])
    : INVALID;
}

//Sample the Hermite filters of order 0..2 into the look-up tables.
//'a' is the half width of the spatial table (2*a+1 taps, width sigmaxy),
//'c' the half width of the temporal table (2*c+1 taps, width sigmat).
void build_lut_gaus_herm(int a, int c, float sigmaxy, float sigmat)
{
  int offset;
  int order;
  
  //Spatial filter masks
  for(offset=-a; offset<=a; offset++)
    {
      for(order=0; order<3; order++)
	hermite_xy[order][offset+a]=hermite(order, offset, sigmaxy);
    }
  
  //Temporal filter masks
  for(offset=-c; offset<=c; offset++)
    {
      for(order=0; order<3; order++)
	hermite_t[order][offset+c]=hermite(order, offset, sigmat);
    }
}

//Evaluate hermite poly nr. 'i' on position 'x' with 'sigma' as variance  
#define SQRT2 1.4142135624
#define SQRT2PI	2.506628275	/* sqrt (2 * PI) */
float hermite(int i, int x, float sigma)
{
  double inv_denom;
  double x_r,G;
  double std_dev;

  std_dev=sqrt(sigma);

  G=exp(-(x*x)/(2*sigma)) / (SQRT2PI * std_dev);

  inv_denom = 1.0 / (SQRT2 * std_dev);

  x_r = x * inv_denom;
  
  switch(i)
    {
    case 0: return G; break;      
      //Hermite order 1  
    case 1: return G*inv_denom * (2.0 * x_r); break;
      //Hermite order 2
    case 2: return G*(inv_denom*inv_denom)*(4.0 * x_r*x_r - 2.0); break;
      //Hermite order 3
    case 3: return G*(inv_denom*inv_denom*inv_denom)*(8.0*x_r*x_r*x_r-12.0*x_r); break;
        default: return 0; break;	
    }
  return 0;
}

///////////////////////////////
//Generate I_ijk (see theory)//
//////////////////////////////
//Returns the filter response of order (i,j,k) at pixel (x,y).
//If 'start_generate' is set, the three separable filter passes are run
//first for the rectangle (x0,y0)-(x1,y1) and the flag is cleared, so the
//filtering happens once per processed frame. Supported orders are
//100, 010, 200, 110, 020, 001, 101 and 011; anything else returns -1.
float I_ijk(int i,int j, int k, 
	    int x, int y, int t,
	    unsigned int wx, unsigned int wt, 
	    int x0, int y0, int x1, int y1)
{   
  const int half_xy=(wx-1)>>1;
  const int half_t=(wt-1)>>1;
  const float *plane=floatpic_y_100;
  int supported=1;
  
  //Filter whole picture sequence at once 
  if(start_generate==1)
    { 
      //3D convolution separated into three 1D ones:
      //time first, then x direction, finally y direction
#if !FAST    
      generate_f_t(half_xy, half_t, x0, y0, x1, y1, t);      
      generate_fx_t(half_xy, x0, y0, x1, y1, t);
      generate_fy_t(half_xy, x0, y0, x1, y1, t); 
#else
      generate_f_t_fast(half_xy, half_t, x0, y0, x1, y1, t);
      generate_fx_t_fast(half_xy, x0, y0, x1, y1, t);
      generate_fy_t_fast(half_xy, x0, y0, x1, y1, t);
#endif /* FAST */

      //filtering done
      //filtered versions are in floatpic_y_~ 
      start_generate=0;
    }
  
  //Select the plane that belongs to the requested order
  if(k==0)
    {
      if(i==1 && j==0)
	plane=floatpic_y_100;
      else if(i==0 && j==1)
	plane=floatpic_y_010;
      else if(i==2 && j==0)
	plane=floatpic_y_200;
      else if(i==1 && j==1)
	plane=floatpic_y_110;
      else if(i==0 && j==2)
	plane=floatpic_y_020;
      else
	supported=0;
    }
  else if(k==1)
    {
      if(i==0 && j==0)
	plane=floatpic_y_001;
      else if(i==1 && j==0)
	plane=floatpic_y_101;
      else if(i==0 && j==1)
	plane=floatpic_y_011;
      else
	supported=0;
    }
  else
    supported=0;
  
  if(!supported)
    return -1;
  
  return plane[y * p_store_pic[t]->width + x];
}

////////////////////////////////////////
//Incorporate confidence measurements //
///////////////////////////////////////
//Decides whether the vector from the last solve_equation() call is
//accepted (1) or rejected (0).
//
//The vector is rejected outright unless both |A[1][1]| and |A[1][2]|
//exceed EPSILON. Otherwise every threshold that is non-zero enables one
//criterion, and all enabled criteria must hold:
//   par1 != 0 : min(R[1][1],R[2][2]) > par1
//   par2 != 0 : condition estimate |max|/|min| of the R diagonal > par2
//   par3 != 0 : residual Qxb[3] < par3
//With all three thresholds 0 every vector passes.
//
//NOTE(review): a HIGH condition number passes the par2 test although the
//original comments describe high condition numbers as a sign of numerical
//instability - confirm the direction of that comparison.
int validate_vector(float par1, float par2, float par3)
{
  float diag_min, diag_max;
  float cond;
  
  if(!(FABS(A[1][1])>EPSILON && FABS(A[1][2])>EPSILON))
    return 0;
  
  diag_min = FMIN(R[2][2],R[1][1]);
  diag_max = FMAX(R[2][2],R[1][1]);
  
  //Ratio of the diagonal entries; INVALID if the smaller one vanishes
  if(FABS(diag_min) > EPSILON )
    cond = FABS(diag_max)/FABS(diag_min);
  else
    cond = INVALID;
  
  //Each enabled criterion can veto the vector
  if(par1!=0.0 && !(diag_min > par1))
    return 0;
  
  if(par2!=0.0 && !(cond > par2))
    return 0;
  
  //Qxb[3] => residual: the degree to which the equations disagree
  if(par3!=0.0 && !(Qxb[3] < par3))
    return 0;
  
  return 1;
}

#if !FAST 
//Temporal filtering (plain variant, only compiled when FAST is 0).
//For every pixel of the rectangle, enlarged by 'a' and clipped to the
//picture, the 2*c+1 frames around frame 't' are weighted with the temporal
//Hermite tables of order 1 (tmp_001) and order 0 (tmp_100); each result is
//stored in several floatpic_t_* planes.
//NOTE(review): most of the planes written here (floatpic_t_011, _101,
//_110, _010, _200, _020) are not declared among the file-scope buffers
//visible in this file, so this variant presumably does not compile as is.
void  generate_f_t(int a, int c, int x0, int y0, int x1, int y1, int t)
{
  int x,y,ni,tl,tl1;
  float tmp_001,tmp_100;
  int x0_t, y0_t, x1_t, y1_t;
  
  //Do not leave the picture
  if(x0 < a )
    x0_t=0;
  else
    x0_t=x0-a;
  
  if(y0 < a)
    y0_t=0;
  else
    y0_t=y0-a;
  
  if((x1+a) > p_store_pic[t]->width)
    x1_t=p_store_pic[t]->width;
  else
    x1_t=x1+a;
  
  if((y1+a) > p_store_pic[t]->height)
    y1_t=p_store_pic[t]->height;
  else
    y1_t=y1+a;
  
  for(y=y0_t; y < y1_t; y++)
    {
      for(x=x0_t; x < x1_t; x++)
	{
     	  tmp_001=0.0;tmp_100=0.0;
	  for(ni=-c; ni<=c; ni++ )
	    {
	      //Buffer slot of the frame 'ni' steps away from 't',
	      //wrapped modulo 2*c+1
	      tl=(t+ni+(c<<1)+1)%((c<<1)+1);
	      //Pixel offset inside a frame
	      tl1=y * p_store_pic[t]->width + x;
	      
	      tmp_001+= (float) p_store_pic[tl]->data[tl1] 
		* hermite_t[1][ni+c];
	      
	      tmp_100+= (float) p_store_pic[tl]->data[tl1] 
		* hermite_t[0][ni+c];
	    }
	  
	  tl = y * p_store_pic[t]->width + x;
	  
	  //Planes that carry the temporal derivative
	  floatpic_t_001[tl]=tmp_001;
	  floatpic_t_011[tl]=tmp_001;
	  floatpic_t_101[tl]=tmp_001;
	  
	  //Planes that are only smoothed in time
	  floatpic_t_110[tl]=tmp_100;  
	  floatpic_t_100[tl]=tmp_100;
	  floatpic_t_010[tl]=tmp_100;
	  floatpic_t_200[tl]=tmp_100;
	  floatpic_t_020[tl]=tmp_100;
	}
    }
#if SHOW_FILTER  
  make_pic_and_save(floatpic_t_001,"01_001t.pgm",x0_t,y0_t,x1_t,y1_t);
  make_pic_and_save(floatpic_t_100,"02_100t.pgm",x0_t,y0_t,x1_t,y1_t);
#endif
}

//Spatial filtering in x direction (plain variant, only compiled when FAST
//is 0). Rows y0-a .. y1+a (clipped to the picture) and columns x0 .. x1 of
//the floatpic_t_* planes are convolved along x with the spatial Hermite
//tables; the results go into the floatpic_x_* planes.
//NOTE(review): several planes used here (floatpic_t_101, _010, _011, _200
//and floatpic_x_001, _110, _020) are not declared among the file-scope
//buffers visible in this file.
void  generate_fx_t(int a, int x0, int y0, int x1, int y1,int t)
{
  int x,y,pj,tl;
  float tmp_100,tmp_101,tmp_010,tmp_011,tmp_200;
  
  int y0_x,y1_x;
  
  //Extend the row range by the filter half width, but stay in the picture
  if(y0 < a)
    y0_x=0;
  else
    y0_x=y0-a;
  
  if((y1+a) > p_store_pic[t]->height)
    y1_x=p_store_pic[t]->height;
  else
    y1_x=y1+a;
  
  for(y = y0_x; y < y1_x; y++)
    { 
      for(x = x0; x < x1; x++)
	{
	  tmp_100=0.0;tmp_101=0.0;
	  tmp_010=0.0;tmp_011=0.0; tmp_200=0.0;
	  for(pj=-a; pj<=a; pj++ )
	    {
	      //Neighbour 'pj' columns away in the same row
	      tl=y * p_store_pic[t]->width + (x+pj);
	      
	      //Order 1 in x
	      tmp_100+=floatpic_t_100[tl] 
		* hermite_xy[1][pj+a];
	      
	      tmp_101+=floatpic_t_101[tl] 
		* hermite_xy[1][pj+a];
	      
	      //Order 0 in x (smoothing only)
	      tmp_010+=floatpic_t_010[tl]
		* hermite_xy[0][pj+a];
	      
	      tmp_011+=floatpic_t_011[tl]
		* hermite_xy[0][pj+a];
	      
	      //Order 2 in x
	      tmp_200+=floatpic_t_200[tl]
	      	* hermite_xy[2][pj+a];
	    }
	  
	  tl=y * p_store_pic[t]->width + x ;
	  
	  //Planes that share a result receive the same value
	  floatpic_x_001[tl]=tmp_011;
	  floatpic_x_011[tl]=tmp_011;
	  
	  floatpic_x_101[tl]=tmp_101;
	  
	  floatpic_x_100[tl]=tmp_100;
	  floatpic_x_110[tl]=tmp_100;
	  
	  floatpic_x_010[tl]=tmp_010;
	  floatpic_x_020[tl]=tmp_010;
	  
	  floatpic_x_200[tl]=tmp_200;
	}
      }  
#if SHOW_FILTER
   make_pic_and_save(floatpic_x_100,"03_100x.pgm",x0,y0_x,x1,y1_x); 
   make_pic_and_save(floatpic_x_101,"04_101x.pgm",x0,y0_x,x1,y1_x);
   make_pic_and_save(floatpic_x_010,"05_010x.pgm",x0,y0_x,x1,y1_x);
   make_pic_and_save(floatpic_x_011,"06_011x.pgm",x0,y0_x,x1,y1_x);
   make_pic_and_save(floatpic_x_200,"07_200x.pgm",x0,y0_x,x1,y1_x);
#endif
}
//Spatial filtering in y direction (plain variant, only compiled when FAST
//is 0). The rectangle is first shrunk so that a border of 'a' pixels
//remains, then the floatpic_x_* planes are convolved along y with the
//spatial Hermite tables; the final responses go into the floatpic_y_*
//planes that I_ijk() reads.
//NOTE(review): floatpic_x_110, floatpic_x_020 and floatpic_x_001 are not
//declared among the file-scope buffers visible in this file.
void  generate_fy_t(int a, int x0, int y0, int x1, int y1,int t)
{
  int x,y,n,tl;
  float tmp_100,tmp_101,tmp_010,tmp_011,tmp_200,tmp_001,tmp_020,tmp_110;
  
  
  //Keep a border of 'a' pixels on every side
  if(y0 < a)
    y0=a;
  
  if((y1+a) > p_store_pic[t]->height)
    y1=p_store_pic[t]->height-a;

  if(x0 < a)
    x0=a;
  
  if((x1+a) > p_store_pic[t]->width)
    x1=p_store_pic[t]->width-a;

  for(y=y0; y<y1; y++)
    {
      for(x=x0; x<x1; x++)
	{
	  tmp_100=0.0; tmp_101=0.0; tmp_010=0.0; tmp_110=0.0;
	  tmp_011=0.0; tmp_200=0.0; tmp_001=0.0; tmp_020=0.0;
	  for(n=-a; n<=a; n++ )
	    { 
	      //Neighbour 'n' rows away in the same column
	      tl=(y+n) * p_store_pic[t]->width + x ;
	      
	      //The table order used along y is the middle index of the
	      //target plane (0, 1 or 2)
	      tmp_100+=floatpic_x_100[tl] 
	      * hermite_xy[0][n+a];
	      
	      tmp_110+=floatpic_x_110[tl] 
	      * hermite_xy[1][n+a];
	      
	      tmp_101+=floatpic_x_101[tl] 
		* hermite_xy[0][n+a];
	      
	      tmp_010+=floatpic_x_010[tl]
		* hermite_xy[1][n+a];
	      
	      tmp_020+=floatpic_x_020[tl]
		* hermite_xy[2][n+a];
	      
	      tmp_001+=floatpic_x_001[tl]
		* hermite_xy[0][n+a];
	      
	      tmp_011+=floatpic_x_011[tl]
		* hermite_xy[1][n+a];
	      
	      tmp_200+=floatpic_x_200[tl]
		* hermite_xy[0][n+a] ;
	    }	  
	  tl=y * p_store_pic[t]->width + x;
	  
	  //Result
	  floatpic_y_001[tl]=tmp_001;
	  
	  floatpic_y_011[tl]=tmp_011;
	  
	  floatpic_y_110[tl]=tmp_110;
	  
	  floatpic_y_101[tl]=tmp_101;
	  
	  floatpic_y_100[tl]=tmp_100;
	  
	  floatpic_y_010[tl]=tmp_010;
	  
	  floatpic_y_020[tl]=tmp_020;
	  
	  floatpic_y_200[tl]=tmp_200;
	}
  
      } 
#if SHOW_FILTER
  make_pic_and_save(floatpic_y_100,"08_100y.pgm",x0,y0,x1,y1);
  make_pic_and_save(floatpic_y_110,"09_110y.pgm",x0,y0,x1,y1);   
  make_pic_and_save(floatpic_y_101,"10_101y.pgm",x0,y0,x1,y1);  
  make_pic_and_save(floatpic_y_010,"11_010y.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_020,"12_020y.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_001,"13_001y.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_011,"14_011y.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_200,"15_200y.pgm",x0,y0,x1,y1);
#endif
}
#endif

//Temporal filtering (fast variant).
//For every pixel of the rectangle (x0-a,y0-a)-(x1+a,y1+a) the 2*c+1 frames
//around frame 't' are weighted with the temporal Hermite tables:
//order 1 -> floatpic_t_001, order 0 -> floatpic_t_100.
//The rectangle is NOT clipped here; the caller has to keep it inside the
//picture.
//NOTE(review): frames are addressed modulo 2*c+1, which equals the number
//of buffered frames only if that number is odd - confirm for even windows.
void  generate_f_t_fast(int a, int c, int x0, int y0, int x1, int y1, int t)
{
  const int col_first=x0-a, col_end=x1+a;
  const int row_first=y0-a, row_end=y1+a;
  const int ring_size=(c<<1)+1;
  int row, col, tap;
  
  for(row=row_first; row < row_end; row++)
    {
      for(col=col_first; col < col_end; col++)
	{
	  const int pixel=row * p_store_pic[t]->width + col;
	  float sum_001=0.0;
	  float sum_100=0.0;
	  
	  for(tap=-c; tap<=c; tap++)
	    {
	      //Which picture in buffer?
	      const int frame=(t+tap+ring_size)%ring_size;
	      const float grey=(float) p_store_pic[frame]->data[pixel];
	      
	      sum_001+= grey * hermite_t[1][tap+c];
	      sum_100+= grey * hermite_t[0][tap+c];
	    }
	  
	  floatpic_t_001[pixel]=sum_001;
	  floatpic_t_100[pixel]=sum_100;
	}
    }
#if SHOW_FILTER  
  make_pic_and_save(floatpic_t_001,"01_001t_fast.pgm",col_first,row_first,col_end,row_end);
  make_pic_and_save(floatpic_t_100,"02_100t_fast.pgm",col_first,row_first,col_end,row_end);
#endif
}


//Spatial filtering in x direction (fast variant).
//Rows y0-a .. y1+a and columns x0 .. x1 of the two temporal planes are
//convolved along x with the spatial Hermite tables:
//   floatpic_t_100 * order 1 -> floatpic_x_100
//   floatpic_t_001 * order 1 -> floatpic_x_101
//   floatpic_t_100 * order 0 -> floatpic_x_010
//   floatpic_t_001 * order 0 -> floatpic_x_011
//   floatpic_t_100 * order 2 -> floatpic_x_200
//No clipping is done here.
void  generate_fx_t_fast(int a, int x0, int y0, int x1, int y1,int t)
{
  const int row_first=y0-a, row_end=y1+a;
  int row, col, tap;
  
  for(row = row_first; row < row_end; row++)
    { 
      const int row_base=row*p_store_pic[t]->width;
      
      for(col = x0; col < x1; col++)
	{
	  const int centre=row_base+col;
	  float sum_100=0.0, sum_101=0.0;
	  float sum_010=0.0, sum_011=0.0, sum_200=0.0;
	  
	  for(tap=-a; tap<=a; tap++ )
	    { 	      
	      const float smooth_t=floatpic_t_100[centre+tap];
	      const float deriv_t=floatpic_t_001[centre+tap];
	      
	      sum_100+=smooth_t * hermite_xy[1][tap+a];
	      sum_101+=deriv_t * hermite_xy[1][tap+a];
	      sum_010+=smooth_t * hermite_xy[0][tap+a];
	      sum_011+=deriv_t * hermite_xy[0][tap+a];
	      sum_200+=smooth_t * hermite_xy[2][tap+a];
	    }
	  
	  floatpic_x_011[centre]=sum_011;
	  floatpic_x_101[centre]=sum_101;
	  floatpic_x_100[centre]=sum_100;
	  floatpic_x_010[centre]=sum_010;
	  floatpic_x_200[centre]=sum_200;
	}
    } 
  
#if SHOW_FILTER
  make_pic_and_save(floatpic_x_100,"03_100_fastx.pgm",x0,row_first,x1,row_end); 
  make_pic_and_save(floatpic_x_101,"04_101_fastx.pgm",x0,row_first,x1,row_end);
  make_pic_and_save(floatpic_x_010,"05_010_fastx.pgm",x0,row_first,x1,row_end);
  make_pic_and_save(floatpic_x_011,"06_011_fastx.pgm",x0,row_first,x1,row_end);
  make_pic_and_save(floatpic_x_200,"07_200_fastx.pgm",x0,row_first,x1,row_end);
#endif 
}

//Spatial filtering in y direction (fast variant).
//The rectangle (x0,y0)-(x1,y1) of the floatpic_x_* planes is convolved
//along y with the spatial Hermite tables; the results are the planes that
//I_ijk() reads:
//   floatpic_x_100 * order 0 -> floatpic_y_100
//   floatpic_x_100 * order 1 -> floatpic_y_110
//   floatpic_x_101 * order 0 -> floatpic_y_101
//   floatpic_x_010 * order 1 -> floatpic_y_010
//   floatpic_x_010 * order 2 -> floatpic_y_020
//   floatpic_x_011 * order 0 -> floatpic_y_001
//   floatpic_x_011 * order 1 -> floatpic_y_011
//   floatpic_x_200 * order 0 -> floatpic_y_200
//No clipping is done here.
void  generate_fy_t_fast(int a, int x0, int y0, int x1, int y1,int t)
{
  const int width=p_store_pic[t]->width;
  int row, col, tap;
  
  for(row=y0; row<y1; row++)
    {
      for(col=x0; col<x1; col++)
	{
	  const int centre=row * width + col;
	  float sum_100=0.0, sum_101=0.0, sum_010=0.0, sum_110=0.0;
	  float sum_011=0.0, sum_200=0.0, sum_001=0.0, sum_020=0.0;
	  
	  for(tap=-a; tap<=a; tap++ )
	    { 
	      //Same column, 'tap' rows away
	      const int src=centre + tap*width;
	      
	      sum_100+=floatpic_x_100[src] * hermite_xy[0][tap+a];
	      sum_110+=floatpic_x_100[src] * hermite_xy[1][tap+a];
	      sum_101+=floatpic_x_101[src] * hermite_xy[0][tap+a];
	      sum_010+=floatpic_x_010[src] * hermite_xy[1][tap+a];
	      sum_020+=floatpic_x_010[src] * hermite_xy[2][tap+a];
	      sum_001+=floatpic_x_011[src] * hermite_xy[0][tap+a];
	      sum_011+=floatpic_x_011[src] * hermite_xy[1][tap+a];
	      sum_200+=floatpic_x_200[src] * hermite_xy[0][tap+a];
	    }	  
	  
	  floatpic_y_001[centre]=sum_001;
	  floatpic_y_011[centre]=sum_011;
	  floatpic_y_101[centre]=sum_101;
	  floatpic_y_100[centre]=sum_100;
	  floatpic_y_110[centre]=sum_110;
	  floatpic_y_010[centre]=sum_010;
	  floatpic_y_020[centre]=sum_020;
	  floatpic_y_200[centre]=sum_200; 
	}
    } 

#if SHOW_FILTER
  make_pic_and_save(floatpic_y_100,"08_100y_fast.pgm",x0,y0,x1,y1);
  make_pic_and_save(floatpic_y_110,"09_110y_fast.pgm",x0,y0,x1,y1);   
  make_pic_and_save(floatpic_y_101,"10_101y_fast.pgm",x0,y0,x1,y1);  
  make_pic_and_save(floatpic_y_010,"11_010y_fast.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_020,"12_020y_fast.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_001,"13_001y_fast.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_011,"14_011y_fast.pgm",x0,y0,x1,y1); 
  make_pic_and_save(floatpic_y_200,"15_200y_fast.pgm",x0,y0,x1,y1);
#endif
}

////////////////////////////////
//Here comes the boring stuff //
///////////////////////////////
//Allocate the 1-based work matrices and vectors of the QR solver.
//NOTE(review): nothing in this file releases them again, so a repeated
//initialisation presumably leaks the previous set - confirm.
void init_math()
{
  //Equation system A*x = -b; A is 3x2
  A=matrixf(1,3,1,2);
  b=vectorf(1,3);
  
  //Decomposition A = Q*R and the rotated right hand side Q^T*b
  Q=matrixf(1,3,1,3);
  R=matrixf(1,3,1,2);
  Qxb=vectorf(1,3);
  
  //Temporaries: the two reflections and the intermediate product P1*A
  P1=matrixf(1,3,1,3);
  P2=matrixf(1,3,1,3);
  P1xA=matrixf(1,3,1,2);
}

//Release the plane 'old_plane' (NULL is fine) and allocate a fresh one of
//'bytes' bytes; terminates the program if the allocation fails.
static float *liu_alloc_floatpic(float *old_plane, size_t bytes)
{
  float *plane;
  
  free(old_plane);
  
  plane=(float *)malloc(bytes);
  if(plane==NULL && bytes!=0)
    {
      fprintf(stderr,"init_floatpics: out of memory (%lu bytes)\n",
	      (unsigned long) bytes);
      exit(1);
    }
  
  return plane;
}

//Allocate the planes that hold the filtered pictures. Every plane has one
//float per pixel of frame 't' in the picture buffer.
//The function may be called again (the init flag is raised whenever the
//picture buffer is refilled); planes from a previous call are released
//first, so nothing leaks and a changed picture size is picked up.
void init_floatpics(int t)
{
  //Computed in size_t so that large pictures do not overflow an int
  const size_t fp_size=(size_t) p_store_pic[t]->width
    * (size_t) p_store_pic[t]->height * sizeof(float);

#if !FAST
  //NOTE(review): these planes are not declared among the file-scope
  //buffers visible in this file
  floatpic_t_010=liu_alloc_floatpic(floatpic_t_010, fp_size);
  floatpic_t_200=liu_alloc_floatpic(floatpic_t_200, fp_size);
  floatpic_x_110=liu_alloc_floatpic(floatpic_x_110, fp_size);
  floatpic_t_110=liu_alloc_floatpic(floatpic_t_110, fp_size);
  floatpic_x_020=liu_alloc_floatpic(floatpic_x_020, fp_size);
  floatpic_t_020=liu_alloc_floatpic(floatpic_t_020, fp_size);
  floatpic_x_001=liu_alloc_floatpic(floatpic_x_001, fp_size);
  floatpic_t_101=liu_alloc_floatpic(floatpic_t_101, fp_size);
  floatpic_t_011=liu_alloc_floatpic(floatpic_t_011, fp_size);
#endif

  //Output of the temporal pass
  floatpic_t_100=liu_alloc_floatpic(floatpic_t_100, fp_size);
  floatpic_t_001=liu_alloc_floatpic(floatpic_t_001, fp_size);

  //Output of the x pass
  floatpic_x_011=liu_alloc_floatpic(floatpic_x_011, fp_size);
  floatpic_x_100=liu_alloc_floatpic(floatpic_x_100, fp_size);
  floatpic_x_101=liu_alloc_floatpic(floatpic_x_101, fp_size);
  floatpic_x_200=liu_alloc_floatpic(floatpic_x_200, fp_size);
  floatpic_x_010=liu_alloc_floatpic(floatpic_x_010, fp_size);

  //Output of the y pass (read by I_ijk())
  floatpic_y_110=liu_alloc_floatpic(floatpic_y_110, fp_size);
  floatpic_y_020=liu_alloc_floatpic(floatpic_y_020, fp_size);
  floatpic_y_200=liu_alloc_floatpic(floatpic_y_200, fp_size);
  floatpic_y_001=liu_alloc_floatpic(floatpic_y_001, fp_size);
  floatpic_y_101=liu_alloc_floatpic(floatpic_y_101, fp_size);
  floatpic_y_011=liu_alloc_floatpic(floatpic_y_011, fp_size);
  floatpic_y_100=liu_alloc_floatpic(floatpic_y_100, fp_size);
  floatpic_y_010=liu_alloc_floatpic(floatpic_y_010, fp_size);
}

//Allocate 'bytes' bytes; print the module's error marker and terminate the
//program if the allocation fails.
static void *liu_lut_malloc(size_t bytes)
{
  void *mem=malloc(bytes);
  
  if(mem==NULL)
    {
      fprintf(stdout,"Doh!");
      exit(1);
    }
  
  return mem;
}

//Release a table of three rows (a NULL table is ignored)
static void liu_lut_free_rows(float **table)
{
  int i;
  
  if(table==NULL)
    return;
  
  for(i=0;i < 3;i++)
    free(table[i]);
  free(table);
}

//Allocate the Gauss / Hermite look-up tables.
//All tables get 'wxy' entries per row, the temporal ones included, which
//is why nothing is allocated unless wxy >= wt.
//Tables left over from a previous initialisation are released first, so a
//repeated initialisation does not leak.
void init_gaus_hermite(unsigned int wxy, unsigned int wt)
{
  int i;
  
  //Spatial filter must be bigger than temporal
  if(wxy >= wt)
    {
      //Drop whatever an earlier initialisation left behind
      free(gausixy);
      free(gausit);
      liu_lut_free_rows(hermite_xy);
      liu_lut_free_rows(hermite_t);
      
      //malloc gaus table
      gausixy=(float *) liu_lut_malloc(wxy*sizeof(float));
      gausit=(float *) liu_lut_malloc(wxy*sizeof(float));
      
      //malloc hermite tables: one row per order 0..2
      hermite_xy=(float **) liu_lut_malloc(3*sizeof(float*));
      hermite_t=(float **) liu_lut_malloc(3*sizeof(float*));
      
      for(i=0;i < 3;i++) 
	{
	  hermite_xy[i]=(float *) liu_lut_malloc(wxy*sizeof(float));
	  hermite_t[i]=(float *) liu_lut_malloc(wxy*sizeof(float));
	}
    }
}

//K = P1 * A for a 3x3 matrix P1 and a 3x2 matrix A (all 1-based).
void matrix_mult_3x3_3x2(float **P1,float **A, float **K)
{  
  int row, col;
  
  for(row=1; row<=3; row++)
    {
      for(col=1; col<=2; col++)
	{
	  //Row 'row' of P1 times column 'col' of A
	  K[row][col]=A[1][col]*P1[row][1]
	    +A[2][col]*P1[row][2]
	    +A[3][col]*P1[row][3];
	}
    }
}

//Q = P1 * A for two 3x3 matrices (all 1-based).
//Q must not be the same matrix as P1 or A.
//The element Q[1][2] used to read P1[2][1] instead of P1[1][2]; that only
//went unnoticed for symmetric P1. The product is now formed uniformly for
//every element.
void matrix_mult_3x3(float **P1, float **A, float **Q)
{   
  int row, col;
  
  for(row=1; row<=3; row++)
    {
      for(col=1; col<=3; col++)
	{
	  //Row 'row' of P1 times column 'col' of A
	  Q[row][col]=A[1][col]*P1[row][1]
	    +A[2][col]*P1[row][2]
	    +A[3][col]*P1[row][3];
	}
    }
}

//Transpose the 1-based 3x3 matrix 'A' in place by exchanging every element
//above the diagonal with its mirror image.
#define SWAP(a,b) do { float temp=(a); (a)=(b); (b)=temp; } while(0)
void transpose_3x3(float **A)
{
  int row, col;
  
  for(row=1; row<3; row++)
    {
      for(col=row+1; col<=3; col++)
	SWAP(A[row][col],A[col][row]);
    }
}

//Q_b = Q * b for a 3x3 matrix 'Q' and a 3-vector 'b' (all 1-based).
void matrix_mult_3x3_3x1(float **Q, float *b,float *Q_b)
{
  int row;
  
  for(row=1; row<=3; row++)
    Q_b[row]=Q[row][1]*b[1]+Q[row][2]*b[2]+Q[row][3]*b[3];
}

//Build matrix to zero out everything under 1 element
void calc_P1(float a11, float a21, float a31, float **P1)
{
  float *u;
  float inv_norm_ux2;
  
  u=vectorf(1,3);

  u[1]=a11 - sqrt(a11*a11+a21*a21+a31*a31);
  u[2]=a21;
  u[3]=a31;
  
  inv_norm_ux2= -2.0 / (u[1]*u[1] + u[2]*u[2] + u[3]*u[3]);
  
  P1[1][1] = 1.0 + inv_norm_ux2*(u[1]*u[1]);
  P1[1][2] = inv_norm_ux2*(u[1]*u[2]);
  P1[1][3] = inv_norm_ux2*(u[1]*u[3]);
  
  P1[2][1] = inv_norm_ux2*(u[2]*u[1]);
  P1[2][2] = 1.0 + inv_norm_ux2*(u[2]*u[2]);
  P1[2][3] = inv_norm_ux2*(u[2]*u[3]);

  P1[3][1] = inv_norm_ux2*(u[3]*u[1]);
  P1[3][2] = inv_norm_ux2*(u[3]*u[2]);
  P1[3][3] = 1.0 + inv_norm_ux2*(u[3]*u[3]);
  
  freevectorf(u,1,2);
}

//Calculate second matrix to zero out second column
void calc_P2(float a11, float a21, float **P2)
{
  float *u;
  float inv_norm_ux2;

  u=vectorf(1,2);

  u[1]=a11 - sqrt(a11*a11+a21*a21);
  u[2]=a21;

  inv_norm_ux2= -2.0 / (u[1]*u[1] + u[2]*u[2]);
  
  P2[1][1] = 1.0;
  P2[1][2] = 0;
  P2[1][3] = 0;
  
  P2[2][1] = 0;
  P2[2][2] = 1.0 + inv_norm_ux2*(u[1]*u[1]);
  P2[2][3] = inv_norm_ux2*(u[2]*u[1]);

  P2[3][1] = 0;
  P2[3][2] = inv_norm_ux2*(u[1]*u[2]);
  P2[3][3] = 1.0 + inv_norm_ux2*(u[2]*u[2]);

  freevectorf(u,1,2);
}

//Resize one look-up table to 'count' floats; terminates the program if the
//reallocation fails.
static float *liu_resize_lut(float *lut, int count)
{
  float *resized=(float *) realloc(lut,count*sizeof(float));
  
  if(resized==NULL)
    exit(1);
  
  return resized;
}

//Resize the Gauss / Hermite look-up tables to 'wxy' entries per row.
//As in init_gaus_hermite() nothing happens unless wxy >= wt.
void realloc_gaus_hermite(int wxy, int wt)
{
  int order;
  
  //Spatial filter must be bigger than temporal
  if(wxy < wt)
    return;
  
  //Gauss tables
  gausixy=liu_resize_lut(gausixy,wxy);
  gausit=liu_resize_lut(gausit,wxy);
  
  //Hermite tables, one row per order
  for(order=0; order < 3; order++) 
    {
      hermite_xy[order]=liu_resize_lut(hermite_xy[order],wxy);
      hermite_t[order]=liu_resize_lut(hermite_t[order],wxy);
    }
}

//Store the flow vector (u,v) for pixel (x,y) in 'liu_field' and mark that
//cell as valid. Cells are addressed row by row using the field's width.
void write_in_motion_field_liu(int x, int y, 
			       float u, float v, 
			       vec_field *liu_field)
{
  const int cell=liu_field->width * y + x;
  
  liu_field->u[cell]=u;
  liu_field->v[cell]=v;
  liu_field->valid[cell]=1;  
}

#if DEBUG
//Append the current equation system (A, R, Q, b, Qxb), the derived
//confidence values and the resulting flow vector for position (x1,y1) to
//the text file 'fname'.
//Nothing is written unless at least one component of b is positive.
//b is 1-based (allocated with vectorf(1,3)), so the components b[1..3] are
//tested here, not b[0..2].
void write_matrices(char *fname,int x1,int y1)
{
  int i,j;
  
  FILE *fp;
  
  if(!(b[1]>0.0 || b[2]>0.0 || b[3]>0.0))
    return;
  
  fp = fopen(fname, "a");
  if(fp==NULL)
    {
      //Debug output only: report and carry on without it
      fprintf(stderr,"write_matrices: cannot open %s\n",fname);
      return;
    }
  
  fprintf(fp,"A=[\n ");
  for(i=1;i<=3;i++)
    {
      for(j=1;j<=2;j++)
	{
	  fprintf(fp,"%f ",A[i][j]);
	}
      fprintf(fp,"\n");
    }
  fprintf(fp,"]\n ");
  
  fprintf(fp,"\n");
  fprintf(fp,"R:\n ");
  for(i=1;i<=3;i++)
    {
      for(j=1;j<=2;j++)
	{
	  fprintf(fp,"%f ",R[i][j]);
	}
      fprintf(fp,"\n");
    }
  
  fprintf(fp,"\n");
  fprintf(fp,"Q:\n ");
  for(i=1;i<=3;i++)
    {
      for(j=1;j<=3;j++)
	{
	  fprintf(fp,"%f ",Q[i][j]);
	}
      fprintf(fp,"\n");
    }
  
  fprintf(fp,"\n");
  fprintf(fp,"b=[\n ");
  for(i=1;i<=3;i++)
    {
      fprintf(fp,"%f \n",b[i]);
    }
  fprintf(fp,"]\n ");

  fprintf(fp,"\n");
  fprintf(fp,"Qxb:\n ");
  for(i=1;i<=3;i++)
    {
      fprintf(fp,"%f \n",Qxb[i]);
    }
  fprintf(fp,"\n");
  
  //Confidence values as used by validate_vector()
  fprintf(fp,"LampdaMin: %f LampdaMax: %f\n",FMIN(R[2][2],R[1][1]),
	  FMAX(R[2][2],R[1][1]));
  fprintf(fp,"Cond: %f\n",FABS(FMAX(R[2][2],
				    R[1][1])/FMIN(R[2][2],R[1][1])));
  fprintf(fp,"Det: %f\n",R[1][1]*R[2][2]);
  
  //Flow vector, recomputed from R and Qxb
  fprintf(fp,"\n");
  fprintf(fp,"x:%d  u = %f\n",x1,
	  (-Qxb[1]-(-Qxb[2]/R[2][2])*R[1][2])/R[1][1]);
  
  fprintf(fp,"y:%d  v = %f\n",y1,-Qxb[2]/R[2][2]);
  
  fprintf(fp,"\n-----------------\n");
  fclose(fp); 
}

//Render the rectangle (x0,y0)-(x1,y1) of the float plane 'fp' into a new
//grey picture of the size of buffer frame 0, save it as 'fname' and return
//it (the caller owns the returned picture).
//
//Each value is written as |value| / (max - min) * 255, where max and min
//are taken over the rectangle only. The search is seeded with the first
//pixel of the rectangle - not with fp[0], which may lie outside the region
//the filters have written. Results are limited to 255 and a flat region
//(max == min) is rendered black, so the conversion to unsigned char is
//always in range.
Picture *make_pic_and_save(float *fp,char *fname, 
		      int x0, int y0, int x1, int y1)
{
  Picture *image;
  const int width=p_store_pic[0]->width;
  int x,y;
  float max; 
  float min;  
  float range;

  image=newImage();
  setImageType(image,pix_grey);
  resizeImage(image,p_store_pic[0]->width,p_store_pic[0]->height);
  clearImage(image);
  
  //An empty rectangle leaves the cleared picture untouched
  if(x0 < x1 && y0 < y1)
    {
      //Normalize
      max=min=fp[y0 * width + x0];
      
      for(y=y0;y<y1;y++)
	{
	  for(x=x0;x<x1;x++)
	    {
	      const float value=fp[y * width + x];
	      
	      if(value > max)
		max=value;
	      
	      if(value < min)
		min=value;
	    }
	}
      
      range=max-min;
      
      for(y=y0;y<y1;y++)
	{
	  for(x=x0;x<x1;x++)
	    {
	      float level=0.0;
	      
	      if(range > 0.0)
		level=FABS(fp[y * width + x])/range*255;
	      
	      //Also catches values that are not a number
	      if(!(level < 255.0))
		level=255.0;
	      
	      image->data[y * width + x]=(unsigned char) level;
	    }
	}
    }
  
  //fprintf(stdout,"%s: max:%f  min:%f\n",fname,max,min);
  
  saveImage(fname,image);

  return image;
}
#endif 
