/* OPTICAL FLOW ROUTINE after T. CAMUS
 *
 * implemented by Uli Mezger Juni,2000
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "../FW/labImage.h"
#include "camus.h"
//Include MMX
#include "mmx.h"

// NO_MMX is defined as 0; it is not referenced in the visible part of this file.
#define NO_MMX 0

//Upper limit for S, the number of past frames searched per call.
//Can be overridden by defining MAX_S before this point.
#ifndef MAX_S 
#define MAX_S 20
#endif

//Control variables
//(count_pic used to be a global; it is now passed to
// calculate_optic_flow() through a pointer)
// extern int count_pic;

//Frame buffer: S past frames plus the current one, used as a ring
//buffer that is indexed modulo (S+1)
static Picture *p_store_pic[MAX_S+1];
//Intermediate result pictures:
//  p_diff_pic    - absolute difference between a past frame and the
//                  shifted current frame
//  p_shifted_pic - current frame shifted by one pixel in one direction
static Picture *p_diff_pic,*p_shifted_pic;
//Best match found so far for every patch position
static field *motion_field;
//Scratch buffers: ip_tmp1 receives the output of partial_row_sum(),
//ip_tmp2 the output of partial_col_sum()
static int *ip_tmp1, *ip_tmp2;

//Counter for the ringbuffer: slot of p_store_pic that receives the
//newest frame
static int ringbuffer;

//Sizes
//size_p is taken from p_diff_pic->datasize and sizes all scratch buffers
static int size_p; 
//Dimensions and element count of the motion field for the current call
static int width_mfield,height_mfield,size_result;
//1 when detect_mmx() reported MMX support (selects ImgAbsDiff over diffi)
static int mmx=0;

int calculate_optic_flow(Picture *p_src, 
			 int patch_size, 
			 int S,
			 vec_field *output_field,
			 int* count_pic)
{
  int dir,frameNr;
  int i;
  // 1 Pixel border on all sides because
  // of shifting
  int startx=1,starty=1;
  int sub_width=1,sub_height=1;

  //Flags to indicate start of algorithm
  int start_search=1,start_shift=1;
  
  if(S>=MAX_S)
    {
      S=MAX_S;
      fprintf(stdout,"S>MAX_S => No way, baby!!\n");
    }
  
  //Initiate optic flow
  if(*count_pic==0)
    {
      if(p_diff_pic)
	{
	  freeImage(p_diff_pic);
	  freeImage(p_shifted_pic);
	  free(ip_tmp1); free(ip_tmp2);
	}
 
      if ( detect_mmx() )
	{
	  fprintf(stdout,"MMX is detected !\n");
	  mmx=1;
	}      
      else
	{
	  fprintf(stdout,"MMX is NOT detected !\n");     
	  mmx=0;
	}
      
      //Init p_diff_pic = difference picture
      p_diff_pic=newImage();
      setImageType(p_diff_pic,pix_grey);
      resizeImage(p_diff_pic,p_src->width,p_src->height);
      
      //Init p_shifted_pic = shifted picture
      p_shifted_pic=newImage();
      setImageType(p_shifted_pic,pix_grey);
      resizeImage(p_shifted_pic,p_src->width,p_src->height);
      
      //Calc image size
      size_p=p_diff_pic->datasize;
      
      //Allocate memory for column summation
      ip_tmp1=(int *) malloc(size_p*sizeof(int));
      
      //Allocate memory for row summation
      ip_tmp2=(int *) malloc(size_p*sizeof(int)); 
      
      //Allocate memory for motion field 
      motion_field=(field *) malloc(size_p*sizeof(field));	
      if(ip_tmp1==NULL || ip_tmp2==NULL || motion_field==NULL)
	{
	  fprintf(stdout,"Malloc Error! => exit(1)");
	  exit(1);
	}
      
      //Set ringbuffer counter
      ringbuffer=S;
    }
  
  // Fill buffer 
  if( *count_pic < S )
  {
    printf("Buffer: %d\n",(S-(*count_pic)));
    
    p_store_pic[*count_pic]=copyImage(p_src);
    
    if(p_store_pic[*count_pic]->data==NULL)
      fprintf(stdout,"Doh! -> Could not save picture!\n");
    else     
      (*count_pic)++;
    
    return 0;
  }
  //Buffer full ?=> Start processing
  else
    { 
      //copy actual test frame
      p_store_pic[ringbuffer]=copyImage(p_src);
      
      //size of motion field-array 
      width_mfield=(p_diff_pic->width-startx-sub_width-(patch_size-1));
      height_mfield=(p_diff_pic->height-starty-sub_height-(patch_size-1));
      size_result=width_mfield*height_mfield;
      
      //Set search-flag
      start_search=1;
      
      //Search in NESW direction for 1 pixel and for no motion
      for(dir=8;dir>-1;dir--)
	{
	  //Indicate to shift picture
	  start_shift=1;
	  
	  //Search in time for S frames
	  for(frameNr=1; frameNr<=S ;frameNr++)
	    {
	      if(start_shift==1)
		{
		  // Shift actual frame
		  //p_shifted = shifted version of p_store_pic
		  shift(p_store_pic[ringbuffer],// actual test frame 
			p_shifted_pic,  //shifted picture
			dir);
		  
		  //picture already shifted? 
		  start_shift=0;
		}
	      
	      //build difference picture
	      //p_diff_pic=difference picture
	      //MMX or not MMX?
	      if ( mmx==0 )        
		diffi(p_store_pic[(ringbuffer+(S+1)-frameNr)%(S+1)],
		      p_shifted_pic,
		      p_diff_pic);
	      else      
		ImgAbsDiff(p_store_pic[(ringbuffer+(S+1)-frameNr)%(S+1)]->data, 
			   p_shifted_pic->data,
			   p_diff_pic->data, p_shifted_pic->datasize);
	      
	      //Column summation
	      partial_row_sum(p_diff_pic,
			      p_diff_pic->width-sub_width,
			      p_diff_pic->height-sub_height,
			      sub_width,
			      startx,starty,
			      patch_size,
			      ip_tmp1);
	      
	      //Row summation
	      partial_col_sum(ip_tmp1,
			      (p_diff_pic->width-startx
			       -sub_width)-(patch_size-1),
			      (p_diff_pic->height-starty-sub_height),
			      patch_size,
			      ip_tmp2);
	      
	      //Initiate *motion_field[]
	      if(start_search)
		{
		  for (i=0; i < size_result; i++)
		    {
		      //Write Value
		      motion_field[i].win_value=ip_tmp2[i];
		       
		      //Write direction
		      motion_field[i].win_dir=dir;
		      
		      //Write velocity
		      motion_field[i].win_frameNr=frameNr;
		      
		      //Value inconsistent
		      motion_field[i].incon_flag=0;
		    }
		  //motion_field initiated
		  start_search=0;
		}
	      //Compare and update motion field
	      else
		{
		  for (i=0; i < size_result; i++)
		    {
		      //Found better match?
		      if(motion_field[i].win_value > ip_tmp2[i])
			{
			  //Update Match-value
			  motion_field[i].win_value=ip_tmp2[i];
			  
			  //Update direction
			  motion_field[i].win_dir=dir;
			  
			  //Update velocity
			  motion_field[i].win_frameNr=frameNr;
			  
			  //Flag
			  motion_field[i].incon_flag=0;
			}
		      //Inconsistency
		      else if(motion_field[i].win_value==ip_tmp2[i])
			{//Same patch found in later frame?
			  if(motion_field[i].win_frameNr<frameNr)
			    { 
			      //Update velocity
			      motion_field[i].win_frameNr=frameNr;
			      
			      //Flag
			      motion_field[i].incon_flag=0;
			    }
			  else
			    //Indicate inconsistency
			    motion_field[i].incon_flag=1;
			} 
		    } 
		} 
	    } 
	}            
#if 1      
      make_camus_field(motion_field,
		       width_mfield, height_mfield,
		       output_field);
#endif 
      //Set ringbuffer counter 
      ringbuffer = (ringbuffer+1)%(S+1);
      
      //Make room for next picture
      freeImage(p_store_pic[ringbuffer]);
      
      return 1;
    }
}

///////////
//Routines
//////////
/* Shift *p_pic1 by one pixel in direction dir (0..8).
 * The result is written to *p_shifted_pic; pixels that have no source
 * pixel (the border that is shifted in) are set to 0.
 *
 *   dir  0: no shift      1: right        2: right + up
 *        3: up            4: left + up    5: left
 *        6: left + down   7: down         8: right + down
 *
 * Any other dir leaves *p_shifted_pic untouched.
 *
 * Both pictures must have the dimensions of *p_pic1 and must not share
 * their pixel buffer (rows are copied with memcpy).
 * Pixels are copied byte-wise per row, so the routine does not depend on
 * the alignment of the buffers and never writes outside of
 * width*height bytes.
 */
void shift(Picture *p_pic1, 
	   Picture *p_shifted_pic,
	   int dir )
{
  //Displacement of the picture content per direction
  static const int shift_dx[9]={0, 1, 1, 0,-1,-1,-1, 0, 1};
  static const int shift_dy[9]={0, 0,-1,-1,-1, 0, 1, 1, 1};

  const BYTE *src=p_pic1->data;
  BYTE *dst=p_shifted_pic->data;
  const int width=p_pic1->width,height=p_pic1->height;
  int dx,dy,row;

  if(dir<0 || dir>8)
    return;
  if(width<=0 || height<=0)
    return;

  dx=shift_dx[dir];
  dy=shift_dy[dir];

  for(row=0;row<height;row++)
    {
      BYTE *out_row=dst+(size_t)row*(size_t)width;
      const int src_row=row-dy;
      const BYTE *in_row;

      //Row shifted in from outside the picture
      if(src_row<0 || src_row>=height)
	{
	  memset(out_row,0,(size_t)width);
	  continue;
	}

      in_row=src+(size_t)src_row*(size_t)width;

      if(dx>0)
	{//content moves right: first column is new
	  out_row[0]=0;
	  memcpy(out_row+1,in_row,(size_t)(width-1));
	}
      else if(dx<0)
	{//content moves left: last column is new
	  memcpy(out_row,in_row+1,(size_t)(width-1));
	  out_row[width-1]=0;
	}
      else
	memcpy(out_row,in_row,(size_t)width);
    }
}

//Build absolute differences
/* Absolute value of an integer expression.
 * NOTE: x is evaluated more than once, do not pass expressions with
 * side effects.
 * (The former shift-by-7 version is only a valid sign mask for 8 bit
 * operands; pixel differences are promoted to int and range from -255
 * to 255, so e.g. 0-255 came out as 257.) */
#define ABS1(x) ((x) < 0 ? -(x) : (x))

/* Write |p_in1 - p_in2| pixel by pixel into p_diff.
 * The number of pixels is taken from p_in1 (width*height); all three
 * pictures have to provide at least that many bytes.
 * Assumes BYTE is an unsigned 8 bit type - TODO confirm in labImage.h */
void diffi(Picture *p_in1,
	   Picture *p_in2,
	   Picture *p_diff)
{
  const BYTE *first=p_in1->data;
  const BYTE *second=p_in2->data;
  BYTE *result=p_diff->data;
  const int size=p_in1->height*p_in1->width;
  int k;

  for(k=0;k<size;k++)
    {
      //Difference in int precision, result is in 0..255
      const int delta=(int) first[k]-(int) second[k];

      result[k]=(BYTE) ABS1(delta);
    }
}

///////////////////////
// Partial Row Sums
//////////////////////
/* For every row of p_diff write the sums of all horizontal windows of
 * patch_size neighbouring pixels to ip_tmp2 (sliding window).
 *
 * p_diff      source picture; one row is (width+sub_width) bytes long
 * width       row length without the right border (sub_width)
 * height      number of rows without the bottom border
 * startx      number of columns skipped on the left
 * starty      number of rows skipped at the top
 * patch_size  window length, odd or even
 * ip_tmp2     output, (height-starty) rows of
 *             (width-startx-patch_size+1) sums each, stored densely
 *
 * Returns 1 on success, 0 if no window fits into a row (nothing is
 * written in that case).
 */
int partial_row_sum(Picture *p_diff,
		    int width, int height, int sub_width,
		    int startx, int starty,
		    int patch_size,
		    int *ip_tmp2)
{
  const int stride=width+sub_width;
  const int rows=height-starty;
  //Number of window positions per row
  const int sums_per_row=(width-startx)-patch_size+1;
  const BYTE *line;
  int *out=ip_tmp2;
  int r,c;

  if(patch_size<1 || sums_per_row<1)
    return 0;

  line=p_diff->data;
  line+=starty*stride+startx;

  for(r=0;r<rows;r++)
    {
      int sum=0;

      //First window of the row
      for(c=0;c<patch_size;c++)
	sum+=line[c];
      *out++=sum;

      //Slide the window: add the pixel entering on the right, drop
      //the one leaving on the left
      for(c=1;c<sums_per_row;c++)
	{
	  sum+=line[c+patch_size-1]-line[c-1];
	  *out++=sum;
	}

      line+=stride;
    }
  return 1;  
}


///////////////////////
// Partial Column Sums 
///////////////////////
/* For every column of the width x height array ip_tmp2 write the sums
 * of all vertical windows of patch_size neighbouring values to ip_tmp3
 * (sliding window).
 *
 * ip_tmp2     input, width x height values, row by row
 * ip_tmp3     output, width x (height-patch_size+1) values, row by
 *             row; nothing beyond that is read or written
 *
 * Returns 1 on success, 0 if the arguments are unusable (NULL buffer,
 * empty array or patch higher than the array).
 */
int partial_col_sum(int *ip_tmp2,
		    int width, int height,
		    int patch_size,
		    int *ip_tmp3)
{
  //Number of window positions per column
  const int out_rows=height-patch_size+1;
  int row,col;

  if(ip_tmp2==NULL || ip_tmp3==NULL)
    return 0;
  if(width<1 || patch_size<1 || out_rows<1)
    return 0;

  //First output row: plain sum over the first patch_size input rows
  for(col=0;col<width;col++)
    {
      int sum=0;

      for(row=0;row<patch_size;row++)
	sum+=ip_tmp2[row*width+col];
      ip_tmp3[col]=sum;
    }

  //Remaining rows: previous sum plus the input row entering the window
  //minus the input row leaving it
  for(row=1;row<out_rows;row++)
    {
      const int *entering=ip_tmp2+(row+patch_size-1)*width;
      const int *leaving=ip_tmp2+(row-1)*width;
      const int *above=ip_tmp3+(row-1)*width;
      int *current=ip_tmp3+row*width;

      for(col=0;col<width;col++)
	current[col]=above[col]+entering[col]-leaving[col];
    }

  return 1;
} 

///////////////////////////
//Write in optic flow field
//////////////////////////
/* Copy the consistent entries of m_field into the optic flow field.
 *
 * m_field        width_mfield x height_mfield best matches, row by row
 * motion_field   destination; m_field is placed centred in it (with an
 *                odd size difference the extra column/row stays on the
 *                right/bottom side)
 *
 * Entries flagged as inconsistent and cells outside of m_field are
 * left untouched.  Every m_field entry is addressed by its own row and
 * column, so a size difference that is odd or negative cannot shift
 * the rows against each other or read past the end of m_field.
 */
void make_camus_field(field m_field[],
		      int width_mfield, int height_mfield, 
		      vec_field *motion_field) 
{
  const int field_w=(int) motion_field->width;
  const int field_h=(int) motion_field->height;
  //Offset of m_field inside the destination
  const int off_x=(field_w-width_mfield)/2;
  const int off_y=(field_h-height_mfield)/2;
  int mx,my;

  for(my=0;my<height_mfield;my++)
    {
      const int y1=my+off_y;

      if(y1<0 || y1>=field_h)
	continue;

      for(mx=0;mx<width_mfield;mx++)
	{
	  const field *match=&m_field[my*width_mfield+mx];
	  const int x1=mx+off_x;

	  if(x1<0 || x1>=field_w)
	    continue;

	  //Never write behind the data of the destination
	  if((unsigned) (y1*field_w+x1)>=motion_field->datasize)
	    continue;

	  if(!match->incon_flag)
	    write_in_camus_motion_field(motion_field,x1,y1,
					match->win_dir,
					match->win_frameNr);
	}
    }
}

//Fill optic flow field with values
/* Store the flow vector for cell (x1,y1) of camus_field and mark the
 * cell as valid.
 * dir (0..8) selects the unit step from the tables below; the vector
 * is the negated step divided by frameNr.  A frameNr of 0 stores the
 * zero vector. */
void write_in_camus_motion_field(vec_field *camus_field,
				 int x1, int y1, int dir, int frameNr)
{
  //Unit step per direction
  int step_x[]={0, 1, 1, 0,-1,-1,-1, 0, 1};
  int step_y[]={0, 0,-1,-1,-1, 0, 1, 1, 1};
  //Position of the cell in the row-major arrays
  //NOTE(review): assumes the index fits into an int - confirm the type
  //of vec_field.width
  const int cell=y1 * camus_field->width + x1;
  float flow_u=0;
  float flow_v=0;

  if(frameNr!=0)
    {
      flow_u=(float) (-step_x[dir]) / (float) frameNr;
      flow_v=(float) (-step_y[dir]) / (float) frameNr;
    }

  camus_field->u[cell]=flow_u;
  camus_field->v[cell]=flow_v;

  //Indicate a valid entry
  camus_field->valid[cell]=1;
}
