#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "mathmi.h"



/*
 * Solve the linear system A*x = b in the least-squares sense by successive
 * Householder triangulation.
 *
 * All matrices and vectors are 1-based (as produced by matrixf()/vectorf()).
 * Dimension arguments are passed column count first, then row count.
 *
 *   A         Arow x Acol input matrix; it is copied and never modified.
 *   b         right-hand side with brow entries; copied and never modified.
 *   x         output vector with xrow entries.
 *   residual  output: sum of the absolute values of the transformed
 *             right-hand side entries xrow+1..brow.
 *
 * Requirements: 1 <= Acol <= Arow, brow == Arow and xrow == Acol.  The
 * triangulation and the back substitution index A, b and x with these
 * sizes interchangeably, so any other combination would read or write
 * outside the arrays; such calls are rejected.
 *
 * Returns 1 on success.  Returns 0 when the dimensions are inconsistent or
 * when a work buffer / reflector could not be obtained; in that case x and
 * *residual are left untouched.
 *
 * A component of x whose pivot is exactly zero is set to the marker 1e10.
 */
int least_square(float **A, int Acol,int Arow,
		  float *b, int brow,
		  float *x, int xrow,
		  float *residual)
{
  float **Q=NULL,**Atmp=NULL;
  float **P=NULL;
  float *btmp=NULL;

  int Qrow=Arow,Qcol=Arow;
  int r,c,l;
  int ok=0;
  float sum;

  //Reject shapes the algorithm below cannot handle
  if(Acol<1 || Arow<Acol || brow!=Arow || xrow!=Acol)
    return 0;

  Atmp=matrixf(1,Arow,1,Acol);
  Q=matrixf(1,Qrow,1,Qcol);
  btmp=vectorf(1,brow);
  if(Atmp==NULL || Q==NULL || btmp==NULL)
    goto cleanup;

  //Copy A
  for(r=1;r<=Arow;r++)
    {
      for(c=1;c<=Acol;c++)
	{
	  Atmp[r][c]=A[r][c];
	}
    }

  //Copy b
  for(r=1;r<=brow;r++)
    btmp[r]=b[r];

  //Build unit matrix
  for(r=1; r<=Qrow; r++)
    {
      for(c=1; c<=Qcol; c++)
	{
	  if(r==c)
	    Q[r][c]=1.0;
	  else
	    Q[r][c]=0.0;
	}
    }

  //Successive Householder Triangulation
  for(l=1; l <= Acol ; l++)
    {
      P=calc_P(Atmp,Acol,Arow,l-1,l-1);

      //Error: release the work buffers instead of leaking them
      if(P==NULL)
	goto cleanup;

      //matrix_mult() frees its second operand and returns the product
      Atmp=matrix_mult(P,Arow,Arow,
		       Atmp,Acol,Arow);

      Q=matrix_mult(P,Arow,Arow,Q,Qcol,Qrow);

      //The reflector is only needed for this column
      freematrixf(P,1,Arow,1,Arow);
      P=NULL;
    }

  //right hand side (matrix_vector_mult() frees the old btmp)
  btmp=matrix_vector_mult(Q, Qcol, Qrow, btmp, brow);

  //Back substitution, from the last unknown to the first
  for(l=0; l < xrow; l++)
    {
      sum=0.0;
      for(r=0; r < l ; r++)
	sum+=x[xrow-r] * Atmp[xrow-l][Acol-r];

      if( Atmp[xrow-l][Acol-l]!=0.0)
	x[xrow-l]=(btmp[xrow-l] - sum) / Atmp[xrow-l][Acol-l] ;
      else
	x[xrow-l]=1e10;
    }

  //Calculate residual
  sum=0.0;
  for(l=xrow+1; l <= brow; l++)
    sum+=FABS(btmp[l]);

  *residual=sum;

  //everything o.k.
  ok=1;

 cleanup:
  if(btmp!=NULL)
    freevectorf(btmp,1,brow);
  if(Atmp!=NULL)
    freematrixf(Atmp, 1,Arow,1,Acol);
  if(Q!=NULL)
    freematrixf(Q, 1,Qrow,1,Qcol);

  return ok;
}

/*
 * Transpose a 1-based matrix in place by swapping every element above the
 * main diagonal with its mirror image below it.
 *
 * Rows 1..Qrow are visited and, for each, columns row+1..Qcol; the matrix
 * must therefore be square (Qrow == Qcol) for all accesses to be valid.
 */
void transpose_NxN(float **Q,int Qcol,int Qrow)
{
  int i=1;

  while(i<=Qrow)
    {
      int j=i+1;

      while(j<=Qcol)
	{
	  float upper=Q[i][j];

	  Q[i][j]=Q[j][i];
	  Q[j][i]=upper;
	  j++;
	}
      i++;
    }
}

/*
 * Return the matrix product F * Atmp as a freshly allocated 1-based
 * frow x tcol matrix.
 *
 *   F     left operand, read as frow rows by fcol columns.
 *   Atmp  right operand; rows 1..fcol and columns 1..tcol are read.
 *
 * Ownership: Atmp is released here (as a trow x tcol matrix) before
 * returning, so the caller must not use or free it afterwards.  The caller
 * owns the returned matrix and frees it with freematrixf(.., 1,frow,1,tcol).
 */
float **matrix_mult(float **F, int fcol, int frow, 
		    float **Atmp, int tcol, int trow)
{
  float **product=matrixf(1,frow,1,tcol);
  int i,j,k;

  for(i=1; i<=frow; i++)
    {
      for(j=1; j<=tcol; j++)
	{
	  //dot product of row i of F with column j of Atmp
	  float acc=0.0;

	  for(k=1; k<=fcol; k++)
	    acc+=F[i][k]*Atmp[k][j];

	  product[i][j]=acc;
	}
    }

  //the right operand is consumed by this call
  freematrixf(Atmp,1,trow,1,tcol);

  return product;
}

/*
 * Build the row x row Householder reflector P = I - 2*u*u^T/(u^T*u) for the
 * sub-column of A that starts at row roff+1 in column coff+1.
 *
 *   A     1-based matrix with at least `row` rows; only column coff+1,
 *         rows roff+1..row, is read.
 *   col   not used by this function.
 *   row   number of rows of A; also the order of the returned matrix.
 *   coff  column offset: the reflector is built from column coff+1.
 *   roff  row offset: rows/columns 1..roff of P are those of the identity.
 *
 * u is the sub-column with its Euclidean norm subtracted from the first
 * entry.  When u is the zero vector (the sub-column is already a
 * non-negative multiple of the first unit vector) or when roff >= row, the
 * identity matrix is returned instead of dividing by zero.
 *
 * P acts on the rows of A, so both of its indices are shifted by roff.
 *
 * NOTE(review): subtracting the norm from a positive leading entry can
 * cancel badly when the remaining entries are tiny; the sign convention is
 * kept as it was so that existing callers see the same reflector.
 *
 * Returns a matrix the caller frees with freematrixf(P,1,row,1,row), or
 * NULL if an allocator reported failure by returning NULL.
 */
float **calc_P(float **A, int col, int row, int coff, int roff)
{
   float *u=NULL;
   float **P;

   float inv_norm_ux2=0.0;
   float norm_ux2;
   float sum;

   int r,c;
   int size=row-roff;

   (void)col;

   if(size>0)
     {
       u=vectorf(1,size);
       if(u==NULL)
	 return NULL;
     }

   P=matrixf(1,row,1,row);
   if(P==NULL)
     {
       if(u!=NULL)
	 freevectorf(u,1,size);
       return NULL;
     }

   if(size>0)
     {
       //squared norm of the sub-column
       sum=0.0;
       for(r=1; r <= size; r++)
	 sum+=A[r+roff][1+coff]*A[r+roff][1+coff];

       u[1]=A[1+roff][1+coff] - sqrt(sum);
       for(r=2; r <= size; r++)
	 u[r]=A[r+roff][1+coff];

       //a zero u means there is nothing to reflect: keep the factor at 0
       norm_ux2=normxnorm(u,size);
       if(norm_ux2!=0.0)
	 inv_norm_ux2= -2.0 / norm_ux2;
     }

   for(r=1; r<=row; r++)
     {
       for(c=1; c<=row; c++)
	 {
	   if(c<=roff || r<=roff || inv_norm_ux2==0.0)
	     {
	       //identity outside the active block (or everywhere if u==0)
	       if(r==c)
		 P[r][c] = 1.0;
	       else
		 P[r][c] = 0.0;
	     }
	   else
	     {
	       if(r==c)
		 P[r][c] = 1.0 + inv_norm_ux2 * u[r-roff]*u[c-roff];
	       else
		 P[r][c] = inv_norm_ux2 * u[r-roff]*u[c-roff];
	     }
	 }
     }

   if(u!=NULL)
     freevectorf(u,1,size);

   return P;
}

/*
 * Return the matrix-vector product Q * b as a freshly allocated 1-based
 * vector with brow entries.
 *
 *   Q     1-based matrix; rows 1..brow and columns 1..Qcol are read.
 *   Qcol  number of columns of Q, and number of entries of b that are read.
 *   Qrow  not used by this function.
 *   b     input vector; entries 1..Qcol are read.
 *   brow  length of the result, and the length with which b is released.
 *
 * Ownership: b is freed here, so the caller must not use it afterwards.
 * The caller owns the returned vector and frees it with
 * freevectorf(.., 1, brow).
 */
float *matrix_vector_mult(float **Q,int Qcol,int Qrow,float *b,int brow)
{
  float *result=vectorf(1,brow);
  int i,k;

  for(i=1; i<=brow; i++)
    {
      //dot product of row i of Q with b
      float acc=0.0;

      for(k=1; k<=Qcol; k++)
	acc+=b[k]*Q[i][k];

      result[i]=acc;
    }

  //the input vector is consumed by this call
  freevectorf(b,1,brow);

  return result;
}

/*
 * Return the squared Euclidean norm of a 1-based vector, i.e. the sum of
 * u[k]*u[k] for k = 1..n.  The result is 0 when n < 1.
 */
float normxnorm(float *u,int n)
{
  float acc=0.0;
  int k=1;

  while(k<=n)
    {
      acc+=u[k]*u[k];
      k++;
    }

  return acc;
}


/*
 * Print a 1-based matrix to stdout, one row per line.  Each element is
 * written with the "%2.3f " format and every row is terminated by ";".
 *
 *   B     matrix to print; rows 1..Brow and columns 1..Bcol are read.
 */
void show_matrix(float **B,int Bcol,int Brow)
{
  int i,j;

  for(i=1; i<=Brow; i++)
    {
      j=1;
      while(j<=Bcol)
	{
	  printf("%2.3f ",B[i][j]);
	  j++;
	}
      fputs(";\n",stdout);
    }
}

/*
 * Print entries 1..brow of a 1-based vector to stdout, one value per line,
 * using the "%2.3f" format.
 */
void show_vector(float *b,int brow)
{
  int i=1;

  while(i<=brow)
    {
      printf("%2.3f\n",b[i]);
      i++;
    }
}

//display error messages
/*
 * Stand-in for the nrerror function of the Numerical Recipes library.
 * Writes a fixed banner line followed by error_text to stderr.
 * The function returns normally after printing; it does not terminate
 * the program, so callers continue executing.
 */
void nrerror_liu(char error_text[]) {
  fputs("Numerical Recipes run-time error...\n",stderr);
  fprintf(stderr,"%s\n",error_text);
} /* end of nrerror_liu() */

//malloc float vector
/*
 * Allocate a float vector whose valid indices are nl..nh inclusive.
 *
 * The returned pointer is the start of the allocation shifted by -nl, so
 * it must only be indexed within nl..nh and must be released with
 * freevectorf(v, nl, nh), never with free() directly.
 *
 * On failure (nh < nl-1, or malloc returns NULL) the error is reported via
 * nrerror_liu() and NULL is returned, instead of a shifted null pointer
 * that would look valid to the caller.
 */
float *vectorf(int nl,int nh)
{
  float *v;
  int count=nh-nl+1;

  /* a negative length must not be converted into a huge unsigned size */
  if (count < 0) {
    nrerror_liu("allocation failure in vector()");
    return NULL;
  }

  v=malloc((size_t)count*sizeof *v);
  if (!v) {
    nrerror_liu("allocation failure in vector()");
    return NULL;
  }
  return v-nl;
}

//malloc float matrix
/*
 * Allocate a float matrix whose valid indices are m[nrl..nrh][ncl..nch].
 *
 * The matrix is an array of row pointers; the array and every row are
 * shifted by -nrl / -ncl respectively, so the result must be released with
 * freematrixf(m, nrl, nrh, ncl, nch), never with free() directly.
 *
 * On failure (a negative row/column count, or malloc returning NULL) the
 * error is reported via nrerror_liu(), every row allocated so far is
 * released, and NULL is returned.
 */
float **matrixf(int nrl,int nrh,int ncl,int nch)
{
  int i;
  int nrow=nrh-nrl+1;
  int ncol=nch-ncl+1;
  float **m;

  /* negative counts must not be converted into huge unsigned sizes */
  if (nrow < 0 || ncol < 0) {
    nrerror_liu("allocation failure 1 in matrix()");
    return NULL;
  }

  m=malloc((size_t)nrow*sizeof *m);
  if (!m) {
    nrerror_liu("allocation failure 1 in matrix()");
    return NULL;
  }
  m -= nrl;

  for(i=nrl;i<=nrh;i++) {
    m[i]=malloc((size_t)ncol*sizeof **m);
    if (!m[i]) {
      nrerror_liu("allocation failure 2 in matrix()");
      /* undo the rows that were already allocated and shifted */
      while (--i >= nrl)
	free(m[i]+ncl);
      free(m+nrl);
      return NULL;
    }
    m[i] -= ncl;
  }
  return m;
}

//Free vector
/*
 * Release a vector obtained from vectorf(nl, nh).
 *
 *   v   the shifted pointer returned by vectorf(); NULL is accepted and
 *       ignored.
 *   nl  the lower index that was passed to vectorf(); needed to undo the
 *       shift applied to the pointer.
 *   nh  not used.
 *
 * The caller's copy of the pointer is not modified; callers that keep it
 * around should reset it themselves.
 */
void freevectorf(float *v,int nl,int nh)
{
  (void)nh;

  if (v == NULL)
    return;

  free(v+nl);
}

//Free matrix
/*
 * Release a matrix obtained from matrixf(nrl, nrh, ncl, nch).
 *
 *   m         the shifted row-pointer array returned by matrixf(); NULL is
 *             accepted and ignored.
 *   nrl, nrh  row index range that was passed to matrixf().
 *   ncl       lower column index that was passed to matrixf(); needed to
 *             undo the shift applied to every row pointer.
 *   nch       not used.
 *
 * Rows are freed from nrh down to nrl, then the row-pointer array itself.
 * The caller's copy of the pointer is not modified; callers that keep it
 * around should reset it themselves.
 */
void freematrixf(float **m, int nrl,int nrh,int ncl,int nch)
{
  int i;

  (void)nch;

  if (m == NULL)
    return;

  for(i=nrh;i>=nrl;i--)
    {
      if (m[i] != NULL)
	free(m[i]+ncl);
    }

  free(m+nrl);
}


//Singular Value decomposition
// Helper macros and their scratch variables for svdcmp().

// Scratch storage written by every expansion of PYTHAG; being file-scope
// statics, they are shared by all uses of the macro in this file.
static double at,bt,ct;
// PYTHAG(a,b) evaluates to sqrt(a*a + b*b), computed as
// max(|a|,|b|) * sqrt(1 + (min/max)^2) rather than by squaring a and b
// directly (presumably to avoid overflow/underflow of the squares).
// Yields 0.0 when both arguments are zero.
#define PYTHAG(a,b) ((at=fabs(a)) > (bt=fabs(b)) ? \
(ct=bt/at,at*sqrt(1.0+ct*ct)) : (bt ? (ct=at/bt,bt*sqrt(1.0+ct*ct)): 0.0))

// MAX is only defined here when no earlier definition is in effect.
#ifndef MAX
// Scratch storage so that MAX evaluates each argument exactly once.
static double maxarg1,maxarg2;
// MAX(a,b) evaluates to the larger of a and b, as a double.
#define MAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
	(maxarg1) : (maxarg2))
#endif

// SIGN(a,b) evaluates to |a| carrying the sign of b (b >= 0 counts as positive).
#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))

/*
 * Singular value decomposition of the m x n matrix a (1-based indexing).
 *
 *   a  input matrix a[1..m][1..n]; overwritten in place by the routine.
 *   m  number of rows of a.
 *   n  number of columns of a.
 *   w  output vector w[1..n].
 *   v  output matrix v[1..n][1..n].
 *
 * NOTE(review): the structure matches the well-known svdcmp routine from
 * Numerical Recipes in C.  Under that reading a = U * diag(w) * V^T, where
 * on return a holds U, w holds the singular values (made non-negative in
 * the last phase) and v holds V itself, not its transpose.  Confirm against
 * the reference before relying on this.
 *
 * Error handling: both error conditions (m < n, and no convergence after 30
 * iterations) are only reported through nrerror_liu(); there is no return
 * or abort after either call, so execution continues once nrerror_liu()
 * returns.
 *
 * A scratch vector rv1[1..n] is allocated with vectorf() and released with
 * freevectorf() before returning.  The PYTHAG and MAX macros used below
 * write to file-scope static variables.
 */
void svdcmp(float **a, int m, int n, float *w, float **v)
{
  int flag,i,its,j,jj,k,l,nm;
  float c,f,h,s,x,y,z;
  float anorm=0.0,g=0.0,scale=0.0;
  float *rv1;

  // Only a report: nothing below prevents the routine from running with m < n.
  if (m < n) nrerror_liu("SVDCMP: You must augment A with extra zero rows");
  rv1=vectorf(1,n);
  // Phase 1: for each column i, transform column i of a (rows i..m) and then
  // row i of a (columns l..n).  w[i] and rv1[i] receive scale*g of the column
  // and row step respectively.  (Numerical Recipes describes this phase as
  // Householder reduction to bidiagonal form.)
  for (i=1;i<=n;i++) {
    l=i+1;
    // rv1[i] takes scale*g left over from the previous row step (0 for i=1).
    rv1[i]=scale*g;
    g=s=scale=0.0;
    if (i <= m) {
      // Column step: scale by the sum of absolute values before squaring.
      for (k=i;k<=m;k++) scale += fabs(a[k][i]);
      if (scale) {
	for (k=i;k<=m;k++) {
	  a[k][i] /= scale;
	  s += a[k][i]*a[k][i];
	}
	f=a[i][i];
	// g gets the sign opposite to f, so f-g below does not cancel.
	g = -SIGN(sqrt(s),f);
	h=f*g-s;
	a[i][i]=f-g;
	if (i != n) {
	  // Apply the column transformation to the remaining columns l..n.
	  for (j=l;j<=n;j++) {
	    for (s=0.0,k=i;k<=m;k++) s += a[k][i]*a[k][j];
	    f=s/h;
	    for (k=i;k<=m;k++) a[k][j] += f*a[k][i];
	  }
				}
	// Undo the scaling of the stored column.
	for (k=i;k<=m;k++) a[k][i] *= scale;
      }
    }
    w[i]=scale*g;
    g=s=scale=0.0;
    if (i <= m && i != n) {
      // Row step: same procedure applied to row i, columns l..n.
      for (k=l;k<=n;k++) scale += fabs(a[i][k]);
      if (scale) {
	for (k=l;k<=n;k++) {
	  a[i][k] /= scale;
	  s += a[i][k]*a[i][k];
	}
	f=a[i][l];
	g = -SIGN(sqrt(s),f);
	h=f*g-s;
	a[i][l]=f-g;
	// rv1[l..n] is used here as temporary storage for row i divided by h.
	for (k=l;k<=n;k++) rv1[k]=a[i][k]/h;
	if (i != m) {
	  // Apply the row transformation to the remaining rows l..m.
	  for (j=l;j<=m;j++) {
	    for (s=0.0,k=l;k<=n;k++) s += a[j][k]*a[i][k];
	    for (k=l;k<=n;k++) a[j][k] += s*rv1[k];
	  }
	}
	for (k=l;k<=n;k++) a[i][k] *= scale;
      }
		}
    // anorm tracks the largest |w[i]|+|rv1[i]|; it is the reference
    // magnitude for the "negligible" tests in the last phase.
    anorm=MAX(anorm,(fabs(w[i])+fabs(rv1[i])));
  }
  // Phase 2: build v from the row-transformation data stored in a, working
  // from the last column back to the first.  On entry l and g still hold the
  // values left by the loop above; they are reset at the end of each pass.
  for (i=n;i>=1;i--) {
    if (i < n) {
      if (g) {
	// Dividing twice instead of by the product a[i][l]*g.
	for (j=l;j<=n;j++)
	  v[j][i]=(a[i][j]/a[i][l])/g;
	for (j=l;j<=n;j++) {
	  for (s=0.0,k=l;k<=n;k++) s += a[i][k]*v[k][j];
	  for (k=l;k<=n;k++) v[k][j] += s*v[k][i];
	}
      }
      for (j=l;j<=n;j++) v[i][j]=v[j][i]=0.0;
    }
    v[i][i]=1.0;
    g=rv1[i];
    l=i;
  }
  // Phase 3: rewrite a in place from the column-transformation data stored
  // in it, again from the last column back to the first.
  for (i=n;i>=1;i--) {
    l=i+1;
    g=w[i];
    if (i < n)
      for (j=l;j<=n;j++) a[i][j]=0.0;
		// Despite the indentation, this if/else is not part of the
		// "if (i < n)" above; it runs for every i.
		if (g) {
		  g=1.0/g;
		  if (i != n) {
		    for (j=l;j<=n;j++) {
		      for (s=0.0,k=l;k<=m;k++) s += a[k][i]*a[k][j];
		      f=(s/a[i][i])*g;
		      for (k=i;k<=m;k++) a[k][j] += f*a[k][i];
		    }
		  }
		  for (j=i;j<=m;j++) a[j][i] *= g;
		} else {
		  for (j=i;j<=m;j++) a[j][i]=0.0;
		}
		++a[i][i];
  }
  // Phase 4: iterate on w and rv1 until, for each k from n down to 1, the
  // entry rv1[k] has become negligible.  At most 30 iterations per k.
  for (k=n;k>=1;k--) {
    for (its=1;its<=30;its++) {
      flag=1;
      // Search downwards from k for a split point.  "x + anorm == anorm"
      // is the test for x being negligible at float precision.
      for (l=k;l>=1;l--) {
	nm=l-1;
	if (fabs(rv1[l])+anorm == anorm) {
	  flag=0;
	  break;
	}
	// NOTE(review): with l == 1 this would read w[0]; the loop appears to
	// rely on rv1[1] being zero so that the break above is taken first.
	if (fabs(w[nm])+anorm == anorm) break;
      }
      if (flag) {
	// w[nm] is negligible: rotate columns nm and i of a to cancel
	// rv1[l..k] as far as they are not already negligible.
	c=0.0;
	s=1.0;
	for (i=l;i<=k;i++) {
	  f=s*rv1[i];
	  if (fabs(f)+anorm != anorm) {
	    g=w[i];
	    h=PYTHAG(f,g);
	    w[i]=h;
	    h=1.0/h;
	    c=g*h;
	    s=(-f*h);
	    for (j=1;j<=m;j++) {
	      y=a[j][nm];
	      z=a[j][i];
	      a[j][nm]=y*c+z*s;
	      a[j][i]=z*c-y*s;
	    }
	  }
	}
      }
      z=w[k];
      if (l == k) {
	// Converged for this k: make w[k] non-negative, negating column k of
	// v to compensate, and move on to the next k.
	if (z < 0.0) {
	  w[k] = -z;
	  for (j=1;j<=n;j++) v[j][k]=(-v[j][k]);
	}
	break;
      }
      // Reported only; the rest of this iteration still runs and the loop
      // then ends because its has reached its limit.
      if (its == 30) nrerror_liu("No convergence in 30 SVDCMP iterations");
      // Compute the starting value f for this iteration from the trailing
      // entries of w and rv1.
      x=w[l];
      nm=k-1;
      y=w[nm];
      g=rv1[nm];
      h=rv1[k];
      f=((y-z)*(y+z)+(g-h)*(g+h))/(2.0*h*y);
      g=PYTHAG(f,1.0);
      f=((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;
      c=s=1.0;
      // Sweep j = l..nm: each step applies one rotation to columns j,i of v
      // and a second rotation to columns j,i of a, updating w and rv1.
      for (j=l;j<=nm;j++) {
	i=j+1;
	g=rv1[i];
	y=w[i];
	h=s*g;
	g=c*g;
	z=PYTHAG(f,h);
	rv1[j]=z;
	c=f/z;
	s=h/z;
	f=x*c+g*s;
	g=g*c-x*s;
	h=y*s;
	y=y*c;
	for (jj=1;jj<=n;jj++) {
	  x=v[jj][j];
	  z=v[jj][i];
	  v[jj][j]=x*c+z*s;
	  v[jj][i]=z*c-x*s;
	}
	z=PYTHAG(f,h);
	w[j]=z;
	// When z is zero, c and s keep their previous values.
	if (z) {
	  z=1.0/z;
	  c=f*z;
	  s=h*z;
	}
	f=(c*g)+(s*y);
	x=(c*y)-(s*g);
	for (jj=1;jj<=m;jj++) {
	  y=a[jj][j];
	  z=a[jj][i];
	  a[jj][j]=y*c+z*s;
	  a[jj][i]=z*c-y*s;
	}
      }
      rv1[l]=0.0;
      rv1[k]=f;
      w[k]=x;
    }
  }
  freevectorf(rv1,1,n);
}




