/*****
*Property of the University of British Columbia (UBC),
*Copyright 2001, by UBC.
*
*By receiving this code, you are agreeing to the following terms:
*1. You will use this code for academic purposes only.
*2. For academic use only, you may distribute the binary or executable code
*   to persons at UBC or the Univ. of Western Australia who have previously 
*   read and agreed to these terms, but you must distribute the SOURCE code
*   with it. 
*3. Each file of source code so distributed must have this header attached.
*4. If the code is revised, the programmer's name and revision date must be added
*   to the Revision List below, as well as the revisions identified in the code.
*5. You will not make this code more widely available via any method such as 
*   publishing in print, email mail-list, usenet posting, website etc.
*6. UBC reserves all rights to this work and all derivative works.
*
*For other proposed purposes please contact:
*The University-Industry Liaison Office 
*IRC Room 331 - 2194 Health Sciences Mall 
*University of British Columbia 
*Vancouver, BC, Canada V6T 1Z3 
*Tel: (604) 822-8580 
*Fax: (604) 822-8589 
*
*or contact: 
*Peter D. Lawrence, Professor at peterl@ece.ubc.ca or 
*Greg Z. Grudic, Assistant Professor, at grudic@cs.colorado.edu
*
*Revision List: 
*Greg Grudic, August 28, 1998.
*Robin Atkins, August 31, 2000.
*Peter Lawrence (pdl), December 31, 2001.
*****/

/*
%
% File:		data_vt.c
% Program: 	Functions for Accessing data for Functional Approximation Code
%
% Author:	Greg Grudic 
%
% Notes:
% Includes Functions:
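% Initialize_Data: Reads data from DataTrain into orig_inputs_lrn and orig_outputs_lrn.
% Get_Lrn_Ex_Input, Get_Lrn_Ex_Output and others: Return a specific value from
% orig_inputs_lrn or orig_outputs_lrn.
% Split_Up_Data: Creates index tables (subset_lrn, subset_val) that refer to rows
% of orig_inputs_lrn and orig_outputs_lrn.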
% 
% Changes by Robin:
% Wrote Split_Up_Data (line 700), which is used to split up the huge data file 
% DataTrain into ten sets of learning and validation data sets, called subset_lrn 
% and subset_val.
%
% Changed the way Get_Lrn_Ex (line 570) and the others with similar names return 
% the data. Now they all access the data in orig_inputs_lrn and orig_outputs_lrn
% according to the pointers subset_val and subset_lrn. Original code is
% commented out.
%
% Changed Delete_Data (line 80) to free up the memory allocated to subset_lrn and 
% subset_val
%
% Wrote Set_Current_Approx (line 75) based entirely on Greg's Set_Data_Dim_Output 
% to set the value of the local variable curr_approx.
%
% Changed the way Initialize_Data and Initialize_Data_Test_Val read from appr.ini 
% to make it simpler.
%
% Added free_Robin_Matrix (line 620) based entirely on Greg's free_My_Matrix to 
% free up memory allocated to a Robin_Matrix
%
% Added Robin_Matrix (line 670) based entirely on My_Matrix to allocate memory for 
% the subset_lrn and subset_val arrays.

% Peter Lawrence primarily re-inserted Greg's original Get_Lrn_Ex functions to access 
% the test data, since during testing there are no longer 10 learning and validation
% sets. 



*/

#include "b_pc.h"
#include <time.h>
#include <stdio.h>
#include <stdlib.h>

/* Example tables, one row per example.  Allocated and filled by
   Initialize_Data / Initialize_Data_Test_Val, released by Delete_Data /
   Delete_Data2. */
My_Real **orig_inputs_lrn, **orig_outputs_lrn;
/* No code visible in this file allocates, reads or frees these two
   (the corresponding frees in Delete_Data are commented out). */
My_Real **orig_inputs_val, **orig_outputs_val;
/* ************************** Added by Robin Atkins ************************ */
/* Index tables filled by Split_Up_Data: subset_xxx[approx][k] is a row number
   into orig_inputs_lrn / orig_outputs_lrn. */
int	**subset_val, **subset_lrn;
/* ************************************************************************* */

/* Number of input and output values per example, as read from appr.ini. */
int dim_in, dim_out;

/* Output column returned by the Get_xxx_Ex_Output routines;
   set through Set_Data_Dim_Output. */
int curr_dim_out;
/* Row of subset_lrn / subset_val used by the Get_Lrn_xxx / Get_Val_xxx
   accessors; set through Set_Current_Approx. */
int curr_approx;

/*
 * Set_Data_Dim_Output
 *
 * Records which output column the Get_xxx_Ex_Output routines will return
 * from now on.
 *
 *   curr_d_o : index of the output column; stored unchecked in curr_dim_out.
 */
void Set_Data_Dim_Output(int curr_d_o)
{
  const int selected_output = curr_d_o;

  curr_dim_out = selected_output;
}

/* ************************** Added by Robin Atkins ************************ */
/*
 * Set_Current_Approx
 *
 * Records which row of subset_lrn / subset_val the Get_Lrn_xxx and
 * Get_Val_xxx accessors will use from now on.
 *
 *   curr_ap : row index; stored unchecked in curr_approx.
 */
void Set_Current_Approx(int curr_ap)
{
  const int selected_approx = curr_ap;

  curr_approx = selected_approx;
}
/* ************************************************************************* */

/*
 * Delete_Data
 *
 * Releases the storage used during the training phase: the example tables
 * orig_inputs_lrn / orig_outputs_lrn and the index tables subset_lrn /
 * subset_val (added by Robin Atkins).
 *
 * Each pointer is released only if it is non-NULL and is reset to NULL
 * afterwards, so calling this routine twice, or before any data has been
 * loaded, no longer frees the same memory twice or dereferences NULL.
 *
 * pdl: orig_inputs_val and orig_outputs_val are deliberately not freed here.
 */
void Delete_Data(void)
{
  void free_My_Matrix(My_Real **m);
  void free_Robin_Matrix(int **m);

  if (orig_inputs_lrn != NULL)
    {
      free_My_Matrix(orig_inputs_lrn);
      orig_inputs_lrn = NULL;
    }

  if (orig_outputs_lrn != NULL)
    {
      free_My_Matrix(orig_outputs_lrn);
      orig_outputs_lrn = NULL;
    }

/* ************************** Added by Robin Atkins ************************ */
  if (subset_lrn != NULL)
    {
      free_Robin_Matrix(subset_lrn);
      subset_lrn = NULL;
    }

  if (subset_val != NULL)
    {
      free_Robin_Matrix(subset_val);
      subset_val = NULL;
    }
/* ************************************************************************* */

}

/*
 * Delete_Data2  (pdl added for DataTest)
 *
 * Releases the example tables orig_inputs_lrn / orig_outputs_lrn.  Unlike
 * Delete_Data it does not touch subset_lrn / subset_val (freeing them here
 * was removed by Peter Lawrence), nor orig_inputs_val / orig_outputs_val.
 *
 * Each pointer is released only if it is non-NULL and is reset to NULL
 * afterwards, so a repeated call, or a call before any data has been loaded,
 * no longer frees the same memory twice or dereferences NULL.
 */
void Delete_Data2(void) /* pdl added for DataTest*/
{
  void free_My_Matrix(My_Real **m);

  if (orig_inputs_lrn != NULL)
    {
      free_My_Matrix(orig_inputs_lrn);
      orig_inputs_lrn = NULL;
    }

  if (orig_outputs_lrn != NULL)
    {
      free_My_Matrix(orig_outputs_lrn);
      orig_outputs_lrn = NULL;
    }

}


void Initialize_Data(int *num_lrn_ex, int *num_val_ex, int *dim_i, int *dim_o)

/* pdl: Used only in Training phase. num_val_ex is unused argument now */

{
  My_Real **My_Matrix(long nrh, long nch);

  void skiptoend(FILE *fp);

  FILE *data_file, *fp;
  int cont, sens_read, act_read, i;


  float f_tmp;


  char error_text[100];

  /** read the appr.ini file ***/
  if ((fp = fopen("appr.ini", "r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "appr.ini");
      My_Error(error_text);
    }

/* ************************** Changed by Robin Atkins ************************ */
  if (fscanf(fp, "%d %d", &(dim_in), &(dim_out) ) != 2)
    {
      sprintf(error_text,
	      "Initialize_Data: cannot read dim_in or dim_out from appr.ini\n");
      My_Error(error_text);
    }
/* ************************************************************************* */

  *dim_i = dim_in;
  *dim_o = dim_out;
 
  fclose(fp);
  /******************************/


  /***** Get the learning Data *****************************/

  /** calculate *num_lrn_ex in DataTrain **/
  if ((data_file = fopen("DataTrain","r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "DataTrain");
      My_Error(error_text);
    }

  *num_lrn_ex = 0;
  cont = 1;
  while ( cont == 1 )
    {
      sens_read = 1;
      for ( i = 0; i < dim_in && sens_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      sens_read = 0;
	    }
	}
      
      act_read = 1;
      for ( i = 0; i < dim_out && sens_read == 1 &&
	   act_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      act_read = 0;
	    }
	}

      if ( (sens_read == 0)  ||  (act_read == 0) )
	{
	  cont = 0;
	}
      else
	{
	  *num_lrn_ex = *num_lrn_ex + 1;
	  skiptoend(data_file);
	}
    }
  
  fclose(data_file);
  /*****************************************************/

  /**** Assign Values ****/
  orig_inputs_lrn = My_Matrix(*num_lrn_ex, dim_in);
  orig_outputs_lrn = My_Matrix(*num_lrn_ex, dim_out);

  if ((data_file = fopen("DataTrain","r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "DataTrain");
      My_Error(error_text);
    }

  for ( cont = 0; cont < *num_lrn_ex; cont++ )
    {
      sens_read = 1;
      for ( i = 0; i < dim_in && sens_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      sens_read = 0;
	    }
	  else
	    {
	      orig_inputs_lrn[cont][i] = f_tmp;
	    }
	}
      
      act_read = 1;
      for ( i = 0; i < dim_out && sens_read == 1 &&
	   act_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      act_read = 0;
	    }
	  else
	    {
	      orig_outputs_lrn[cont][i] = f_tmp;
	    }
	}

      if ( (sens_read == 0)  ||  (act_read == 0) )
	{
	  My_Error("Error in reading the DataTrain file!!!\n");
	}
      else
	{
	  skiptoend(data_file);
	}
    }

  fclose(data_file);
  /********************************************/

  /* pdl removed the reading of the validation file here since */
  /* validation data is split off of the DataTrain file*/

}



void Initialize_Data_Test_Val(int *num_lrn_ex, int *num_val_ex, 
			      int *dim_i, int *dim_o)

/* This routine is used during processing of DataTest*/
/* num_val_ex is unused argument now */
{
  My_Real **My_Matrix(long nrh, long nch);

  void skiptoend(FILE *fp);

  FILE *data_file, *fp;
  int cont, sens_read, act_read, i;

  float f_tmp;

  char error_text[100];

  /** read the appr.ini file ***/
  if ((fp = fopen("appr.ini", "r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "appr.ini");
      My_Error(error_text);
    }

/* ************************** Changed by Robin Atkins ************************ */
  if (fscanf(fp, "%d %d", &(dim_in), &(dim_out) ) != 2)
    {
      sprintf(error_text,
	      "Initialize_Data: cannot read dim_in or dim_out from appr.ini\n");
      My_Error(error_text);
    }
/* *************************************************************************** */

  *dim_i = dim_in;
  *dim_o = dim_out;
 
  fclose(fp);
  /******************************/


  /***** Get the Test Data *****************************/

  /** calculate *num_lrn_ex in DataTest **/
  if ((data_file = fopen("DataTest","r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "DataTest");
      My_Error(error_text);
    }

  *num_lrn_ex = 0;
  cont = 1;
  while ( cont == 1 )
    {
      sens_read = 1;
      for ( i = 0; i < dim_in && sens_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      sens_read = 0;
	    }
	}
      
      act_read = 1;
      for ( i = 0; i < dim_out && sens_read == 1 &&
	   act_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      act_read = 0;
	    }
	}

      if ( (sens_read == 0)  ||  (act_read == 0) )
	{
	  cont = 0;
	}
      else
	{
	  *num_lrn_ex = *num_lrn_ex + 1;
	  skiptoend(data_file);
	}
    }
  
  fclose(data_file);
  /*****************************************************/

  /**** Assign Values ****/
  orig_inputs_lrn = My_Matrix(*num_lrn_ex, dim_in);
  orig_outputs_lrn = My_Matrix(*num_lrn_ex, dim_out);

  if ((data_file = fopen("DataTest","r")) == NULL)
    {
      sprintf(error_text, "Couldn't open \"%s\"\n", "DataTest");
      My_Error(error_text);
    }

  for ( cont = 0; cont < *num_lrn_ex; cont++ )
    {
      sens_read = 1;
      for ( i = 0; i < dim_in && sens_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      sens_read = 0;
	    }
	  else
	    {
	      orig_inputs_lrn[cont][i] = f_tmp;
	    }
	}
      
      act_read = 1;
      for ( i = 0; i < dim_out && sens_read == 1 &&
	   act_read == 1; i++ )
	{
	  if (fscanf(data_file, "%f", &(f_tmp) ) != 1)
	    {
	      act_read = 0;
	    }
	  else
	    {
	      orig_outputs_lrn[cont][i] = f_tmp;
	    }
	}

      if ( (sens_read == 0)  ||  (act_read == 0) )
	{
	  My_Error("Error in reading the DataTrain file!!!\n");
	}
      else
	{
	  skiptoend(data_file);
	}
    }

  fclose(data_file);
  /********************************************/

  /***** Then the Validation Data *****************************/
  /* pdl removed this section since validation data not used in testing phase*/

}

/***  ALL THE COMMENTS IN Get_XXX_Ex_XXXput(XXXXXX) WERE DONE BY ROBIN ATKINS ***/

/*
 * Get_Lrn_Ex_Input  (used during Training)
 *
 * Stores in *in input variable `var` of learning example `ex_num` of the
 * current approximation.  The example is looked up through
 * subset_lrn[curr_approx] to find its row in orig_inputs_lrn.
 * No range checking is done on ex_num or var.
 */
void Get_Lrn_Ex_Input(int ex_num, int var, double *in)
{
  const int *lrn_rows = subset_lrn[curr_approx];
  const int row = lrn_rows[ex_num];

  *in = orig_inputs_lrn[row][var];
}


/*
 * Get_Lrn_Ex_Input2  (pdl added to input TestData only)
 *
 * Stores in *in input variable `var` of example `ex_num`, read directly
 * from orig_inputs_lrn without going through subset_lrn (pdl removed the
 * subset lookup for DataTest data).
 * No range checking is done on ex_num or var.
 */
void Get_Lrn_Ex_Input2(int ex_num, int var, double *in)
{
  const double value = orig_inputs_lrn[ex_num][var];

  *in = value;
}

/*
 * Get_Lrn_Ex_Output
 *
 * Stores in *out output number curr_dim_out of learning example `ex_num`
 * of the current approximation.  The example is looked up through
 * subset_lrn[curr_approx] to find its row in orig_outputs_lrn.
 * No range checking is done on ex_num.
 */
void Get_Lrn_Ex_Output(int ex_num, double *out)
{
  const int *lrn_rows = subset_lrn[curr_approx];
  const int row = lrn_rows[ex_num];

  *out = orig_outputs_lrn[row][curr_dim_out];
}

/*
 * Get_Lrn_Ex_Output2  (pdl added to input TestData only)
 *
 * Stores in *out output number curr_dim_out of example `ex_num`, read
 * directly from orig_outputs_lrn without going through subset_lrn.
 * No range checking is done on ex_num.
 */
void Get_Lrn_Ex_Output2(int ex_num, double *out)
{
  const double value = orig_outputs_lrn[ex_num][curr_dim_out];

  *out = value;
}


/*
 * Get_Val_Ex_Input
 *
 * Stores in *in input variable `var` of validation example `ex_num` of the
 * current approximation.  The example is looked up through
 * subset_val[curr_approx] to find its row in orig_inputs_lrn (validation
 * examples live in the same table as the learning examples).
 * No range checking is done on ex_num or var.
 */
void Get_Val_Ex_Input(int ex_num, int var, double *in)
{
  const int *val_rows = subset_val[curr_approx];
  const int row = val_rows[ex_num];

  *in = orig_inputs_lrn[row][var];
}


/*
 * Get_Val_Ex_Output
 *
 * Stores in *out output number curr_dim_out of validation example `ex_num`
 * of the current approximation.  The example is looked up through
 * subset_val[curr_approx] to find its row in orig_outputs_lrn.
 * No range checking is done on ex_num.
 */
void Get_Val_Ex_Output(int ex_num, double *out)
{
  const int *val_rows = subset_val[curr_approx];
  const int row = val_rows[ex_num];

  *out = orig_outputs_lrn[row][curr_dim_out];
}


 
/* skiptoend - routine to read contents of data file up to end of line */
/*
 * skiptoend
 *
 * Consumes characters from fp up to and including the next newline, or up
 * to end of file (or a read error) if no newline is left.
 */
void skiptoend(FILE *fp)
{
  int ch;

  do
    {
      ch = getc(fp);
    }
  while (ch != EOF && ch != '\n');
}




/*
 * free_My_Matrix
 *
 * Frees a My_Real matrix allocated by My_Matrix().
 *
 * My_Matrix() hands out pointers that sit one element past the start of
 * each malloc'ed area (both for the row-pointer array and for the data
 * area), so both pointers are moved back by one before being passed to
 * free().
 *
 *   m : matrix to release; a NULL pointer is accepted and ignored, like
 *       free(NULL).
 */
void free_My_Matrix(My_Real **m)
{
  if (m == NULL)
    {
      return;
    }

  free((char*) (m[0]-1));
  free((char*) (m-1));
}

/* ************************** Added by Robin Atkins ************************ */
/*
 * free_Robin_Matrix
 *
 * Frees an int matrix allocated by Robin_Matrix().
 *
 * Robin_Matrix() hands out pointers that sit one element past the start of
 * each malloc'ed area (both for the row-pointer array and for the data
 * area), so both pointers are moved back by one before being passed to
 * free().
 *
 *   m : matrix to release; a NULL pointer is accepted and ignored, like
 *       free(NULL).
 */
void free_Robin_Matrix(int **m)
{
  if (m == NULL)
    {
      return;
    }

  free((char*) (m[0]-1));
  free((char*) (m-1));
}
/* ************************************************************************* */


/*
 * My_Matrix
 *
 * Allocates a My_Real matrix with subscript range m[0..nrh][0..nch],
 * i.e. nrh+1 rows of nch+1 columns, stored in one contiguous data area.
 *
 * Both the row-pointer array and the data area are allocated with one
 * spare leading element, and the returned pointers are advanced past it;
 * free_My_Matrix() undoes this offset.  The contents are not initialised.
 *
 * Allocation failures are reported through My_Error().
 */
My_Real **My_Matrix(long nrh, long nch)
{
  const long row_count = nrh + 1;
  const long col_count = nch + 1;
  My_Real **rows;
  My_Real *cells;
  long r;

  /* row pointers, plus one spare slot in front */
  rows = (My_Real **) malloc((size_t)((row_count+1)*sizeof(My_Real*)));
  if (rows == NULL)
    {
      My_Error("allocation failure 1 in My_Matrix()");
    }
  rows++;

  /* all cells in a single area, plus one spare cell in front */
  cells = (My_Real *) malloc((size_t)((row_count*col_count+1)*sizeof(My_Real)));
  rows[0] = cells;
  if (cells == NULL)
    {
      My_Error("allocation failure 2 in My_Matrix()");
    }
  rows[0] = cells + 1;

  /* every further row starts col_count cells after the previous one */
  for (r = 1; r <= nrh; r++)
    {
      rows[r] = rows[r-1] + col_count;
    }

  return rows;
}


/* ************************** Added by Robin Atkins ************************ */
/*
 * Robin_Matrix
 *
 * Allocates an int matrix with subscript range m[0..nrh][0..nch],
 * i.e. nrh+1 rows of nch+1 columns, stored in one contiguous data area.
 *
 * Both the row-pointer array and the data area are allocated with one
 * spare leading element, and the returned pointers are advanced past it;
 * free_Robin_Matrix() undoes this offset.  The contents are not
 * initialised.
 *
 * Allocation failures are reported through My_Error().
 */
int **Robin_Matrix(long nrh, long nch)
{
  const long row_count = nrh + 1;
  const long col_count = nch + 1;
  int **rows;
  int *cells;
  long r;

  /* row pointers, plus one spare slot in front */
  rows = (int **) malloc((size_t)((row_count+1)*sizeof(int*)));
  if (rows == NULL)
    {
      My_Error("allocation failure 1 in Robin_Matrix()");
    }
  rows++;

  /* all cells in a single area, plus one spare cell in front */
  cells = (int *) malloc((size_t)((row_count*col_count+1)*sizeof(int)));
  rows[0] = cells;
  if (cells == NULL)
    {
      My_Error("allocation failure 2 in Robin_Matrix()");
    }
  rows[0] = cells + 1;

  /* every further row starts col_count cells after the previous one */
  for (r = 1; r <= nrh; r++)
    {
      rows[r] = rows[r-1] + col_count;
    }

  return rows;
}
/* ************************************************************************* */


/* ************************** Added by Robin Atkins ************************ */
void Split_Up_Data(int num_orig_ex, int *num_lrn_ex, int *num_val_ex)
{
	
/*  pdl: Split up data for k-fold cross-validation (where k= MAX_APPROX):
	Repeat k times: randomly select the cur_approx "validation set" comprising 
	num_val_ex examples from the total dataset. All the remaining (num_lrn_ex) 
	examples will then belong to the cur_approx "training set". The validation 
	set is used by the learning algorithm to determine when to stop training.

	num_orig_ex : number of examples loaded from DataTrain.
	num_lrn_ex  : receives the size of each learning set.
	num_val_ex  : receives the size of each validation set.

	Results are left in the file-scope tables subset_lrn and subset_val,
	which are (re)allocated here with Robin_Matrix(): subset_xxx[approx][k]
	is the index of an example in the original data.  An example is placed
	in at most one validation set. */
	
	#define AVAILABLE (-1)
	int **Robin_Matrix(long nrh, long nch);
	int i, cur_approx, data_num;
	int *status_of_data;	/* AVAILABLE, or the approx whose validation set owns the example */
	size_t num_slots;

  
	/* calculate the number of learning and validation samples in each
	   approx - making sure there will be enough */
	/* pdl removed "- 1" in  Robin's ((num_orig_ex/MAX_APPROX) - 1)*/

	*num_val_ex = (int) (num_orig_ex / MAX_APPROX);

	/* pdl added next line: if no cross-validation, then 
	set the no. of samples in validation set = 20% of no.in DataTrain*/

	if (MAX_APPROX == 1) *num_val_ex = (int) (0.2 * num_orig_ex);

	*num_lrn_ex = num_orig_ex - *num_val_ex;
	
	/* allocate the matrices to record the assignment of examples to validation sets
	and training sets*/
	subset_lrn = Robin_Matrix(MAX_APPROX, *num_lrn_ex);
	subset_val = Robin_Matrix(MAX_APPROX, *num_val_ex);

	/* One status entry per example, sized from num_orig_ex (a fixed-size
	   array would be overrun by large data files).  At least one entry is
	   requested so that an empty data set does not turn into malloc(0). */
	num_slots = (num_orig_ex > 0) ? (size_t) num_orig_ex : 1;
	status_of_data = (int *) malloc(num_slots * sizeof(int));
	if (status_of_data == NULL) {
		My_Error("allocation failure in Split_Up_Data()");
		return;
	}

	/* initialise status array to available */
	for (i = 0; i < num_orig_ex; i++) status_of_data[i]=AVAILABLE;

	/* Seed the random-number generator once with the calendar time so that
	   the numbers will be different every time we run.  (clock() reports
	   the processor time used by this program, which is almost the same
	   value on every run, so it is not a useful seed.) */
	srand( (unsigned)time(NULL) );

	/* Make up MAX_APPROX Validation Sets and Learning Sets*/
	for (cur_approx = 0; cur_approx < MAX_APPROX; cur_approx++) {
		
		/* randomly choose the validation set from the whole data set and
		   mark the ones chosen with the value of the current_approx so that
		   they won't be chosen again */

		i = 0;
		while (i < *num_val_ex) {	
			/* randomly choose one of the data sets from the original learning data */
			/* pdl removed "1 +" in Robin's data_num = 1 + (int)(num_orig_ex ..etc) */

			/* Computed in double: in float, rand() values close to RAND_MAX
			   can round up to RAND_MAX+1, which made the ratio exactly 1.0
			   and data_num equal to num_orig_ex (one past the last example). */
			data_num = (int)(num_orig_ex * ((double)rand() / ((double)RAND_MAX + 1.0)));

			/* check to see if it has already been chosen. If not, mark it as chosen */
			if (status_of_data[data_num] == AVAILABLE) {
				subset_val[cur_approx][i] = data_num;
				status_of_data[data_num] = cur_approx;
				i++;
			}
		}
		
		/* assign all the values not already assigned to the current
		   validation set to the current learning set */
		data_num = 0;
		for (i = 0; i < num_orig_ex; i++) {
			if (status_of_data[i] != cur_approx) {
				subset_lrn[cur_approx][data_num] = i;
				data_num++;
			}
		}
	}

	free(status_of_data);
}
/* ************************************************************************** */
