/*****
 *Property of the University of British Columbia (UBC),
 *Copyright 2001, by UBC.
 *
 *By receiving this code, you are agreeing to the following terms:
 *1. You will use this code for academic purposes only.
 *2. For academic use only, you may distribute the binary or executable code
 *   to persons at UBC or the Univ. of Western Australia who have previously
 *   read and agreed to these terms, but you must distribute the SOURCE code
 *   with it.
 *3. Each file of source code so distributed must have this header attached.
 *4. If the code is revised, the programmer's name and revision date must be added
 *   to the Revision List below, as well as the revisions identified in the code.
 *5. You will not make this code more widely available via any method such as
 *   publishing in print, email mail-list, usenet posting, website etc.
 *6. UBC reserves all rights to this work and all derivative works.
 *
 *For other proposed purposes please contact:
 *The University-Industry Liaison Office
 *IRC Room 331 - 2194 Health Sciences Mall
 *University of British Columbia
 *Vancouver, BC, Canada V6T 1Z3
 *Tel: (604) 822-8580
 *Fax: (604) 822-8589
 *
 *or contact:
 *Peter D. Lawrence, Professor at peterl@ece.ubc.ca or
 *Greg Z. Grudic, Assistant Professor, at grudic@cs.colorado.edu
 *
 *Revision List:
 *Greg Grudic, August 28, 1998.
 *Robin Atkins, August 31, 2000.
 *Peter Lawrence (pdl), December 31, 2001.
 *****/

/*
%
% File: data_vt.c
% Program: Functions for Accessing data for Functional Approximation Code
%
% Author: Greg Grudic
%
% Notes:
% Includes Functions:
%   Initialize_Data: Reads data from DataTrain into orig_inputs_lrn and
%                    orig_outputs_lrn.
%   Initialize_Data_Test_Val: Same, but reads from DataTest.
%   Get_Lrn_Ex_Input and others: Return a specific value from that data.
%   Split_Up_Data: Creates index tables (subset_lrn, subset_val) that point
%                  into that data.
%
% Changes by Robin:
%   Wrote Split_Up_Data, which is used to split up the huge data file
%   DataTrain into ten sets of learning and validation data sets, called
%   subset_lrn and subset_val.
%
%   Changed the way Get_Lrn_Ex and the others with similar names return
%   the data.  Now they all access the data in orig_inputs_lrn and
%   orig_outputs_lrn according to the pointers subset_val and subset_lrn.
%
%   Changed Delete_Data to free up the memory allocated to subset_lrn and
%   subset_val.
%
%   Wrote Set_Current_Approx based entirely on Greg's Set_Data_Dim_Output
%   to set the value of the local variable curr_approx.
%
%   Changed the way Initialise_Data and Initialise_Data_Test_Val read from
%   appr.ini to make it simpler.
%
%   Added free_Robin_Matrix based entirely on Greg's free_My_Matrix to
%   free up memory allocated to a Robin_Matrix.
%
%   Added Robin_Matrix based entirely on My_Matrix to allocate memory for
%   the subset_lrn and subset_val arrays.
%
% Peter Lawrence primarily re-inserted Greg's original Get_Lrn_Ex functions
% to access the test data, since during testing there are no longer 10
% learning and validation sets.
%
% Later review changes (identified here as required by the header above):
%   - Initialize_Data and Initialize_Data_Test_Val share one loader, so the
%     DataTest path no longer reports errors against "DataTrain".
%   - appr.ini dimensions are validated before the data file is scanned.
%   - Matrix allocators reject negative sizes; matrix free routines accept
%     NULL; Delete_Data/Delete_Data2 reset the freed globals to NULL.
%   - Split_Up_Data sizes its status table from num_orig_ex (was a fixed
%     5000-entry stack array), computes random indices in double precision
%     with a clamp, and seeds rand() once instead of once per fold.
*/

#include "b_pc.h"
/* NOTE(review): the three system header names below were reconstructed from
   the library calls made in this file (stdio: fopen/fscanf/getc/sprintf,
   stdlib: malloc/free/rand/srand, time: clock/time).  Confirm against the
   project's master copy. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Number of padding elements placed in front of each allocation made by
   My_Matrix()/Robin_Matrix().  The free_* routines subtract the same amount
   to recover the pointer that malloc() returned. */
#define MATRIX_PAD 1

/* Marker in the Split_Up_Data status table for an example that has not yet
   been assigned to any validation set. */
#define AVAILABLE (-1)

/* File-scope prototypes for the routines defined below. */
void skiptoend(FILE *fp);
void free_My_Matrix(My_Real **m);
void free_Robin_Matrix(int **m);
My_Real **My_Matrix(long nrh, long nch);
int **Robin_Matrix(long nrh, long nch);

/* Example data: row = example number, column = input/output variable.
   Filled by Initialize_Data() or Initialize_Data_Test_Val(). */
My_Real **orig_inputs_lrn, **orig_outputs_lrn;
/* Declared but never allocated or read in this file (pdl removed their use). */
My_Real **orig_inputs_val, **orig_outputs_val;

/* ************************** Added by Robin Atkins ************************ */
/* subset_xxx[approx][k] is the row of orig_inputs_lrn/orig_outputs_lrn that
   holds the k-th validation/learning example of approximation "approx". */
int **subset_val, **subset_lrn;
/* ************************************************************************* */

int dim_in, dim_out;    /* values read from appr.ini */
int curr_dim_out;       /* output column returned by the Get_xxx_Output routines */
int curr_approx;        /* row of subset_lrn/subset_val used by the Get_xxx routines */


/* Select which output column the Get_xxx_Ex_Output routines return. */
void Set_Data_Dim_Output(int curr_d_o)
{
    curr_dim_out = curr_d_o;
}

/* ************************** Added by Robin Atkins ************************ */
/* Select which approximation's index tables the Get_Lrn_Ex_xxx and
   Get_Val_Ex_xxx routines use. */
void Set_Current_Approx(int curr_ap)
{
    curr_approx = curr_ap;
}
/* ************************************************************************* */


/* Release the example data and both index tables (training phase).
   The freed globals are reset to NULL so that a repeated call is harmless.
   orig_inputs_val/orig_outputs_val are not freed: nothing in this file
   allocates them (pdl deleted those calls). */
void Delete_Data(void)
{
    free_My_Matrix(orig_inputs_lrn);
    free_My_Matrix(orig_outputs_lrn);
    orig_inputs_lrn = NULL;
    orig_outputs_lrn = NULL;

    /* ************************** Added by Robin Atkins ******************** */
    free_Robin_Matrix(subset_lrn);
    free_Robin_Matrix(subset_val);
    subset_lrn = NULL;
    subset_val = NULL;
    /* ********************************************************************* */
}

/* pdl added for DataTest: release only the example data.  The index tables
   are left alone (Robin's free_Robin_Matrix calls were deleted by Peter
   Lawrence for this variant). */
void Delete_Data2(void)
{
    free_My_Matrix(orig_inputs_lrn);
    free_My_Matrix(orig_outputs_lrn);
    orig_inputs_lrn = NULL;
    orig_outputs_lrn = NULL;
}


/* Read dim_in and dim_out from "appr.ini" into the file-level globals.
   Returns 1 on success, 0 after reporting a problem through My_Error().
   NOTE(review): My_Error() comes from outside this file and presumably does
   not return; the 0 result only matters if it does. */
static int read_data_dims(void)
{
    FILE *fp;
    char error_text[100];
    int ok = 1;

    if ((fp = fopen("appr.ini", "r")) == NULL) {
        sprintf(error_text, "Couldn't open \"%s\"\n", "appr.ini");
        My_Error(error_text);
        return 0;
    }

    /* ************************** Changed by Robin Atkins ****************** */
    if (fscanf(fp, "%d %d", &dim_in, &dim_out) != 2) {
        sprintf(error_text, "Initialize_Data: cannot read dim_in or dim_out from appr.ini\n");
        My_Error(error_text);
        ok = 0;
    }
    /* ********************************************************************* */
    fclose(fp);

    /* With no values to read per example the counting loop in load_examples()
       would never hit a failed fscanf and so would never stop. */
    if (ok && (dim_in < 0 || dim_out < 0 || (dim_in == 0 && dim_out == 0))) {
        sprintf(error_text, "Initialize_Data: invalid dim_in or dim_out in appr.ini\n");
        My_Error(error_text);
        ok = 0;
    }
    return ok;
}

/* Read one example (dim_in inputs, then dim_out outputs) from fp and skip the
   rest of its line.  Values are stored when the destination row is non-NULL;
   NULL rows are used to merely count examples.
   Returns 1 if a complete example was read, 0 otherwise. */
static int read_example(FILE *fp, My_Real *inputs, My_Real *outputs)
{
    float value;
    int i;

    for (i = 0; i < dim_in; i++) {
        if (fscanf(fp, "%f", &value) != 1)
            return 0;
        if (inputs != NULL)
            inputs[i] = value;
    }
    for (i = 0; i < dim_out; i++) {
        if (fscanf(fp, "%f", &value) != 1)
            return 0;
        if (outputs != NULL)
            outputs[i] = value;
    }
    skiptoend(fp);
    return 1;
}

/* Load every complete example in the file "path" into orig_inputs_lrn and
   orig_outputs_lrn, and store the number of examples in *num_ex.
   The file is scanned twice: once to count the examples so the matrices can
   be sized, and once more to fill them. */
static void load_examples(const char *path, int *num_ex)
{
    FILE *data_file;
    char error_text[100];
    int count, row;

    *num_ex = 0;

    if ((data_file = fopen(path, "r")) == NULL) {
        sprintf(error_text, "Couldn't open \"%.80s\"\n", path);
        My_Error(error_text);
        return;
    }

    /* Pass 1: count complete examples. */
    count = 0;
    while (read_example(data_file, NULL, NULL))
        count++;
    *num_ex = count;

    orig_inputs_lrn = My_Matrix(count, dim_in);
    orig_outputs_lrn = My_Matrix(count, dim_out);
    if (orig_inputs_lrn == NULL || orig_outputs_lrn == NULL) {
        fclose(data_file);
        return;
    }

    /* Pass 2: read the same examples again, this time keeping the values. */
    rewind(data_file);
    for (row = 0; row < count; row++) {
        if (!read_example(data_file, orig_inputs_lrn[row], orig_outputs_lrn[row])) {
            sprintf(error_text, "Error in reading the %.60s file!!!\n", path);
            My_Error(error_text);
            break;
        }
    }
    fclose(data_file);
}

/* pdl: Used only in Training phase.
   Reads dim_in/dim_out from appr.ini and all examples from DataTrain.
     num_lrn_ex  out: number of examples found in DataTrain
     num_val_ex  unused (validation data is split off of DataTrain later)
     dim_i       out: dim_in
     dim_o       out: dim_out */
void Initialize_Data(int *num_lrn_ex, int *num_val_ex, int *dim_i, int *dim_o)
{
    int dims_ok;

    (void) num_val_ex;

    dims_ok = read_data_dims();
    *dim_i = dim_in;
    *dim_o = dim_out;

    if (!dims_ok) {
        *num_lrn_ex = 0;
        return;
    }
    load_examples("DataTrain", num_lrn_ex);
}

/* This routine is used during processing of DataTest.
   Reads dim_in/dim_out from appr.ini and all examples from DataTest.
     num_lrn_ex  out: number of examples found in DataTest
     num_val_ex  unused (validation data is not used in the testing phase)
     dim_i       out: dim_in
     dim_o       out: dim_out */
void Initialize_Data_Test_Val(int *num_lrn_ex, int *num_val_ex, int *dim_i, int *dim_o)
{
    int dims_ok;

    (void) num_val_ex;

    dims_ok = read_data_dims();
    *dim_i = dim_in;
    *dim_o = dim_out;

    if (!dims_ok) {
        *num_lrn_ex = 0;
        return;
    }
    load_examples("DataTest", num_lrn_ex);
}


/* None of the accessors below range-check ex_num, var, curr_approx or
   curr_dim_out; callers must pass indices that are valid for the loaded
   data and the current split. */

/* Used during Training: input variable "var" of the ex_num-th learning
   example of the current approximation. */
void Get_Lrn_Ex_Input(int ex_num, int var, double *in)
{
    int row = subset_lrn[curr_approx][ex_num];

    *in = orig_inputs_lrn[row][var];
}

/* pdl added to input TestData only: input variable "var" of example ex_num,
   indexed directly (no index table). */
void Get_Lrn_Ex_Input2(int ex_num, int var, double *in)
{
    *in = orig_inputs_lrn[ex_num][var];
}

/* Output curr_dim_out of the ex_num-th learning example of the current
   approximation. */
void Get_Lrn_Ex_Output(int ex_num, double *out)
{
    int row = subset_lrn[curr_approx][ex_num];

    *out = orig_outputs_lrn[row][curr_dim_out];
}

/* pdl added to input TestData only: output curr_dim_out of example ex_num,
   indexed directly (no index table). */
void Get_Lrn_Ex_Output2(int ex_num, double *out)
{
    *out = orig_outputs_lrn[ex_num][curr_dim_out];
}

/* Input variable "var" of the ex_num-th validation example of the current
   approximation.  Validation examples live in the same matrix as the
   learning examples; only the index table differs. */
void Get_Val_Ex_Input(int ex_num, int var, double *in)
{
    int row = subset_val[curr_approx][ex_num];

    *in = orig_inputs_lrn[row][var];
}

/* Output curr_dim_out of the ex_num-th validation example of the current
   approximation. */
void Get_Val_Ex_Output(int ex_num, double *out)
{
    int row = subset_val[curr_approx][ex_num];

    *out = orig_outputs_lrn[row][curr_dim_out];
}


/* skiptoend - routine to read contents of data file up to end of line
   (the newline itself is consumed) or end of file. */
void skiptoend(FILE *fp)
{
    int c;

    do {
        c = getc(fp);
    } while (c != EOF && c != '\n');
}


/* free a My_Real My_Matrix allocated by My_Matrix().  NULL is ignored. */
void free_My_Matrix(My_Real **m)
{
    if (m == NULL)
        return;

    /* Undo the MATRIX_PAD offsets applied by My_Matrix(). */
    free((char *) (m[0] - MATRIX_PAD));
    free((char *) (m - MATRIX_PAD));
}

/* ************************** Added by Robin Atkins ************************ */
/* free an int matrix allocated by Robin_Matrix().  NULL is ignored. */
void free_Robin_Matrix(int **m)
{
    if (m == NULL)
        return;

    /* Undo the MATRIX_PAD offsets applied by Robin_Matrix(). */
    free((char *) (m[0] - MATRIX_PAD));
    free((char *) (m - MATRIX_PAD));
}
/* ************************************************************************* */


/* allocate a My_Real My_Matrix with subscript range m[0..nrh][0..nch].
   All rows live in one contiguous allocation; m[i] points at row i.
   Reports failures through My_Error() and, should that return, yields NULL.
   Release with free_My_Matrix(). */
My_Real **My_Matrix(long nrh, long nch)
{
    long i, nrow = nrh + 1, ncol = nch + 1;
    My_Real **m;

    /* A negative bound would make the row-pointer array too small for m[0]. */
    if (nrh < 0 || nch < 0) {
        My_Error("invalid dimensions in My_Matrix()");
        return NULL;
    }

    /* allocate pointers to rows */
    m = (My_Real **) malloc((size_t) ((nrow + MATRIX_PAD) * sizeof(My_Real *)));
    if (!m) {
        My_Error("allocation failure 1 in My_Matrix()");
        return NULL;
    }
    m += MATRIX_PAD;

    /* allocate rows and set pointers to them */
    m[0] = (My_Real *) malloc((size_t) ((nrow * ncol + MATRIX_PAD) * sizeof(My_Real)));
    if (!m[0]) {
        My_Error("allocation failure 2 in My_Matrix()");
        free((char *) (m - MATRIX_PAD));
        return NULL;
    }
    m[0] += MATRIX_PAD;

    for (i = 1; i <= nrh; i++)
        m[i] = m[i - 1] + ncol;

    /* return pointer to array of pointers to rows */
    return m;
}

/* ************************** Added by Robin Atkins ************************ */
/* allocate an int matrix with subscript range m[0..nrh][0..nch], laid out
   exactly like My_Matrix().  Reports failures through My_Error() and, should
   that return, yields NULL.  Release with free_Robin_Matrix(). */
int **Robin_Matrix(long nrh, long nch)
{
    long i, nrow = nrh + 1, ncol = nch + 1;
    int **m;

    /* A negative bound would make the row-pointer array too small for m[0]. */
    if (nrh < 0 || nch < 0) {
        My_Error("invalid dimensions in Robin_Matrix()");
        return NULL;
    }

    /* allocate pointers to rows */
    m = (int **) malloc((size_t) ((nrow + MATRIX_PAD) * sizeof(int *)));
    if (!m) {
        My_Error("allocation failure 1 in Robin_Matrix()");
        return NULL;
    }
    m += MATRIX_PAD;

    /* allocate rows and set pointers to them */
    m[0] = (int *) malloc((size_t) ((nrow * ncol + MATRIX_PAD) * sizeof(int)));
    if (!m[0]) {
        My_Error("allocation failure 2 in Robin_Matrix()");
        free((char *) (m - MATRIX_PAD));
        return NULL;
    }
    m[0] += MATRIX_PAD;

    for (i = 1; i <= nrh; i++)
        m[i] = m[i - 1] + ncol;

    /* return pointer to array of pointers to rows */
    return m;
}
/* ************************************************************************* */


/* ************************** Added by Robin Atkins ************************ */
/* pdl: Split up data for k-fold cross-validation (where k = MAX_APPROX):
   Repeat k times: randomly select the cur_approx "validation set" comprising
   num_val_ex examples from the total dataset.  All the remaining (num_lrn_ex)
   examples will then belong to the cur_approx "training set".  The validation
   set is used by the learning algorithm to determine when to stop training.

     num_orig_ex  in:  number of examples loaded from DataTrain
     num_lrn_ex   out: number of learning examples per approximation
     num_val_ex   out: number of validation examples per approximation

   Allocates and fills the globals subset_lrn and subset_val.  An example is
   placed in at most one validation set, because its status entry keeps the
   number of the fold that claimed it. */
void Split_Up_Data(int num_orig_ex, int *num_lrn_ex, int *num_val_ex)
{
    int i, cur_approx, data_num;
    int *status_of_data;

    /* calculate the number of learning and validation samples in each approx
       - making sure there will be enough */
    /* pdl removed "- 1" in Robin's ((num_orig_ex/MAX_APPROX) - 1) */
    *num_val_ex = (int) (num_orig_ex / MAX_APPROX);

    /* pdl added: if no cross-validation, the validation set is 20% of the
       examples in DataTrain (an earlier comment said 10%; the factor in the
       code is 0.2). */
    if (MAX_APPROX == 1)
        *num_val_ex = (int) (0.2 * num_orig_ex);

    *num_lrn_ex = num_orig_ex - *num_val_ex;

    /* allocate the matrices to record the assignment of examples to
       validation sets and training sets */
    subset_lrn = Robin_Matrix(MAX_APPROX, *num_lrn_ex);
    subset_val = Robin_Matrix(MAX_APPROX, *num_val_ex);
    if (subset_lrn == NULL || subset_val == NULL)
        return;

    /* One status entry per example.  Sized from num_orig_ex so that data sets
       of any size fit; at least one element is requested so that a zero-size
       malloc is never relied upon. */
    status_of_data = (int *) malloc(sizeof(int) * (size_t) (num_orig_ex > 0 ? num_orig_ex : 1));
    if (!status_of_data) {
        My_Error("allocation failure in Split_Up_Data()");
        return;
    }

    /* initialise status array to available */
    for (i = 0; i < num_orig_ex; i++)
        status_of_data[i] = AVAILABLE;

    /* Seed the random-number generator with the current time so that the
       numbers will be different every time we run.  Seeding once, outside the
       fold loop, keeps later folds from replaying the sequence already used
       by earlier ones. */
    srand((unsigned) time(NULL));

    /* Make up MAX_APPROX Validation Sets and Learning Sets */
    for (cur_approx = 0; cur_approx < MAX_APPROX; cur_approx++) {
        /* randomly choose the validation set from the whole data set and mark
           the ones chosen with the value of cur_approx so that they won't be
           chosen again */
        i = 0;
        while (i < *num_val_ex) {
            /* randomly choose one of the examples from the original learning
               data.  Double precision keeps rand()/(RAND_MAX+1) strictly
               below 1; the clamp is a belt-and-braces bound on the index. */
            /* pdl removed "1 +" in Robin's data_num = 1 + (int)(num_orig_ex ..etc) */
            data_num = (int) (((double) rand() / ((double) RAND_MAX + 1.0)) * num_orig_ex);
            if (data_num >= num_orig_ex)
                data_num = num_orig_ex - 1;

            /* check to see if it has already been chosen.  If not, mark it
               as chosen */
            if (status_of_data[data_num] == AVAILABLE) {
                subset_val[cur_approx][i] = data_num;
                status_of_data[data_num] = cur_approx;
                i++;
            }
        }

        /* assign all the values not already assigned to the current
           validation set to the current learning set */
        data_num = 0;
        for (i = 0; i < num_orig_ex; i++) {
            if (status_of_data[i] != cur_approx) {
                subset_lrn[cur_approx][data_num] = i;
                data_num++;
            }
        }
    }

    free(status_of_data);
}
/* ************************************************************************** */