/*****
 *Property of the University of British Columbia (UBC),
 *Copyright 2001, by UBC.
 *
 *By receiving this code, you are agreeing to the following terms:
 *1. You will use this code for academic purposes only.
 *2. For academic use only, you may distribute the binary or executable code
 *   to persons at UBC or the Univ. of Western Australia who have previously
 *   read and agreed to these terms, but you must distribute the SOURCE code
 *   with it.
 *3. Each file of source code so distributed must have this header attached.
 *4. If the code is revised, the programmer's name and revision date must be added
 *   to the Revision List below, as well as the revisions identified in the code.
 *5. You will not make this code more widely available via any method such as
 *   publishing in print, email mail-list, usenet posting, website etc.
 *6. UBC reserves all rights to this work and all derivative works.
 *
 *For other proposed purposes please contact:
 *The University-Industry Liaison Office
 *IRC Room 331 - 2194 Health Sciences Mall
 *University of British Columbia
 *Vancouver, BC, Canada V6T 1Z3
 *Tel: (604) 822-8580
 *Fax: (604) 822-8589
 *
 *or contact:
 *Peter D. Lawrence, Professor at peterl@ece.ubc.ca or
 *Greg Z. Grudic, Assistant Professor, at grudic@cs.colorado.edu
 *
 *Revision List:
 *Greg Grudic, August 28, 1998.
 *Robin Atkins, August 31, 2000.
 *Peter Lawrence (pdl), Dec. 27, 2001.
 *****/

/*
%
% File: processdata.c
% Program: Builds a Poly_Cascade approximation using the SPORE algorithm.
%
% Author: Greg Grudic
%
% Description:
% Loads data from DataTrain, reads expected inputs and outputs from appr.ini.
% Builds k poly_cascades for each output, saving to pcX.s where X is the
% index 0..9 for 10 cascades for example. Tests the approximations by loading
% DataTest and comparing the actual outputs with the approximated output
% and reporting the average error over k approximations for however many test
% samples.
%
%
% Defines:
% ERASE_APPROX_RIGHT_AWAY
%   This define is a memory management issue: it tells Build_Cascade whether
%   it should delete the approximations for each output variable
%   from memory right away as soon as it is done with them, or if it should
%   wait until the end and delete them all at once. Currently the code works
%   with it defined.
%
% BUILD_APPR
%   This tells the program whether or not to build the approximations.
%
% TEST_APPROX (This name changed by pdl)
%   This tells the program whether or not to test the approximations.
%
% Functions:
% Build_Approximation: Reads data from DataTrain into orig_lrn_data.
%   Splits up this data into training and validation sets. Builds and saves one
%   at a time the ten approximations, for each output. All
%   output is displayed on screen and written to Results.txt.
%
% Test_Approximation: Reads data from DataTest into orig_lrn_data (re-use of name).
%   Loads the ten approximations one at a time, each with true outputs.
%   Passes the inputs from DataTest into Evaluate_Poly_Cascade and compares the
%   guess of the outputs to the outputs from DataTest. The error is added to
%   Robin_Error and averaged at the end. All output is displayed on screen and
%   written to Results.txt.
%
% Changes by Robin:
%   Added the loop to go through all k approximations instead of just one in
%   both Build_Approximation and Test_Approximation. This also involved other
%   changes to the code such as calling Split_Up_Data, how the time is calculated,
%   how the errors are averaged, which pcX.s file to load, and when to free memory
%   allocated to the poly_cascade.
%
%   Changed the way the defines are placed so we don't waste time loading
%   training data if we just want to evaluate the approximations.
%
%   Completely changed Test_Approximation to simply calculate Robin_Error
%   instead of all of Greg's error counters.
%
%
% pdl changes:
%
%   Eliminated reading of validation data in the two Initialize_Data... functions.
%
%   Allowed the use of k = MAX_APPROX = 1 by arbitrarily setting the number of
%   samples in the validation set to be 10% of the number in DataTrain
%   since num_val_ex=num_train_ex/MAX_APPROX for k>1. See Split_Up_Data function.
%   Added the capability to write out the DataTrain and DataTest datasets for k=1.
%   This is primarily for testing purposes.
%
%   Added Get_Lrn_Ex_Input2 and related files to solve a bug in DataTest Phase
%
%
*/

#include "b_pc.h"
/*
 * NOTE(review): the copy under review shows two further #include directives
 * whose header names are missing. The headers below cover every standard
 * library name this file uses (printf/fopen, malloc/exit, fabs, time);
 * confirm against the upstream file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

/* Results.txt: receives a copy of everything this file prints to stdout. */
FILE *fp_RES_OUTPUT;
/* DataTrainSplit / DataValSplit: opened by main only when
   SPLIT_OUTPUT == TRUE and MAX_APPROX == 1. */
FILE *fp_TRAIN_OUTPUT; /*pdl*/
FILE *fp_VAL_OUTPUT;   /*pdl*/

/* .........Main Program..........*/

/*
 * Program entry point.
 *
 * Usage: <program> [seed0 seed1 seed2]
 *
 * With no arguments the three random number seeds default to 1. When
 * arguments are given, all three seeds must be present and must parse as
 * integers; otherwise a message is printed and the program exits with
 * status -1 (previously argv[2]/argv[3] were read even when absent and a
 * failed conversion left the seed indeterminate).
 *
 * Opens Results.txt (and the two split files when enabled), then repeats
 * the build/test cycle NUM_RUNS times; which phases run is selected at
 * compile time by BUILD_APPR and TEST_APPROX.
 *
 * Returns 0 on success. The original returned 1, which the calling
 * environment interprets as failure.
 */
int main(int argc, char **argv)
{
    /* function prototypes */
    void Build_Approximation(unsigned short xsubi_i[3]);
    void Test_Approximation(void);

    enum { NUM_SEEDS = 3, NUM_RUNS = 10 };

    /* variables */
    unsigned short xsubi[NUM_SEEDS];
    int i_tmp, run, k;

    /* handle command-line arguments */
    if (argc > 1) {
        /* all seeds or none: a partial list cannot be honoured */
        if (argc < 1 + NUM_SEEDS) {
            printf("Usage: %s [seed0 seed1 seed2]\n", argv[0]);
            exit(-1);
        }

        /* assign users random number seeds */
        for (k = 0; k < NUM_SEEDS; k++) {
            if (sscanf(argv[k + 1], "%d", &i_tmp) != 1) {
                printf("Invalid random number seed \"%s\"\n", argv[k + 1]);
                exit(-1);
            }
            xsubi[k] = (unsigned short)i_tmp;
        }
    } else {
        /* assign default random number seeds */
        for (k = 0; k < NUM_SEEDS; k++) {
            xsubi[k] = 1;
        }
    }

    /* open output file Results.txt - all info displayed on screen is also
       written to this file */
    if ((fp_RES_OUTPUT = fopen("Results.txt", "w")) == NULL) {
        printf("Cannot open file Results.txt\n");
        exit(-1);
    }

    if ((SPLIT_OUTPUT == TRUE) && (MAX_APPROX == 1)) {
        /*pdl: open output file DataTrainSplit to record Training Data used */
        if ((fp_TRAIN_OUTPUT = fopen("DataTrainSplit", "w")) == NULL) {
            printf("Cannot open file DataTrainSplit\n");
            exit(-1);
        }

        /*pdl: open output file DataValSplit to record Validation Data used */
        if ((fp_VAL_OUTPUT = fopen("DataValSplit", "w")) == NULL) {
            printf("Cannot open file DataValSplit\n");
            exit(-1);
        }
    }

    /* display random seed info */
    fprintf(fp_RES_OUTPUT, "Random Seed: %d %d %d\n", xsubi[0], xsubi[1], xsubi[2]);
    printf("Random Seed: %d %d %d\n", xsubi[0], xsubi[1], xsubi[2]);

    /* Repeat the Training/Testing process to estimate confidence intervals.*/
    for (run = 0; run < NUM_RUNS; run++) {
        /* build the approximations? */
#ifdef BUILD_APPR
        Build_Approximation(xsubi);
#endif

        /* test the approximations? */
#ifdef TEST_APPROX
        Test_Approximation();
#endif
    }

    /* close the output files */
    fclose(fp_RES_OUTPUT);

    if ((SPLIT_OUTPUT == TRUE) && (MAX_APPROX == 1)) {
        fclose(fp_TRAIN_OUTPUT);
        fclose(fp_VAL_OUTPUT);
    }

    return 0;
}

/*Functions Called*/

/*
 * Loads the training data, splits it into learning and validation sets,
 * and builds and saves the poly cascades, appending each one to
 * "pc<curr_cross_val>.s". Progress and the final mean squared errors are
 * written both to stdout and to fp_RES_OUTPUT.
 *
 * xsubi_i: the three random number seeds. They are copied into each
 *          Build_Cas_Poly before building and overwritten afterwards with
 *          the values left in that structure, so the caller's array is
 *          modified.
 *
 * NOTE(review): part of this function's body is missing from the copy under
 * review (see the note inside). The visible statements are kept as they
 * are, apart from the two local fixes marked below.
 */
void Build_Approximation(unsigned short xsubi_i[3])
{
    /* variables */
    FILE *fp;
    int i,j;        /*pdl*/
    double f_tmp;   /*pdl*/
    char pc_filename[20];
    time_t start_time, stop_time, delta_time;
    My_Real mse_lrn, mse_val;
    Build_Cas_Poly *b_pc;
    Cascade_Poly *pc;
    int num_lrn_ex, num_val_ex;
    int curr_cross_val, dim_in, dim_out, appr_cnt;

    /* function prototypes */
    void Save_Poly_Cascade(Cascade_Poly *pc, FILE *fp);
    void Delete_Poly_Cascade(Cascade_Poly *pc);
    void Initialize_Data(int *num_lrn_ex, int *num_val_ex, int *dim_i, int *dim_o);
    void Set_Data_Dim_Output(int curr_d_o);
    void Set_Current_Approx(int curr_ap);
    void Delete_Data(void);
    void Get_Lrn_Ex_Input(int ex_num, int var, double *in);
    void Get_Lrn_Ex_Output(int ex_num, double *out);
    void Get_Val_Ex_Input(int ex_num, int var, double *in);
    void Get_Val_Ex_Output(int ex_num, double *out);
    void Build_Poly_Cascade(Build_Cas_Poly *b_pc,
                            void (*Get_Lrn_Ex_Input)(int ex_num, int var, double *in),
                            void (*Get_Lrn_Ex_Output)(int ex_num, double *out),
                            void (*Get_Val_Ex_Input)(int ex_num, int var, double *in),
                            void (*Get_Val_Ex_Output)(int ex_num, double *out),
                            My_Real *mse_lrn, My_Real *mse_val );
    void Split_Up_Data(int num_orig_ex, int *num_lrn_ex, int *num_val_ex);

    /* Start the building program */
    start_time = time(0);

    /* load the data from DataTrain */
    printf("Loading Data...\n");
    Initialize_Data(&num_lrn_ex, &num_val_ex, &dim_in, &dim_out);
    fprintf(fp_RES_OUTPUT,"Total Num of Examples = %d\n",num_lrn_ex);
    printf("Total Num of Examples = %d\n",num_lrn_ex);

    /* split up the data in DataTrain into MAX_APPROX sets of learning and
       validation data; num_lrn_ex goes in as the total and comes back as the
       per-approximation learning count */
    printf("Splitting up data...\n");
    Split_Up_Data(num_lrn_ex, &num_lrn_ex, &num_val_ex);

    fprintf(fp_RES_OUTPUT,"Input Dim = %d\n",dim_in);
    fprintf(fp_RES_OUTPUT,"Output Dim = %d\n",dim_out);
    fprintf(fp_RES_OUTPUT,"Number of Approximations = %d\n",MAX_APPROX);
    fprintf(fp_RES_OUTPUT,"Num of Lrn Examples in Each Approx = %d\n",num_lrn_ex);
    fprintf(fp_RES_OUTPUT,"Num of Val Examples in Each Approx = %d\n",num_val_ex);
    fprintf(fp_RES_OUTPUT,"\n\nApproximation Type: POLY_DIM = %d, POLY_TERMS = %d\n\n", POLY_DIM, POLY_TERMS);
    printf("Input Dim = %d\n",dim_in);
    printf("Output Dim = %d\n",dim_out);
    printf("Number of Approximations = %d\n",MAX_APPROX);
    printf("Num of Lrn Examples in Each Approx = %d\n",num_lrn_ex);
    printf("Num of Val Examples in Each Approx = %d\n",num_val_ex);
    printf("\n\nApproximation Type: POLY_DIM = %d, POLY_TERMS = %d\n\n", POLY_DIM, POLY_TERMS);

    /*pdl: If no cross-validation, write out the Training Data into DataTrainSplit.
      This is to examine the samples chosen, for program testing purposes only*/
    if ((SPLIT_OUTPUT == TRUE) && (MAX_APPROX == 1)) {
        Set_Current_Approx(0);

        /*
         * NOTE(review): the text of this function is damaged here in the copy
         * under review. Everything between "for (i = 0; i" and
         * "pc = &(pc[appr_cnt]);" is missing; the two fragments are fused
         * into "ipc" in the statement below, which is reproduced verbatim
         * rather than guessed at. The closing braces further down imply that
         * the lost span closes this `if` and opens two nested loops; the
         * later code suggests they run over curr_cross_val and appr_cnt and
         * that pc and b_pc are allocated there -- presumably. Restore the
         * span from the upstream file; this function cannot compile as shown.
         */
        for (i = 0; ipc = &(pc[appr_cnt]);
            b_pc->num_lrn_ex = num_lrn_ex;
            b_pc->num_val_ex = num_val_ex;
            b_pc->xsubi[0] = xsubi_i[0];
            b_pc->xsubi[1] = xsubi_i[1];
            b_pc->xsubi[2] = xsubi_i[2];

            /* actually build the poly cascade */
            Build_Poly_Cascade(b_pc, Get_Lrn_Ex_Input, Get_Lrn_Ex_Output,
                               Get_Val_Ex_Input, Get_Val_Ex_Output,
                               &mse_lrn, &mse_val);

            /* print out final mean squared error */
            fprintf(fp_RES_OUTPUT,"\n\n########################################\n");
            fprintf(fp_RES_OUTPUT," After Leaning appr_cnt %d: mse_lrn = %g, mse_val = %g\n", appr_cnt, mse_lrn, mse_val);
            fprintf(fp_RES_OUTPUT,"########################################\n\n");
            printf("\n\n########################################\n");
            printf(" After Leaning appr_cnt %d: mse_lrn = %g, mse_val = %g\n", appr_cnt, mse_lrn, mse_val);
            printf("########################################\n\n");

            /* initialise random seeds based on last random number */
            xsubi_i[0] = b_pc->xsubi[0];
            xsubi_i[1] = b_pc->xsubi[1];
            xsubi_i[2] = b_pc->xsubi[2];

            /* free memory allocated to b_pc */
            free(b_pc);

            /* open pcX.s to save the approximation */
            /*
             * NOTE(review): the file is opened in append mode. Unless it is
             * truncated in the part of this function that is missing from
             * this view, cascades from earlier runs stay at the front of the
             * file -- confirm against the upstream source.
             */
            sprintf(pc_filename,"pc%d.s",curr_cross_val);
            if ((fp = fopen(pc_filename,"ab")) == NULL) {
                fprintf(fp_RES_OUTPUT,"Couldn't open \"%s\"\n",pc_filename);
                printf("Couldn't open \"%s\"\n",pc_filename);
                /* fix: the original fell through and passed the null stream
                   to Save_Poly_Cascade and fclose */
                exit(-1);
            }

            /* save it */
            Save_Poly_Cascade(&(pc[appr_cnt]),fp);

#ifdef ERASE_APPROX_RIGHT_AWAY /* this ifdef is a memory management issue */
            Delete_Poly_Cascade(&(pc[appr_cnt]));
#endif

            /* close the file */
            fclose(fp);
        }

#ifdef ERASE_APPROX_RIGHT_AWAY /* this ifdef is a memory management issue */
        /* free the memory allocated to pc */
        free(pc);
#endif

#ifndef ERASE_APPROX_RIGHT_AWAY /* this ifdef is a memory management issue */
        /* now delete the approximations */
        for ( appr_cnt = 0; appr_cnt < dim_out; appr_cnt++ ) {
            Delete_Poly_Cascade(&(pc[appr_cnt]));
        }

        /* free the memory allocated to pc */
        free(pc);
        /************************************/
#endif
    }

    /* calculate the total learning time */
    stop_time = time(0);
    delta_time = stop_time - start_time;
    /* fix: time_t is not guaranteed to be long, so convert to match %ld */
    fprintf(fp_RES_OUTPUT,"\n\nLearning time = %ld sec\n",(long)delta_time);
    printf("\n\nLearning time = %ld sec\n",(long)delta_time);
    fflush(stdout);

    /* free the memory used by the data in orig_inputs_lrn etc... */
    Delete_Data();
}

/*
 * Evaluates the saved poly cascades on the test data.
 *
 * For every test example and every cross-validation set 0..MAX_APPROX-1 the
 * cascades in "pc<set>.s" are loaded, evaluated on the example's inputs and
 * deleted again. The per-set absolute error is reported, then the estimates
 * are averaged over the MAX_APPROX sets. From the averaged estimate the
 * function accumulates the mean absolute error (Robin_Error) and tracks the
 * maximum absolute error and the 1-based test point where it occurred, for
 * each output variable. All output goes to stdout and to fp_RES_OUTPUT.
 *
 * The report format prints exactly three outputs (X, Y, A) and the
 * per-output arrays have three slots, so the output dimension must be in
 * 1..MAX_OUT_DIM; anything else is reported and the program exits with
 * status -1 (previously the arrays were indexed out of bounds). Slots at or
 * beyond the output dimension are printed as 0.00 instead of uninitialised
 * values.
 *
 * If a cascade file cannot be opened the failure is reported and the program
 * exits with status -1 (previously the null stream was passed on to
 * Load_Poly_Cascade and fclose).
 */
void Test_Approximation(void)
{
    enum { MAX_OUT_DIM = 3 };  /* the report lines below print three outputs */

    /* variables */
    Cascade_Poly *pc;
    int curr_cross_val;
    char pc_filename[20];
    FILE *fp;
    time_t start_time, stop_time, delta_time;
    My_Real *x, des, act;
    int i, j;
    int num_lrn_ex, num_val_ex, dim_in, dim_out, appr_cnt;
    /*pdl added Est, MaxAbsErr, and Loc below*/
    int Loc[MAX_OUT_DIM] = {0};
    float Robin_Error[MAX_OUT_DIM] = {0};
    float True[MAX_OUT_DIM] = {0};
    float Err[MAX_OUT_DIM] = {0};
    float Est[MAX_OUT_DIM] = {0};
    float MaxAbsErr[MAX_OUT_DIM] = {0};

    /* function prototypes */
    void Load_Poly_Cascade(Cascade_Poly *pc, FILE *fp);
    void Delete_Poly_Cascade(Cascade_Poly *pc);
    void Set_Current_Approx(int curr_ap);
    void Initialize_Data_Test_Val(int *num_lrn_ex, int *num_val_ex, int *dim_i, int *dim_o);
    void Set_Data_Dim_Output(int curr_d_o);
    void Delete_Data2(void);                                   /*pdl added*/
    void Get_Lrn_Ex_Input2(int ex_num, int var, double *in);   /*pdl added*/
    void Get_Lrn_Ex_Output2(int ex_num, double *out);          /*pdl added*/
    My_Real Evaluate_Poly_Cascade(Cascade_Poly *pc, My_Real *x_in);

    /* Start the Testing Program */
    start_time = time(0);
    printf("\n\nTesting the Saved Approximation:\n");
    fprintf(fp_RES_OUTPUT, "\n\nTesting the Saved Approximation:\n");

    /* load the data from DataTest and (arbitrarily) read it into the
       learning data array */
    printf("Loading Data...\n");
    Initialize_Data_Test_Val(&num_lrn_ex, &num_val_ex, &dim_in, &dim_out);

    fprintf(fp_RES_OUTPUT, "Total Num of Test Examples = %d\n", num_lrn_ex);
    printf("Total Num of Test Examples = %d\n", num_lrn_ex);
    fprintf(fp_RES_OUTPUT, "Input Dim = %d\n", dim_in);
    fprintf(fp_RES_OUTPUT, "Output Dim = %d\n", dim_out);
    fprintf(fp_RES_OUTPUT, "Number of Approximations = %d\n", MAX_APPROX);
    printf("Input Dim = %d\n", dim_in);
    printf("Output Dim = %d\n", dim_out);

    /* the per-output arrays and the report format hold MAX_OUT_DIM values */
    if (dim_out < 1 || dim_out > MAX_OUT_DIM) {
        fprintf(fp_RES_OUTPUT, "Unsupported output dimension %d (must be 1..%d)\n",
                dim_out, (int)MAX_OUT_DIM);
        printf("Unsupported output dimension %d (must be 1..%d)\n",
               dim_out, (int)MAX_OUT_DIM);
        exit(-1);
    }

    /* initialise Robin_Error and maximum absolute error to 0 */
    for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
        Robin_Error[appr_cnt] = 0;
        MaxAbsErr[appr_cnt] = 0;
        Loc[appr_cnt] = 0;
    }

    /*pdl: For each test example, calculate mean value of all MAX_APPROX
      approximations */
    /* cycle through all the test examples */
    for (i = 0; i < num_lrn_ex; i++) {
        /*pdl: Initialize estimations of outputs*/
        for (j = 0; j < dim_out; j++) {
            Est[j] = 0.0;
        }

        /* cycle through the MAX_APPROX approximation cross-validation sets */
        for (curr_cross_val = 0; curr_cross_val < MAX_APPROX; curr_cross_val++) {
            /* set some local variables in data_vt */
            Set_Current_Approx(curr_cross_val);

            /* initialise polynomial cascade pc: one cascade per output */
            pc = malloc((size_t)dim_out * sizeof *pc);
            if (!(pc)) {
                /* presumably My_Error does not return -- confirm in b_pc.h */
                My_Error("Cannot allocate Cascade_Poly structure!!\n");
            }

            /* load the current polynomial cascade pc */
            sprintf(pc_filename, "pc%d.s", curr_cross_val);
            if ((fp = fopen(pc_filename, "rb")) == NULL) {
                fprintf(fp_RES_OUTPUT, "Couldn't open \"%s\"\n", pc_filename);
                printf("Couldn't open \"%s\"\n", pc_filename);
                /* nothing can be loaded from a null stream; stop here */
                free(pc);
                exit(-1);
            }
            for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
                Load_Poly_Cascade(&(pc[appr_cnt]), fp);
            }
            fclose(fp);

            /* initialise the input data array x, sized from the first
               cascade's dim */
            x = malloc((size_t)pc->dim * sizeof *x);
            if (!(x)) {
                My_Error("Cannot allocate x!!\n");
            }

            /* cycle through the output variables*/
            for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
                /* set the local variable in data_vt */
                Set_Data_Dim_Output(appr_cnt);

                /* get the inputs */
                for (j = 0; j < (int)pc->dim; j++) {
                    Get_Lrn_Ex_Input2(i, j, &(x[j])); /*pdl change*/
                }

                /* get the actual output */
                Get_Lrn_Ex_Output2(i, &des); /*pdl change*/
                True[appr_cnt] = (float)des;

                /* get the best guess of the output */
                act = Evaluate_Poly_Cascade(&(pc[appr_cnt]), x);

                /* pdl Calculate contribution to the estimation of outputs*/
                Est[appr_cnt] = Est[appr_cnt] + (float)act;

                /*Calc. error for pdl Testing below*/
                Err[appr_cnt] = (float)fabs(des - act);
            }

            /*pdl Testing*/
            printf("Set %d Example %d: True XYA: %6.2f %6.2f %6.2f Err XYA: %6.2f %6.2f %6.2f\n",
                   curr_cross_val + 1, i + 1,
                   True[0], True[1], True[2], Err[0], Err[1], Err[2]);
            fprintf(fp_RES_OUTPUT,
                    "Set %d Example %d: True XYA: %6.2f %6.2f %6.2f Err XYA: %6.2f %6.2f %6.2f\n",
                    curr_cross_val + 1, i + 1,
                    True[0], True[1], True[2], Err[0], Err[1], Err[2]);

            /* free the memory used by the input array */
            free(x);

            /* delete the approximations */
            for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
                Delete_Poly_Cascade(&(pc[appr_cnt]));
            }
            free(pc);
        }

        /* pdl: Get average estimated value and error for each output variable*/
        for (j = 0; j < dim_out; j++) {
            /* Calculate the estimation average over MAX_APPROX estimates*/
            Est[j] = Est[j] / MAX_APPROX;
            Err[j] = (float)fabs(True[j] - Est[j]);
            Robin_Error[j] = Robin_Error[j] + Err[j];

            /* If the estimate at this testpoint is in more error than worst
               so far, store error*/
            if (MaxAbsErr[j] < Err[j]) {
                MaxAbsErr[j] = Err[j];
                Loc[j] = i + 1;
            }
        }

        printf("Example %d: Est. Error in X,Y,A: %6.2f %6.2f %6.2f\n",
               i + 1, Err[0], Err[1], Err[2]);
        fprintf(fp_RES_OUTPUT, "Example %d: Est. Error in X,Y,A: %6.2f %6.2f %6.2f\n",
                i + 1, Err[0], Err[1], Err[2]);
    }

    /*pdl: Display the Maximum Absolute Error for each output variable over
      all test points*/
    printf("\nWorst case errors: Maximum absolute error over all test points\n");
    fprintf(fp_RES_OUTPUT, "\nWorst case errors: Maximum absolute error over all test points\n");
    for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
        printf("Output Variable %d: %6.2f, at test point %d.\n",
               appr_cnt, MaxAbsErr[appr_cnt], Loc[appr_cnt]);
        fprintf(fp_RES_OUTPUT, "Output Variable %d: %6.2f, at test point %d.\n",
                appr_cnt, MaxAbsErr[appr_cnt], Loc[appr_cnt]);
    }

    /* Calculate and display average Robin_Error */
    printf("\nRobin Errors: Mean absolute error over all test points\n");
    fprintf(fp_RES_OUTPUT, "\nRobin Errors: Mean absolute error over all test points\n");
    for (appr_cnt = 0; appr_cnt < dim_out; appr_cnt++) {
        Robin_Error[appr_cnt] = Robin_Error[appr_cnt] / num_lrn_ex;
        printf("Output Variable %d: %6.2f\n", appr_cnt, Robin_Error[appr_cnt]);
        fprintf(fp_RES_OUTPUT, "Output Variable %d: %6.2f\n", appr_cnt, Robin_Error[appr_cnt]);
    }

    /* free the memory used by the data in orig_inputs_lrn etc... */
    Delete_Data2();

    /* display the total testing time */
    stop_time = time(0);
    delta_time = stop_time - start_time;
    /* time_t is not guaranteed to be long, so convert to match %ld */
    fprintf(fp_RES_OUTPUT, "\n\nValidation time = %ld sec\n", (long)delta_time);
    printf("\n\nValidation time = %ld sec\n", (long)delta_time);
    fflush(stdout);
}