00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
# Handle to the framework kernel. It is None at import time; the functions in
# this file call methods on it (gen_cm_tmp_file, access, print_for_con, ...),
# so it is presumably assigned by the caller before any of them run.
cm_kernel = None

# Module settings. This file reads ini['cfg'] and ini['path'] from it.
ini = dict()
00013
00014
00015 import os
00016 import csv
00017 import math
00018
00019
def init(i):
    """
    Return a success code without doing any work

    Input:  i - not used

    Output: {
              cm_return - always 0
            }
    """

    status = {'cm_return': 0}
    return status
00022
00023
def build(i):
    """
    Build model

    The training data is exported to a temporary CSV file (no header,
    ';' separator, '.' decimal mark) and an R script, selected by
    'model_name', is run on it.

    Input:  {
              data                         - multi-dimensional array passed to the
                                             'convert_ct_multi_array_to_csv' action
              ct_dimensions_input          - list of input dimensions (exported first)
              ct_dimensions_output         - list of output dimensions (exported after inputs)
              desc                         - description of dimensions
              model_name                   - (earth, svm)
              (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
            }

    Output: {
              cm_return       - return code >0 if error
              (cm_error)      - error text if cm_return>0
              file_with_model - file with model (prefix + '.r.model.obj')
            }
    """

    data = i.get('data', {})
    desc = i.get('desc', {})

    model_name = i.get('model_name', '')
    if model_name == '':
        return {'cm_return': 1, 'cm_error': 'model name is not specified'}

    # Columns of the training CSV: all inputs followed by all outputs
    dim = list(i.get('ct_dimensions_input', [])) + \
          list(i.get('ct_dimensions_output', []))

    # Temporary CSV with the training data
    r = cm_kernel.gen_cm_tmp_file({})
    fitmp = r['cm_path1'] + '.csv'

    # Prefix of the model file: user supplied or randomly generated
    fotmp = i.get('record_data_to_file_prefix', '')
    if fotmp == '':
        r = cm_kernel.gen_cm_tmp_file({})
        fotmp = r['cm_path1']

    fotmp1 = fotmp + '.r.model.obj'

    cm_kernel.print_for_con('')
    cm_kernel.print_for_con('Generating tmp CSV file for learning (input=' + fitmp + ', output=' + fotmp1 + ') ...')

    ii = {'cm_run_module_uoa': ini['cfg']['cm_modules']['ctuning.space'],
          'cm_action': 'convert_ct_multi_array_to_csv',
          'cm_multi_array_s': data,
          'ct_dimensions': dim,
          'ct_dim_desc': desc,
          'ct_output_file': fitmp,
          'csv_no_header': 'yes',
          'csv_separator': ';',
          'csv_decimal_mark': '.'
         }

    try:
        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        # Path of the R script; the 'model_name' placeholder in the configured
        # file name is substituted with the requested model
        model_code = os.path.join(ini['path'], ini['cfg']['model_code_build']).replace(
            cm_kernel.convert_str_to_special('model_name'), model_name)

        # NOTE(review): paths are interpolated into a shell command unquoted,
        # so paths with spaces or shell metacharacters will break it.
        cmd = 'r --vanilla --args ' + fitmp + ' ' + fotmp + ' < ' + model_code
        os.system(cmd)
    finally:
        # Remove the temporary CSV on every exit path, including failures
        if os.path.isfile(fitmp):
            os.remove(fitmp)

    # Same kind of check as predict() performs on its output file: do not
    # report success when R left no model behind
    if not os.path.isfile(fotmp1):
        return {'cm_return': 1, 'cm_error': 'file with model was not created'}

    return {'cm_return': 0, 'file_with_model': fotmp1}
00097
00098
def _read_predictions(path):
    """
    Read the second column of a comma-separated file that has a header row

    Input:  path - name of the CSV file with predictions

    Output: {
              cm_return  - return code >0 if a data row exists but the header
                           has fewer than 2 columns
              (cm_error) - error text if cm_return>0
              (values)   - list of cells from the second column (strings as read
                           from the file; None where a row is too short)
            }
    """

    import sys

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] < 3:
        f = open(path, 'rb')
    else:
        f = open(path, 'r', newline='')

    values = []
    try:
        reader = csv.reader(f, delimiter=',')
        header = next(reader, [])
        for row in reader:
            if not row:
                continue  # blank line
            if len(header) < 2:
                return {'cm_return': 1,
                        'cm_error': 'file with predictions has less than 2 columns'}
            values.append(row[1] if len(row) > 1 else None)
    finally:
        f.close()

    return {'cm_return': 0, 'values': values}


def _to_int(v):
    """Convert v to int; text such as '3.0' that int() rejects goes through float first."""

    try:
        return int(v)
    except ValueError:
        return int(float(v))


def _relative_variation(orig, pred):
    """Return |pred-orig|/|orig| as float; 0.0 if both are zero, infinity if only orig is zero."""

    if orig == 0:
        return 0.0 if pred == 0 else float('inf')
    return abs(pred - orig) / abs(float(orig))


def predict(i):
    """
    Predict using model

    The input dimensions are exported to a temporary CSV file, an R script
    selected by 'model_name' is run on it together with the model file, and
    the predictions are read back from the CSV file the script writes.

    Predictions are stored in data[ct_dimensions_output[0]]. If that entry
    already holds values, each one is first compared with its prediction and
    then overwritten by it.

    Input:  {
              model_file
              data
              ct_dimensions_input
              ct_dimensions_output     - first entry is the key in 'data' where original
                                         values are taken from (for comparison) and where
                                         predictions are stored
              desc                     - cM data description
              model_name               - (earth, svm)
              (max_variation_percent)  - for comparison, report points where relative variation
                                         |predicted-original|/|original| is more than this
                                         number (default=0.2)
              (ct_output_file_s_json)  - if !='', record 'data' and 'desc' to this json file
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
              (rmse)     - if comparison, root mean square error for predictions vs original
                           (as string); for types other than float/integer every mismatch
                           counts as 1
              (max_var)  - if comparison, list of indices of points with variation more
                           than max_variation_percent
            }
    """

    mf = i.get('model_file', '')
    if mf == '':
        return {'cm_return': 1, 'cm_error': '"model_file" is not defined'}

    model_name = i.get('model_name', '')
    if model_name == '':
        return {'cm_return': 1, 'cm_error': 'model name is not specified'}

    data = i.get('data', {})
    desc = i.get('desc', {})

    dim1 = i.get('ct_dimensions_input', [])
    dim2 = i.get('ct_dimensions_output', [])

    # The first output dimension names where predictions go; fail before
    # doing any work, not with KeyError/IndexError after R has run
    if len(dim2) == 0:
        return {'cm_return': 1, 'cm_error': '"ct_dimensions_output" is not defined'}
    ky = dim2[0]

    # Only the input dimensions are exported for prediction
    dim = list(dim1)

    # Temporary CSV with inputs for R
    r = cm_kernel.gen_cm_tmp_file({})
    fitmp = r['cm_path1'] + '.csv'

    # Temporary CSV with predictions from R
    r = cm_kernel.gen_cm_tmp_file({})
    fotmp = r['cm_path1'] + '.csv'

    cm_kernel.print_for_con('')
    cm_kernel.print_for_con('Generating tmp CSV file for learning (input=' + fitmp + ', output=' + fotmp + ') ...')

    ii = {'cm_run_module_uoa': ini['cfg']['cm_modules']['ctuning.space'],
          'cm_action': 'convert_ct_multi_array_to_csv',
          'cm_multi_array_s': data,
          'ct_dimensions': dim,
          'ct_dim_desc': desc,
          'ct_output_file': fitmp,
          'csv_no_header': 'yes',
          'csv_separator': ';',
          'csv_decimal_mark': '.'
         }

    try:
        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        # Path of the R script; the 'model_name' placeholder in the configured
        # file name is substituted with the requested model
        model_code = os.path.join(ini['path'], ini['cfg']['model_code_predict']).replace(
            cm_kernel.convert_str_to_special('model_name'), model_name)

        # NOTE(review): paths are interpolated into a shell command unquoted,
        # so paths with spaces or shell metacharacters will break it.
        cmd = 'r --vanilla --args ' + mf + ' ' + fitmp + ' ' + fotmp + ' < ' + model_code
        os.system(cmd)

        if not os.path.isfile(fotmp):
            return {'cm_return': 1, 'cm_error': 'file with predictions was not created'}

        r = _read_predictions(fotmp)
        if r['cm_return'] > 0:
            return r
        y = r['values']
    finally:
        # Both temporary files are only needed while talking to R
        for p in (fitmp, fotmp):
            if os.path.isfile(p):
                os.remove(p)

    # Original values, if any; a missing entry is treated like an empty one
    d0 = data.get(ky, [])

    rr = {'cm_return': 0}

    if len(d0) > 0:
        # Compare with original values
        if len(d0) != len(y):
            return {'cm_return': 1, 'cm_error': 'length of array with original data and predictions is not matching (' + str(len(d0)) + ' vs ' + str(len(y)) + ')'}

        tp = desc.get(ky, {}).get('type', '')

        mvp = i.get('max_variation_percent', '0.2')
        dmvp = float(mvp)

        # Values of the first input dimension label the printed rows
        labels = data.get(dim1[0], []) if len(dim1) > 0 else []

        var = []

        s = 0.0
        for q, (v0, v1) in enumerate(zip(d0, y)):
            if tp == 'float' or tp == 'integer':
                if tp == 'float':
                    dv0 = float(v0)
                    dv1 = float(v1)
                else:
                    dv0 = _to_int(v0)
                    dv1 = _to_int(v1)
                s += (dv0 - dv1) * (dv0 - dv1)
                diff = _relative_variation(dv0, dv1)
                x1 = ''
                if diff > dmvp:
                    x1 = ' ***'
                    var.append(q)
                label = labels[q] if q < len(labels) else ''
                # Single pre-formatted string: same text as the former
                # multi-item print statement, valid on Python 2 and 3
                print('%7s %7.3f %7.3f %7.3f %5.3f %s' % (label, dv0, dv1, s, diff, x1))
            elif v0 != v1:
                s += 1

            # Substitute original value with the prediction
            d0[q] = v1

        rmse = math.sqrt(s / len(d0))

        rr['rmse'] = str(rmse)
        rr['max_var'] = var

        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Model RMSE=' + str(rmse))

    else:
        data[ky] = y

    # Record to json file if requested
    fx = i.get('ct_output_file_s_json', '')
    if fx != '':
        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Recording to json file ' + fx + ' ...')

        r = cm_kernel.save_array_to_file_as_json({'cm_filename': fx, 'cm_array': {'data': data, 'desc': desc}})
        if r['cm_return'] > 0:
            return r

    return rr