# module.py
# Go to the documentation of this file.
00002
00003
00004
00005
00006
00007
00008
00009
00010
# Module-level configuration dictionary; fit() reads ini['cfg']['models'] from it.
# NOTE(review): starts empty - presumably filled in by the hosting framework
# when the module is loaded; confirm against the loader.
ini = {}

# Kernel object used by this module for dispatching requests (access) and for
# console output (print_for_con).
# NOTE(review): starts as None - presumably injected by the hosting framework
# before any action is called; confirm against the loader.
cm_kernel = None
00013
00014
import copy
import json
import os
import random
00019
00020
def init(i):
    """
    Module initialization hook: performs no work and reports success

    Input:  {
              i - request dictionary (not used)
            }

    Output: {
              cm_return - always 0
            }

    """

    return {'cm_return':0}
00023
00024
def build(i):
    """
    Build model

    Input:  {
              model_module_uoa             - model's module
              (model_data_uoa)             - model's data (if need to record)
              model_name                   - (earth, svm)
              data
              ct_dimensions_input
              ct_dimensions_output
              desc
              (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
              ...        - otherwise whatever cm_kernel.access returns for the forwarded request
            }

    Note: the input dictionary is modified in place ('cm_run_module_uoa' is set
    to model_module_uoa) before it is forwarded to the kernel.

    """

    mmu = i.get('model_module_uoa', '')
    if mmu == '':
        return {'cm_return':1, 'cm_error':'model_module_uoa is not specified in math.model.r/build'}

    # Forward the very same request to the concrete model module
    i['cm_run_module_uoa'] = mmu
    return cm_kernel.access(i)
00053
00054
def predict(i):
    """
    Predict using some model

    Input:  {
              model_module_uoa        - model's module
              (model_data_uoa)        - model's data (if need to record)
              model_name              - (earth, svm)
              data
              ct_dimensions_input
              (ct_dimensions_output)  - for comparison
              desc                    - cM data description
              (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
              (rmse)     - if comparison, root mean square error for predictions vs original
              (max_var)  - list of points with variation more than max_variation_percent
            }

    Note: the input dictionary is modified in place ('cm_run_module_uoa' is set
    to model_module_uoa) before it is forwarded to the kernel; the output is
    whatever cm_kernel.access returns for that request.

    """

    mmu = i.get('model_module_uoa', '')
    if mmu == '':
        # The message names this action (it used to say 'build', copied from build())
        return {'cm_return':1, 'cm_error':'model_module_uoa is not specified in math.model.r/predict'}

    # Forward the very same request to the concrete model module
    i['cm_run_module_uoa'] = mmu
    return cm_kernel.access(i)
00085
00086
def fit(i):
    """
    Fit data using existing models (combine build and predict)

    Every model listed in ini['cfg']['models'] whose 'type' equals model_type
    is first built and then used for prediction through cm_kernel.access.

    Input:  {
              model_type              - only configured models of this type are fitted
              data
              ct_dimensions_input
              (ct_dimensions_output)  - for comparison
              desc                    - cM data description
              (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
              results    - list with info about models; each entry is a copy of
                           the model's configuration entry extended with
                             model_file_size - size of the model file as a string
                                               (only if that file exists)
                             model_rmse      - 'rmse' returned by the predict action
            }

    The first failing build or predict request aborts the whole fit and its
    result dictionary is returned unchanged.

    """

    if 'model_type' not in i:
        # Report through the usual return-code convention instead of raising KeyError
        return {'cm_return':1, 'cm_error':'model_type is not specified'}

    tp = i['model_type']

    ii_orig = copy.deepcopy(i)

    models = ini['cfg']['models']

    # File name prefix handed to the build action for the model it records
    fm = 'tmp'

    results = []

    for mm in models:
        if mm['type'] != tp:
            continue

        mmx = mm['name']
        mmu = mm['model_module_uoa']
        mmn = mm['model_name']

        cm_kernel.print_for_con('****************************************************')
        cm_kernel.print_for_con('Fitting model '+mmx+' ...')

        # Work on a private copy: the shared module configuration must not be
        # annotated with (possibly stale) per-call results
        entry = copy.deepcopy(mm)

        ii = copy.deepcopy(ii_orig)

        # Build model
        ii['cm_run_module_uoa'] = mmu
        ii['model_name'] = mmn
        ii['record_data_to_file_prefix'] = fm
        ii['cm_action'] = 'build'

        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        fm1 = r['file_with_model']

        if os.path.isfile(fm1):
            entry['model_file_size'] = str(os.path.getsize(fm1))

        # Predict with the model that has just been built (same request dictionary)
        ii['cm_run_module_uoa'] = mmu
        ii['model_name'] = mmn
        ii['model_file'] = fm1
        ii['cm_action'] = 'predict'

        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        entry['model_rmse'] = r['rmse']

        results.append(entry)

    # Single-argument form prints identically under Python 2 and Python 3
    print(json.dumps(results, indent=2))

    return {'cm_return':0, 'results':results}
00162
00163
def detect_representative_points(i):
    """
    Detect representative points to build the same model

    A reference model is built and evaluated on the full data set first. Then,
    for 2*N steps (N = number of points of the first input dimension), one
    randomly chosen point is removed, the model is rebuilt and re-evaluated,
    and the relative change of rmse versus the reference is recorded. The
    point is dropped for all following steps when the absolute relative
    change is below rmse_threshold and the number of reported singularities
    equals that of the reference model.

    Input:  {
              model_module_uoa        - model's module
              model_name              - model name passed to the model's module
              data                    - indexed by dimension name; each value is a list of points
              ct_dimensions_input     - list of input dimensions (must not be empty)
              (ct_dimensions_output)  - list of output dimensions
              (additional_params)     - passed to the build action (default={})
              (rmse_threshold)        - max absolute relative rmse change for a point
                                        to be considered removable (default=0.2)
              (model_type)            - "regression" or "classification"
              (desc)                  - cM data description
              (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
            }

            Keys not read here stay in the request that is forwarded to the
            model's module.

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
              results    - list of relative rmse changes (rmse-rmse0)/rmse0, one per
                           step whose build succeeded; when rmse0 is 0 the change
                           is 0.0 if rmse is also 0 and float('inf') otherwise
            }

    """

    mmu = i.get('model_module_uoa', '')
    if mmu == '':
        # The message names this action (it used to say 'build', copied from build())
        return {'cm_return':1, 'cm_error':'model_module_uoa is not specified in math.model.r/detect_representative_points'}

    mmn = i.get('model_name', '')
    if mmn == '':
        return {'cm_return':1, 'cm_error':'model name is not specified'}

    dim1 = i.get('ct_dimensions_input', [])
    dim2 = i.get('ct_dimensions_output', [])
    if len(dim1) == 0:
        return {'cm_return':1, 'cm_error':'ct_dimensions_input is not specified'}

    try:
        npoints = len(i['data'][dim1[0]])
    except (KeyError, IndexError, TypeError):
        return {'cm_return':1, 'cm_error':'data for the first input dimension is not specified'}

    ap = i.get('additional_params', {})

    rmse_t = float(i.get('rmse_threshold', '0.2'))

    # File name prefix handed to the build action for the model it records
    fm = 'tmp'

    # Master copy of the request; accepted removals are applied to its data
    ii_orig = copy.deepcopy(i)

    results = []

    # Reference values taken from the model built on the full data set (q==-1)
    rmse0 = 0
    sing0 = {}

    def drop_point(table, idx):
        # Remove point idx from every input and output dimension
        for d in dim1:
            del(table[d][idx])
        for d in dim2:
            del(table[d][idx])

    total_steps = npoints*2

    for q in range(-1, total_steps):
        ii = copy.deepcopy(ii_orig)
        data = ii['data']
        q1 = None

        if q == -1:
            cm_kernel.print_for_con('****************************************************')
            cm_kernel.print_for_con('Building original model ...')
        else:
            remaining = len(data[dim1[0]])
            if remaining == 0:
                # Every point has been removed already: random.randint(0,-1) would raise
                break

            cm_kernel.print_for_con('****************************************************')

            # Select a random point and remove it from this step's private copy
            q1 = random.randint(0, remaining-1)

            cm_kernel.print_for_con('Step '+str(q+1)+' of '+str(total_steps)+' ...')

            drop_point(data, q1)

            ii['data'] = data

        # Build model
        ii['cm_run_module_uoa'] = mmu
        ii['model_name'] = mmn
        ii['record_data_to_file_prefix'] = fm
        ii['cm_action'] = 'build'
        ii['additional_params'] = ap

        r = cm_kernel.access(ii)
        if q == -1 and r['cm_return'] > 0:
            # Without the reference model there is nothing to compare against
            return r
        if r['cm_return'] != 0:
            # A failed build on reduced data only skips this step
            continue

        sing = r.get('model', {}).get('singularities', {})

        fm1 = r['file_with_model']

        # Predict with the model that has just been built (same request dictionary)
        ii['cm_run_module_uoa'] = mmu
        ii['model_name'] = mmn
        ii['model_file'] = fm1
        ii['cm_action'] = 'predict'

        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        rmse1 = float(r['rmse'])

        if q == -1:
            rmse0 = rmse1
            sing0 = copy.deepcopy(sing)
            continue

        # Relative change of rmse; guarded because the reference rmse may be 0
        if rmse0 != 0:
            delta = (rmse1-rmse0)/rmse0
        elif rmse1 == 0:
            delta = 0.0
        else:
            delta = float('inf')

        # Single-argument form prints identically under Python 2 and Python 3
        print(' '.join([str(v) for v in (rmse0, rmse1, delta, len(sing), len(sing0))]))
        results.append(delta)

        if abs(delta) < rmse_t and len(sing) == len(sing0):
            # The model barely changed: drop the point for all following steps
            drop_point(ii_orig['data'], q1)

            print('')
            print('POINT REMOVED!')

    print(json.dumps(results, indent=2))

    return {'cm_return':0, 'results':results}
00288 return {'cm_return':0, 'results':results}