Welcome    Usage    Browse    Find CID    Search     Log in

cM API Documentation

module.py

Go to the documentation of this file.
00001 #
00002 # Collective Mind
00003 #
00004 # See cM LICENSE.txt for licensing details.
00005 # See cM Copyright.txt for copyright details.
00006 #
00007 # Developer(s): (C) Grigori Fursin, started on 2011.09
00008 #
00009 
# Should always be here: module-level slots that the functions below read
# (ini['cfg'], ini['path'], cm_kernel.*). They start out empty here, so
# presumably the cM framework fills them in after loading the module
# - TODO confirm against the kernel loader.
ini = dict()
cm_kernel = None
00013 
00014 # Local settings
00015 import os
00016 import csv
00017 import math
00018 
00019 # ============================================================================
def init(i):
    """
    Initialize module (nothing needs to be set up here)

    Input:  {}  - not used

    Output: {
              cm_return - always 0 (success)
            }
    """

    status = {}
    status['cm_return'] = 0
    return status
00022 
00023 # ============================================================================
def build(i):
    """
    Build model: export the data to a temporary CSV file and feed the
    R build script (ini['cfg']['model_code_build']) with it

    Input:  {
              data                         - data, passed as 'cm_multi_array_s' to the CSV converter
              ct_dimensions_input          - list of input dimensions
              ct_dimensions_output         - list of output dimensions
              desc                         - data description, passed as 'ct_dim_desc' to the CSV converter
              model_name                   - (earth, svm)
              (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
            }

    Output: {
              cm_return       - return code >0 if error
              (cm_error)      - error text if cm_return >0
              file_with_model - file with model (<prefix>.r.model.obj)
            }

    """

    # Local import: only needed here to launch R without going through a shell
    import subprocess

    data = i.get('data', {})
    desc = i.get('desc', {})

    model_name = i.get('model_name', '')
    if model_name == '':
        return {'cm_return': 1, 'cm_error': 'model name is not specified'}

    # Dimensions handed to the converter: inputs first, then outputs
    dim = list(i.get('ct_dimensions_input', [])) + list(i.get('ct_dimensions_output', []))

    # Generate input tmp csv file name
    r = cm_kernel.gen_cm_tmp_file({})
    if r.get('cm_return', 0) > 0:
        return r
    fitmp = r['cm_path1'] + '.csv'

    # Generate output tmp file name
    fotmp = i.get('record_data_to_file_prefix', '')
    if fotmp == '':
        r = cm_kernel.gen_cm_tmp_file({})
        if r.get('cm_return', 0) > 0:
            return r
        fotmp = r['cm_path1']

    fotmp1 = fotmp + '.r.model.obj'

    cm_kernel.print_for_con('')
    cm_kernel.print_for_con('Generating tmp CSV file for learning (input='+fitmp+', output='+fotmp1+') ...')

    try:
        # Convert data to csv
        ii = {'cm_run_module_uoa': ini['cfg']['cm_modules']['ctuning.space'],
              'cm_action': 'convert_ct_multi_array_to_csv',
              'cm_multi_array_s': data,
              'ct_dimensions': dim,
              'ct_dim_desc': desc,
              'ct_output_file': fitmp,
              'csv_no_header': 'yes',
              'csv_separator': ';',
              'csv_decimal_mark': '.'
             }
        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        # Prepare calling model: the script path contains a placeholder
        # that is substituted with the requested model name
        model_code = os.path.join(ini['path'], ini['cfg']['model_code_build']).replace(
            cm_kernel.convert_str_to_special('model_name'), model_name)

        # Arguments are passed as a list and the script is attached to stdin,
        # so paths with spaces or shell metacharacters are handled literally
        try:
            with open(model_code, 'rb') as script:
                subprocess.call(['r', '--vanilla', '--args', fitmp, fotmp], stdin=script)
        except (IOError, OSError) as e:
            return {'cm_return': 1, 'cm_error': 'can not run R model script ('+str(e)+')'}
    finally:
        # The CSV is only an intermediate file - remove it on every exit path
        if os.path.isfile(fitmp):
            os.remove(fitmp)

    # Same kind of check as predict() does for its output file
    if not os.path.isfile(fotmp1):
        return {'cm_return': 1, 'cm_error': 'file with model was not created'}

    return {'cm_return': 0, 'file_with_model': fotmp1}
00097 
00098 # ============================================================================
def _read_second_csv_column(path):
    """
    Read a comma-separated file with a header row and return the values of
    its second column (in file order) as a list of strings

    Raises ValueError if a non-empty data row has fewer than two fields.
    """

    import sys

    # The csv module wants a binary file object on Python 2 and a text one
    # (with newline translation disabled) on Python 3
    if sys.version_info[0] < 3:
        f = open(path, 'rb')
    else:
        f = open(path, 'r', newline='')

    values = []
    try:
        rows = csv.reader(f, delimiter=',')
        next(rows, None)  # skip header row
        for row in rows:
            if len(row) == 0:
                continue  # blank line
            if len(row) < 2:
                raise ValueError('row without second column in ' + path)
            values.append(row[1])
    finally:
        f.close()

    return values


def _relative_variation(original, predicted):
    """
    Return |predicted-original|/|original| as a float; when original is 0
    return 0.0 for an exact match and infinity otherwise
    """

    delta = abs(float(predicted) - float(original))
    if original == 0:
        return 0.0 if delta == 0 else float('inf')
    return delta / abs(float(original))


def predict(i):
    """
    Predict using model: export input dimensions to a temporary CSV file,
    run the R prediction script (ini['cfg']['model_code_predict']) and read
    predictions from the second column of the CSV file it produces

    Input:  {
              model_file              - file with model
              data                    - data, passed as 'cm_multi_array_s' to the CSV converter;
                                        updated in place with predictions
              ct_dimensions_input     - list of input dimensions
              ct_dimensions_output    - list of output dimensions; the first one is the key in
                                        'data' where predictions are stored (and compared with
                                        original values if they are present there)
              desc                    - cM data description
              model_name              - (earth, svm)
              (max_variation_percent) - for comparison, report points where relative variation
                                        |predicted-original|/|original| is more than this number
                                        (default=0.2)
              (ct_output_file_s_json) - if !='', record {'data', 'desc'} to this json file
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return >0
              (rmse)     - if comparison, root mean square error for predictions vs original (as string)
              (max_var)  - if comparison, list of point indexes with variation more than max_variation_percent
            }

    """

    # Local import: only needed here to launch R without going through a shell
    import subprocess

    mf = i.get('model_file', '')
    if mf == '':
        return {'cm_return': 1, 'cm_error': '"model_file" is not defined'}

    model_name = i.get('model_name', '')
    if model_name == '':
        return {'cm_return': 1, 'cm_error': 'model name is not specified'}

    # Predictions have to be stored under some key - check it before doing any work
    dim2 = i.get('ct_dimensions_output', [])
    if len(dim2) == 0:
        return {'cm_return': 1, 'cm_error': '"ct_dimensions_output" is not defined'}
    ky = dim2[0]

    data = i.get('data', {})
    desc = i.get('desc', {})

    # Only input dimensions are exported - outputs are what the model predicts
    dim1 = i.get('ct_dimensions_input', [])
    dim = list(dim1)

    # Generate input tmp csv file name
    r = cm_kernel.gen_cm_tmp_file({})
    if r.get('cm_return', 0) > 0:
        return r
    fitmp = r['cm_path1'] + '.csv'

    # Generate output tmp file name
    r = cm_kernel.gen_cm_tmp_file({})
    if r.get('cm_return', 0) > 0:
        return r
    fotmp = r['cm_path1'] + '.csv'

    cm_kernel.print_for_con('')
    cm_kernel.print_for_con('Generating tmp CSV file for learning (input='+fitmp+', output='+fotmp+') ...')

    try:
        # Convert data to csv
        ii = {'cm_run_module_uoa': ini['cfg']['cm_modules']['ctuning.space'],
              'cm_action': 'convert_ct_multi_array_to_csv',
              'cm_multi_array_s': data,
              'ct_dimensions': dim,
              'ct_dim_desc': desc,
              'ct_output_file': fitmp,
              'csv_no_header': 'yes',
              'csv_separator': ';',
              'csv_decimal_mark': '.'
             }
        r = cm_kernel.access(ii)
        if r['cm_return'] > 0:
            return r

        # Prepare calling model: the script path contains a placeholder
        # that is substituted with the requested model name
        model_code = os.path.join(ini['path'], ini['cfg']['model_code_predict']).replace(
            cm_kernel.convert_str_to_special('model_name'), model_name)

        # Arguments are passed as a list and the script is attached to stdin,
        # so paths with spaces or shell metacharacters are handled literally
        try:
            with open(model_code, 'rb') as script:
                subprocess.call(['r', '--vanilla', '--args', mf, fitmp, fotmp], stdin=script)
        except (IOError, OSError) as e:
            return {'cm_return': 1, 'cm_error': 'can not run R model script ('+str(e)+')'}

        # Trying to read csv
        if not os.path.isfile(fotmp):
            return {'cm_return': 1, 'cm_error': 'file with predictions was not created'}

        # Read predictions (second column; the first data row follows a header)
        try:
            y = _read_second_csv_column(fotmp)
        except (IOError, OSError, ValueError, csv.Error) as e:
            return {'cm_return': 1, 'cm_error': 'can not read file with predictions ('+str(e)+')'}
    finally:
        # Both CSV files are only intermediate - remove them on every exit path
        for tmp in (fitmp, fotmp):
            if os.path.isfile(tmp):
                os.remove(tmp)

    # Update original data array
    d0 = data.get(ky, [])

    rr = {'cm_return': 0}

    if len(d0) > 0:
        # If original data is present - substitute and check variation
        if len(d0) != len(y):
            return {'cm_return': 1, 'cm_error': 'length of array with original data and predictions is not matching ('+str(len(d0))+' vs '+str(len(y))+')'}

        tp = desc.get(ky, {}).get('type', '')
        # Both numeric types are compared as floats: predictions may be
        # fractional even when the original values are integers
        numeric = tp in ('float', 'integer')

        dmvp = float(i.get('max_variation_percent', '0.2'))

        # Values of the first input dimension are used as row labels in the report
        labels = data.get(dim1[0], []) if len(dim1) > 0 else []

        var = []

        s = 0.0
        for q in range(len(d0)):
            v0 = d0[q]
            v1 = y[q]

            if numeric:
                try:
                    dv0 = float(v0)
                    dv1 = float(v1)
                except (TypeError, ValueError):
                    return {'cm_return': 1, 'cm_error': 'can not convert values at point '+str(q)+' to numbers ('+str(v0)+', '+str(v1)+')'}

                s += (dv0-dv1)*(dv0-dv1)
                diff = _relative_variation(dv0, dv1)
                x1 = ''
                if diff > dmvp:
                    x1 = ' ***'
                    var.append(q)

                label = labels[q] if q < len(labels) else q
                cm_kernel.print_for_con('%7s %7.3f %7.3f %7.3f %5.3f%s' % (label, dv0, dv1, s, diff, x1))
            elif v0 != v1:
                # Non-numeric values: every mismatch counts as an error of 1
                s += 1

        # Substitute only after all points were checked, so that an error
        # above does not leave the caller's array half-updated
        for q in range(len(y)):
            d0[q] = y[q]

        rmse = math.sqrt(s/len(d0))

        rr['rmse'] = str(rmse)
        rr['max_var'] = var

        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Model RMSE='+str(rmse))

    else:
        data[ky] = y

    # check saving to json (values as strings)
    fx = i.get('ct_output_file_s_json', '')
    if fx != '':
        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Recording to json file '+fx+' ...')

        r = cm_kernel.save_array_to_file_as_json({'cm_filename': fx, 'cm_array': {'data': data, 'desc': desc}})
        if r['cm_return'] > 0:
            return r

    return rr

Generated on Wed May 28 02:49:02 2014 for Collective Mind Framework by DoxyGen 1.6.1
Concept, design and coordination: Grigori Fursin (C) 1993-2013