Welcome    Usage    Browse    Find CID    Search     Log in

cM API Documentation

model_fgg-hybrid-with-rules.py

Go to the documentation of this file.
00001 #
00002 # Collective Mind
00003 #
00004 # See cM LICENSE.txt for licensing details.
00005 # See cM Copyright.txt for copyright details.
00006 #
00007 # Developer(s): (C) Grigori Fursin, started on 2011.09
00008 #
00009 
00010 # Should always be here
00011 ini={}
00012 cm_kernel=None
00013 
00014 # Local settings
00015 import json
00016 import os
00017 import csv
00018 import math
00019 import numpy as np
00020 import pylab
00021 from scipy.optimize import curve_fit
00022 
00023 # ============================================================================
00024 def init(i):
00025     return {'cm_return':0}
00026 
00027 # ============================================================================
00028 def is_singularity_power_of_two(x):
00029     """
00030     Separate possible singularities (for now, power of 2)
00031 
00032     Input:  x - float number
00033     Return: True if power of 2, otherwise False
00034     """
00035 
00036     y=int(x)&int(x-1)
00037 
00038     if y==0: r=True
00039     else: r=False
00040 
00041     return r
00042 
00043 # ============================================================================
00044 def lm(x, x1, a1, k1):
00045     """
00046     Linear model 
00047 
00048     Input:  x1,a1,k1
00049     Return: linear function
00050     """
00051 
00052     return a1+k1*(x-x1)
00053 
00054 # ============================================================================
00055 def build(i):
00056 
00057     """
00058     Build model
00059 
00060     Input:  {
00061               data                 
00062               ct_dimensions_input
00063               ct_dimensions_output
00064               desc
00065               model_name                   - (earth, svm)
00066               (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
00067               additional_params            - x1 - cuts on first axis
00068                                              x2 - cuts on first axis
00069                                              x3 - cuts on first axis
00070             }
00071 
00072     Output: {
00073               cm_return       - return code >0 if error
00074               file_with_model - file with model
00075             }
00076     
00077     """
00078 
00079     # We can deal here only with one input and one output dimension for now
00080 
00081     data=i['data']
00082 
00083     dxs=i['ct_dimensions_input'][0]
00084     dys=i['ct_dimensions_output'][0]
00085 
00086     ap=i['additional_params']
00087 
00088     cx1=float(ap['x1'])
00089     cx2=float(ap['x2'])
00090     cx3=float(ap['x3'])
00091 
00092     xs=[float(q) for q in data[dxs]]
00093     ys=[float(q) for q in data[dys]]
00094 
00095     xx=np.array(xs)
00096     yy=np.array(ys)
00097 
00098     x00=[];y00=[]
00099     x0=[];y0=[]
00100     x1=[];y1=[]
00101     x2=[];y2=[]
00102     x3=[];y3=[]
00103     x4=[];y4=[]
00104 
00105     singularities={}
00106 
00107     for q in range(0, len(xx)):
00108         x=xx[q]
00109         y=yy[q]
00110 
00111         if is_singularity_power_of_two(x+1): 
00112            x0.append(x);y0.append(y)
00113            singularities[str(x)]=str(y)
00114         else:
00115           x00.append(x);y00.append(y) 
00116           if x<cx1:              x1.append(x);y1.append(y)
00117           elif x<cx2:            x2.append(x);y2.append(y)
00118           elif x<cx3:            x3.append(x);y3.append(y)
00119           else:                  x4.append(x);y4.append(y)
00120 
00121     xd = np.array(xx); yd = np.array(yy)
00122     xd00 = np.array(x00); yd00 = np.array(y00)
00123     xd0 = np.array(x0); yd0 = np.array(y0)
00124     xd1 = np.array(x1); yd1 = np.array(y1)
00125     xd2 = np.array(x2); yd2 = np.array(y2)
00126     xd3 = np.array(x3); yd3 = np.array(y3)
00127     xd4 = np.array(x4); yd4 = np.array(y4)
00128 
00129     try:
00130        popt1, pcov1 = curve_fit(lm, xd1, yd1)
00131        popt2, pcov2 = curve_fit(lm, xd2, yd2)
00132        popt3, pcov3 = curve_fit(lm, xd3, yd3)
00133        popt4, pcov4 = curve_fit(lm, xd4, yd4)
00134     except Exception as e:
00135        return {'cm_return':1, 'cm_error':'Curve fit problem ('+format(e)+')'}
00136 
00137     px1 = np.linspace(1, cx1, 50)
00138     px2 = np.linspace(cx1, cx2, 50)
00139     px3 = np.linspace(cx2, cx3, 50)
00140     px4 = np.linspace(cx3, xx.max(), 50)
00141 
00142     py1 = lm(px1,*popt1)
00143     py2 = lm(px2,*popt2)
00144     py3 = lm(px3,*popt3)
00145     py4 = lm(px4,*popt4)
00146 
00147     if i.get('plot','')=='yes':
00148        pylab.plot(xd00, yd00, 'o', label='original data')
00149        pylab.plot(xd0, yd0, '*', label='singularities')
00150        pylab.plot(px1,py1, label='lm1')
00151        pylab.plot(px2,py2, label='lm2')
00152        pylab.plot(px3,py3, label='lm3')
00153        pylab.plot(px4,py4, label='lm4')
00154        pylab.ylim(0, yy.max()+1)
00155        pylab.legend(loc='upper left')
00156        pylab.grid(True)
00157        pylab.show()
00158 
00159     # Generate output tmp file name
00160     fotmp=i.get('record_data_to_file_prefix','')
00161     if fotmp=='':
00162        r=cm_kernel.gen_cm_tmp_file({})
00163        fotmp=r['cm_path1']
00164 
00165     fotmp1=fotmp+'.py.model.json'
00166 
00167     model={'cx1':str(cx1), 'x1':str(popt1[0]), 'a1':str(popt1[1]), 'k1':str(popt1[2]),
00168            'cx2':str(cx2), 'x2':str(popt2[0]), 'a2':str(popt2[1]), 'k2':str(popt2[2]),
00169            'cx3':str(cx3), 'x3':str(popt3[0]), 'a3':str(popt3[1]), 'k3':str(popt3[2]),
00170            'x4':str(popt4[0]), 'a4':str(popt4[1]), 'k4':str(popt4[2]),
00171            'singularities':singularities}
00172 
00173     f=open(fotmp1,'wt')
00174     f.write(json.dumps(model, indent=2)+'\n')
00175     f.close()
00176 
00177     return {'cm_return':0, 'file_with_model':fotmp1, 'model':model}
00178 
00179 # ============================================================================
00180 def predict(i):
00181 
00182     """
00183     Predict using model
00184 
00185     Input:  {
00186               model_file 
00187               data                 
00188               ct_dimensions_input
00189               (ct_dimensions_output)  - for comparison
00190               desc                    - cM data description
00191               model_name              - (earth, svm)
00192               (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
00193             }
00194 
00195     Output: {
00196               cm_return - return code >0 if error
00197               (rmse)    - if comparison, root mean square error for predictions vs original
00198               (max_var) - list of points with variation more than max_variation_percent          
00199             }
00200     
00201     """
00202 
00203     mf=i.get('model_file','')
00204     if mf=='':
00205        return {'cm_return':1, 'cm_error':'"model_file" is not defined'}
00206 
00207     # Load coefficients
00208     f=file(i['model_file'])
00209     ar=json.loads(f.read())
00210     f.close()
00211 
00212     singularities=ar.get('singularities',{})
00213     cx1=float(ar['cx1']); x1=float(ar['x1']); a1=float(ar['a1']); k1=float(ar['k1'])
00214     cx2=float(ar['cx2']); x2=float(ar['x2']); a2=float(ar['a2']); k2=float(ar['k2'])
00215     cx3=float(ar['cx3']); x3=float(ar['x3']); a3=float(ar['a3']); k3=float(ar['k3'])
00216     x4=float(ar['x4']); a4=float(ar['a4']); k4=float(ar['k4'])
00217 
00218     data=i.get('data',{})
00219     desc=i.get('desc',{})
00220     
00221     dim1=i.get('ct_dimensions_input',[])
00222     dim2=i.get('ct_dimensions_output',[])
00223     
00224     kx=i['ct_dimensions_input'][0]
00225     dx=data[kx]
00226 
00227     ky=i['ct_dimensions_output'][0]
00228     d0=data[ky]
00229 
00230     rr={'cm_return':0}
00231 
00232     tp=desc.get(ky,{}).get('type','')
00233 
00234     mvp=i.get('max_variation_percent','0.2')
00235     dmvp=float(mvp)
00236 
00237     var=[]
00238     
00239     s=0.0
00240     l=range(0, len(data[ky]))
00241     nn=0
00242     for q in l:
00243         v0=d0[q]
00244         
00245         xx=dx[q]
00246         x=float(xx)
00247 
00248         if str(x) in singularities:
00249            y=singularities[str(x)]
00250         else:
00251           if x<cx1:   y=lm(x, x1, a1, k1)
00252           elif x<cx2: y=lm(x, x2, a2, k2)
00253           elif x<cx3: y=lm(x, x3, a3, k3)
00254           else:       y=lm(x, x4, a4, k4)
00255         
00256         v1=str(y)
00257 
00258         if tp=='float' or tp=='integer':
00259            nn+=1
00260            if tp=='float':
00261               dv0=float(v0)
00262               dv1=float(v1)
00263            else:
00264               dv0=int(v0)
00265               dv1=int(v1)
00266            s+=(dv0-dv1)*(dv0-dv1)
00267            diff=abs(dv1-dv0)/dv0
00268            c1=''
00269            if diff>dmvp: 
00270               c1=' ***'
00271               var.append(q)
00272            print "%7s" % data[dim1[0]][q], "%7.3f" % dv0, "%7.3f" % dv1, "%7.3f" % s, "%5.3f" % diff,c1
00273 #        else:
00274 #           if v0!=v1:
00275 #              s+=1
00276 
00277         d0[q]=v1
00278 
00279     rmse=math.sqrt(s/nn)
00280 
00281     rr['rmse']=str(rmse)
00282     rr['max_var']=var
00283 
00284     print ''
00285     print 'Model RMSE='+str(rmse)
00286 
00287     return rr

Generated on Wed May 28 02:49:02 2014 for Collective Mind Framework by DoxyGen 1.6.1
Concept, design and coordination: Grigori Fursin (C) 1993-2013