Welcome    Usage    Browse    Find CID    Search     Log in

cM API Documentation

model_fgg-hybrid-sigm-sing.py

Go to the documentation of this file.
00001 #
00002 # Collective Mind
00003 #
00004 # See cM LICENSE.txt for licensing details.
00005 # See cM Copyright.txt for copyright details.
00006 #
00007 # Developer(s): (C) Grigori Fursin, started on 2011.09
00008 #
00009 
00010 # NOT FINISHED!
00011 
00012 # Should always be here
00013 ini={}
00014 cm_kernel=None
00015 
00016 # Local settings
00017 import json
00018 import os
00019 import csv
00020 import math
00021 import numpy as np
00022 import pylab
00023 from scipy.optimize import curve_fit
00024 
00025 # ============================================================================
00026 def init(i):
00027     return {'cm_return':0}
00028 
00029 # ============================================================================
00030 def sigm(x, x0, k, a, c):
00031     """
00032     Signmoidal model 
00033 
00034     Input:  x0,k,a,c
00035     Return: linear function
00036     """
00037     return a / (1 + np.exp(-k*(x-x0))) + c
00038 
00039 # ============================================================================
00040 def is_singularity_power_of_two(x):
00041     """
00042     Separate possible singularities (for now, power of 2)
00043 
00044     Input:  x - float number
00045     Return: True if power of 2, otherwise False
00046     """
00047 
00048     y=int(x)&int(x-1)
00049 
00050     if y==0: r=True
00051     else: r=False
00052 
00053     return r
00054 
00055 # ============================================================================
00056 def build(i):
00057 
00058     """
00059     Build model
00060 
00061     Input:  {
00062               data                 
00063               ct_dimensions_input
00064               ct_dimensions_output
00065               desc
00066               model_name                   - (earth, svm)
00067               (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
00068             }
00069 
00070     Output: {
00071               cm_return       - return code >0 if error
00072               file_with_model - file with model
00073             }
00074     
00075     """
00076 
00077     # We can deal here only with one input and one output dimension for now
00078 
00079     data=i['data']
00080 
00081     dxs=i['ct_dimensions_input'][0]
00082     dys=i['ct_dimensions_output'][0]
00083 
00084     xs=[float(q) for q in data[dxs]]
00085     ys=[float(q) for q in data[dys]]
00086 
00087     xx=np.array(xs)
00088     yy=np.array(ys)
00089 
00090     x00=[];y00=[]
00091     x0=[];y0=[]
00092 
00093     singularities={}
00094 
00095     for q in range(0, len(xx)):
00096         x=xx[q]
00097         y=yy[q]
00098 
00099         if is_singularity_power_of_two(x+1): 
00100            x0.append(x);y0.append(y)
00101            singularities[str(x)]=str(y)
00102         else:
00103           x00.append(x);y00.append(y) 
00104 
00105     xd = np.array(xx); yd = np.array(yy)
00106     xd00 = np.array(x00); yd00 = np.array(y00)
00107     xd0 = np.array(x0); yd0 = np.array(y0)
00108 
00109     p0=[0,0,0,0]
00110     p0[0]=np.min(xx)+(np.max(xx)-np.min(xx))/2
00111     p0[1]=-0.1
00112     p0[2]=1
00113     p0[3]=1
00114 
00115     try:
00116        popt, pcov = curve_fit(sigm, xd00, yd00, p0)
00117     except Exception as e:
00118        return {'cm_return':1, 'cm_error':'Curve fit problem ('+format(e)+')'}
00119 
00120     pxd00 = np.linspace(np.min(xx), np.max(xx), 50)
00121     pyd00 = sigm(pxd00,*popt)
00122 
00123     if i.get('plot','')=='yes':
00124        pylab.plot(xd00, yd00, 'o', label='original data')
00125        pylab.plot(xd0, yd0, '*', label='singularities')
00126        pylab.plot(pxd00,pyd00, label='sigm')
00127        pylab.ylim(0, yy.max()+1)
00128        pylab.legend(loc='upper left')
00129        pylab.grid(True)
00130        pylab.show()
00131 
00132     # Generate output tmp file name
00133     fotmp=i.get('record_data_to_file_prefix','')
00134     if fotmp=='':
00135        r=cm_kernel.gen_cm_tmp_file({})
00136        fotmp=r['cm_path1']
00137 
00138     fotmp1=fotmp+'.py.model.json'
00139 
00140     model={'x0':str(popt[0]), 'k':str(popt[1]), 'a':str(popt[2]), 'c':str(popt[3]),
00141            'singularities':singularities}
00142 
00143     f=open(fotmp1,'wt')
00144     f.write(json.dumps(model, indent=2)+'\n')
00145     f.close()
00146 
00147     return {'cm_return':0, 'file_with_model':fotmp1, 'model':model}
00148 
00149 # ============================================================================
00150 def predict(i):
00151 
00152     """
00153     Predict using model
00154 
00155     Input:  {
00156               model_file 
00157               data                 
00158               ct_dimensions_input
00159               (ct_dimensions_output)  - for comparison
00160               desc                    - cM data description
00161               model_name              - (earth, svm)
00162               (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
00163             }
00164 
00165     Output: {
00166               cm_return - return code >0 if error
00167               (rmse)    - if comparison, root mean square error for predictions vs original
00168               (max_var) - list of points with variation more than max_variation_percent          
00169             }
00170     
00171     """
00172 
00173     mf=i.get('model_file','')
00174     if mf=='':
00175        return {'cm_return':1, 'cm_error':'"model_file" is not defined'}
00176 
00177     # Load coefficients
00178     f=file(i['model_file'])
00179     ar=json.loads(f.read())
00180     f.close()
00181 
00182     singularities=ar.get('singularities',{})
00183     x0=float(ar['x0']); k=float(ar['k']); a=float(ar['a']); c=float(ar['c'])
00184 
00185     data=i.get('data',{})
00186     desc=i.get('desc',{})
00187     
00188     dim1=i.get('ct_dimensions_input',[])
00189     dim2=i.get('ct_dimensions_output',[])
00190     
00191     kx=i['ct_dimensions_input'][0]
00192     dx=data[kx]
00193 
00194     ky=i['ct_dimensions_output'][0]
00195     d0=data[ky]
00196 
00197     rr={'cm_return':0}
00198 
00199     tp=desc.get(ky,{}).get('type','')
00200 
00201     mvp=i.get('max_variation_percent','0.2')
00202     dmvp=float(mvp)
00203 
00204     var=[]
00205     
00206     s=0.0
00207     l=range(0, len(data[ky]))
00208     for q in l:
00209         v0=d0[q]
00210         
00211         xx=dx[q]
00212         x=float(xx)
00213 
00214         if str(x) in singularities:
00215            y=singularities[str(x)]
00216         else:
00217            y = sigm(x,x0,k,a,c)
00218         
00219         v1=str(y)
00220 
00221         if tp=='float' or tp=='integer':
00222            if tp=='float':
00223               dv0=float(v0)
00224               dv1=float(v1)
00225            else:
00226               dv0=int(v0)
00227               dv1=int(v1)
00228            s+=(dv0-dv1)*(dv0-dv1)
00229            diff=abs(dv1-dv0)/dv0
00230            c1=''
00231            if diff>dmvp: 
00232               c1=' ***'
00233               var.append(q)
00234            print "%7s" % data[dim1[0]][q], "%7.3f" % dv0, "%7.3f" % dv1, "%7.3f" % s, "%5.3f" % diff,c1
00235         else:
00236            if v0!=v1:
00237               s+=1
00238 
00239         d0[q]=v1
00240 
00241     rmse=math.sqrt(s/len(l))
00242 
00243     rr['rmse']=str(rmse)
00244     rr['max_var']=var
00245 
00246     print ''
00247     print 'Model RMSE='+str(rmse)
00248 
00249     return rr

Generated on Wed May 28 02:49:02 2014 for Collective Mind Framework by DoxyGen 1.6.1
Concept, design and coordination: Grigori Fursin (C) 1993-2013