00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 ini={}
00014 cm_kernel=None
00015
00016
00017 import json
00018 import os
00019 import csv
00020 import math
00021 import numpy as np
00022 import pylab
00023 from scipy.optimize import curve_fit
00024
00025
00026 def init(i):
00027 return {'cm_return':0}
00028
00029
00030 def sigm(x, x0, k, a, c):
00031 """
00032 Signmoidal model
00033
00034 Input: x0,k,a,c
00035 Return: linear function
00036 """
00037 return a / (1 + np.exp(-k*(x-x0))) + c
00038
00039
00040 def is_singularity_power_of_two(x):
00041 """
00042 Separate possible singularities (for now, power of 2)
00043
00044 Input: x - float number
00045 Return: True if power of 2, otherwise False
00046 """
00047
00048 y=int(x)&int(x-1)
00049
00050 if y==0: r=True
00051 else: r=False
00052
00053 return r
00054
00055
00056 def build(i):
00057
00058 """
00059 Build model
00060
00061 Input: {
00062 data
00063 ct_dimensions_input
00064 ct_dimensions_output
00065 desc
00066 model_name - (earth, svm)
00067 (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
00068 }
00069
00070 Output: {
00071 cm_return - return code >0 if error
00072 file_with_model - file with model
00073 }
00074
00075 """
00076
00077
00078
00079 data=i['data']
00080
00081 dxs=i['ct_dimensions_input'][0]
00082 dys=i['ct_dimensions_output'][0]
00083
00084 xs=[float(q) for q in data[dxs]]
00085 ys=[float(q) for q in data[dys]]
00086
00087 xx=np.array(xs)
00088 yy=np.array(ys)
00089
00090 x00=[];y00=[]
00091 x0=[];y0=[]
00092
00093 singularities={}
00094
00095 for q in range(0, len(xx)):
00096 x=xx[q]
00097 y=yy[q]
00098
00099 if is_singularity_power_of_two(x+1):
00100 x0.append(x);y0.append(y)
00101 singularities[str(x)]=str(y)
00102 else:
00103 x00.append(x);y00.append(y)
00104
00105 xd = np.array(xx); yd = np.array(yy)
00106 xd00 = np.array(x00); yd00 = np.array(y00)
00107 xd0 = np.array(x0); yd0 = np.array(y0)
00108
00109 p0=[0,0,0,0]
00110 p0[0]=np.min(xx)+(np.max(xx)-np.min(xx))/2
00111 p0[1]=-0.1
00112 p0[2]=1
00113 p0[3]=1
00114
00115 try:
00116 popt, pcov = curve_fit(sigm, xd00, yd00, p0)
00117 except Exception as e:
00118 return {'cm_return':1, 'cm_error':'Curve fit problem ('+format(e)+')'}
00119
00120 pxd00 = np.linspace(np.min(xx), np.max(xx), 50)
00121 pyd00 = sigm(pxd00,*popt)
00122
00123 if i.get('plot','')=='yes':
00124 pylab.plot(xd00, yd00, 'o', label='original data')
00125 pylab.plot(xd0, yd0, '*', label='singularities')
00126 pylab.plot(pxd00,pyd00, label='sigm')
00127 pylab.ylim(0, yy.max()+1)
00128 pylab.legend(loc='upper left')
00129 pylab.grid(True)
00130 pylab.show()
00131
00132
00133 fotmp=i.get('record_data_to_file_prefix','')
00134 if fotmp=='':
00135 r=cm_kernel.gen_cm_tmp_file({})
00136 fotmp=r['cm_path1']
00137
00138 fotmp1=fotmp+'.py.model.json'
00139
00140 model={'x0':str(popt[0]), 'k':str(popt[1]), 'a':str(popt[2]), 'c':str(popt[3]),
00141 'singularities':singularities}
00142
00143 f=open(fotmp1,'wt')
00144 f.write(json.dumps(model, indent=2)+'\n')
00145 f.close()
00146
00147 return {'cm_return':0, 'file_with_model':fotmp1, 'model':model}
00148
00149
00150 def predict(i):
00151
00152 """
00153 Predict using model
00154
00155 Input: {
00156 model_file
00157 data
00158 ct_dimensions_input
00159 (ct_dimensions_output) - for comparison
00160 desc - cM data description
00161 model_name - (earth, svm)
00162 (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
00163 }
00164
00165 Output: {
00166 cm_return - return code >0 if error
00167 (rmse) - if comparison, root mean square error for predictions vs original
00168 (max_var) - list of points with variation more than max_variation_percent
00169 }
00170
00171 """
00172
00173 mf=i.get('model_file','')
00174 if mf=='':
00175 return {'cm_return':1, 'cm_error':'"model_file" is not defined'}
00176
00177
00178 f=file(i['model_file'])
00179 ar=json.loads(f.read())
00180 f.close()
00181
00182 singularities=ar.get('singularities',{})
00183 x0=float(ar['x0']); k=float(ar['k']); a=float(ar['a']); c=float(ar['c'])
00184
00185 data=i.get('data',{})
00186 desc=i.get('desc',{})
00187
00188 dim1=i.get('ct_dimensions_input',[])
00189 dim2=i.get('ct_dimensions_output',[])
00190
00191 kx=i['ct_dimensions_input'][0]
00192 dx=data[kx]
00193
00194 ky=i['ct_dimensions_output'][0]
00195 d0=data[ky]
00196
00197 rr={'cm_return':0}
00198
00199 tp=desc.get(ky,{}).get('type','')
00200
00201 mvp=i.get('max_variation_percent','0.2')
00202 dmvp=float(mvp)
00203
00204 var=[]
00205
00206 s=0.0
00207 l=range(0, len(data[ky]))
00208 for q in l:
00209 v0=d0[q]
00210
00211 xx=dx[q]
00212 x=float(xx)
00213
00214 if str(x) in singularities:
00215 y=singularities[str(x)]
00216 else:
00217 y = sigm(x,x0,k,a,c)
00218
00219 v1=str(y)
00220
00221 if tp=='float' or tp=='integer':
00222 if tp=='float':
00223 dv0=float(v0)
00224 dv1=float(v1)
00225 else:
00226 dv0=int(v0)
00227 dv1=int(v1)
00228 s+=(dv0-dv1)*(dv0-dv1)
00229 diff=abs(dv1-dv0)/dv0
00230 c1=''
00231 if diff>dmvp:
00232 c1=' ***'
00233 var.append(q)
00234 print "%7s" % data[dim1[0]][q], "%7.3f" % dv0, "%7.3f" % dv1, "%7.3f" % s, "%5.3f" % diff,c1
00235 else:
00236 if v0!=v1:
00237 s+=1
00238
00239 d0[q]=v1
00240
00241 rmse=math.sqrt(s/len(l))
00242
00243 rr['rmse']=str(rmse)
00244 rr['max_var']=var
00245
00246 print ''
00247 print 'Model RMSE='+str(rmse)
00248
00249 return rr