00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 ini={}
00012 cm_kernel=None
00013
00014
00015 import json
00016 import os
00017 import csv
00018 import math
00019 import numpy as np
00020 import pylab
00021 from scipy.optimize import curve_fit
00022
00023
00024 def init(i):
00025 return {'cm_return':0}
00026
00027
00028 def is_singularity_power_of_two(x):
00029 """
00030 Separate possible singularities (for now, power of 2)
00031
00032 Input: x - float number
00033 Return: True if power of 2, otherwise False
00034 """
00035
00036 y=int(x)&int(x-1)
00037
00038 if y==0: r=True
00039 else: r=False
00040
00041 return r
00042
00043
00044 def lm(x, x1, a1, k1):
00045 """
00046 Linear model
00047
00048 Input: x1,a1,k1
00049 Return: linear function
00050 """
00051
00052 return a1+k1*(x-x1)
00053
00054
00055 def build(i):
00056
00057 """
00058 Build model
00059
00060 Input: {
00061 data
00062 ct_dimensions_input
00063 ct_dimensions_output
00064 desc
00065 model_name - (earth, svm)
00066 (record_data_to_file_prefix) - if !='', use this filename prefix instead of randomly generated
00067 additional_params - x1 - cuts on first axis
00068 x2 - cuts on first axis
00069 x3 - cuts on first axis
00070 }
00071
00072 Output: {
00073 cm_return - return code >0 if error
00074 file_with_model - file with model
00075 }
00076
00077 """
00078
00079
00080
00081 data=i['data']
00082
00083 dxs=i['ct_dimensions_input'][0]
00084 dys=i['ct_dimensions_output'][0]
00085
00086 ap=i['additional_params']
00087
00088 cx1=float(ap['x1'])
00089 cx2=float(ap['x2'])
00090 cx3=float(ap['x3'])
00091
00092 xs=[float(q) for q in data[dxs]]
00093 ys=[float(q) for q in data[dys]]
00094
00095 xx=np.array(xs)
00096 yy=np.array(ys)
00097
00098 x00=[];y00=[]
00099 x0=[];y0=[]
00100 x1=[];y1=[]
00101 x2=[];y2=[]
00102 x3=[];y3=[]
00103 x4=[];y4=[]
00104
00105 singularities={}
00106
00107 for q in range(0, len(xx)):
00108 x=xx[q]
00109 y=yy[q]
00110
00111 if is_singularity_power_of_two(x+1):
00112 x0.append(x);y0.append(y)
00113 singularities[str(x)]=str(y)
00114 else:
00115 x00.append(x);y00.append(y)
00116 if x<cx1: x1.append(x);y1.append(y)
00117 elif x<cx2: x2.append(x);y2.append(y)
00118 elif x<cx3: x3.append(x);y3.append(y)
00119 else: x4.append(x);y4.append(y)
00120
00121 xd = np.array(xx); yd = np.array(yy)
00122 xd00 = np.array(x00); yd00 = np.array(y00)
00123 xd0 = np.array(x0); yd0 = np.array(y0)
00124 xd1 = np.array(x1); yd1 = np.array(y1)
00125 xd2 = np.array(x2); yd2 = np.array(y2)
00126 xd3 = np.array(x3); yd3 = np.array(y3)
00127 xd4 = np.array(x4); yd4 = np.array(y4)
00128
00129 try:
00130 popt1, pcov1 = curve_fit(lm, xd1, yd1)
00131 popt2, pcov2 = curve_fit(lm, xd2, yd2)
00132 popt3, pcov3 = curve_fit(lm, xd3, yd3)
00133 popt4, pcov4 = curve_fit(lm, xd4, yd4)
00134 except Exception as e:
00135 return {'cm_return':1, 'cm_error':'Curve fit problem ('+format(e)+')'}
00136
00137 px1 = np.linspace(1, cx1, 50)
00138 px2 = np.linspace(cx1, cx2, 50)
00139 px3 = np.linspace(cx2, cx3, 50)
00140 px4 = np.linspace(cx3, xx.max(), 50)
00141
00142 py1 = lm(px1,*popt1)
00143 py2 = lm(px2,*popt2)
00144 py3 = lm(px3,*popt3)
00145 py4 = lm(px4,*popt4)
00146
00147 if i.get('plot','')=='yes':
00148 pylab.plot(xd00, yd00, 'o', label='original data')
00149 pylab.plot(xd0, yd0, '*', label='singularities')
00150 pylab.plot(px1,py1, label='lm1')
00151 pylab.plot(px2,py2, label='lm2')
00152 pylab.plot(px3,py3, label='lm3')
00153 pylab.plot(px4,py4, label='lm4')
00154 pylab.ylim(0, yy.max()+1)
00155 pylab.legend(loc='upper left')
00156 pylab.grid(True)
00157 pylab.show()
00158
00159
00160 fotmp=i.get('record_data_to_file_prefix','')
00161 if fotmp=='':
00162 r=cm_kernel.gen_cm_tmp_file({})
00163 fotmp=r['cm_path1']
00164
00165 fotmp1=fotmp+'.py.model.json'
00166
00167 model={'cx1':str(cx1), 'x1':str(popt1[0]), 'a1':str(popt1[1]), 'k1':str(popt1[2]),
00168 'cx2':str(cx2), 'x2':str(popt2[0]), 'a2':str(popt2[1]), 'k2':str(popt2[2]),
00169 'cx3':str(cx3), 'x3':str(popt3[0]), 'a3':str(popt3[1]), 'k3':str(popt3[2]),
00170 'x4':str(popt4[0]), 'a4':str(popt4[1]), 'k4':str(popt4[2]),
00171 'singularities':singularities}
00172
00173 f=open(fotmp1,'wt')
00174 f.write(json.dumps(model, indent=2)+'\n')
00175 f.close()
00176
00177 return {'cm_return':0, 'file_with_model':fotmp1, 'model':model}
00178
00179
00180 def predict(i):
00181
00182 """
00183 Predict using model
00184
00185 Input: {
00186 model_file
00187 data
00188 ct_dimensions_input
00189 (ct_dimensions_output) - for comparison
00190 desc - cM data description
00191 model_name - (earth, svm)
00192 (max_variation_percent) - for comparison, report points where variation is more than this number (default=0.2)
00193 }
00194
00195 Output: {
00196 cm_return - return code >0 if error
00197 (rmse) - if comparison, root mean square error for predictions vs original
00198 (max_var) - list of points with variation more than max_variation_percent
00199 }
00200
00201 """
00202
00203 mf=i.get('model_file','')
00204 if mf=='':
00205 return {'cm_return':1, 'cm_error':'"model_file" is not defined'}
00206
00207
00208 f=file(i['model_file'])
00209 ar=json.loads(f.read())
00210 f.close()
00211
00212 singularities=ar.get('singularities',{})
00213 cx1=float(ar['cx1']); x1=float(ar['x1']); a1=float(ar['a1']); k1=float(ar['k1'])
00214 cx2=float(ar['cx2']); x2=float(ar['x2']); a2=float(ar['a2']); k2=float(ar['k2'])
00215 cx3=float(ar['cx3']); x3=float(ar['x3']); a3=float(ar['a3']); k3=float(ar['k3'])
00216 x4=float(ar['x4']); a4=float(ar['a4']); k4=float(ar['k4'])
00217
00218 data=i.get('data',{})
00219 desc=i.get('desc',{})
00220
00221 dim1=i.get('ct_dimensions_input',[])
00222 dim2=i.get('ct_dimensions_output',[])
00223
00224 kx=i['ct_dimensions_input'][0]
00225 dx=data[kx]
00226
00227 ky=i['ct_dimensions_output'][0]
00228 d0=data[ky]
00229
00230 rr={'cm_return':0}
00231
00232 tp=desc.get(ky,{}).get('type','')
00233
00234 mvp=i.get('max_variation_percent','0.2')
00235 dmvp=float(mvp)
00236
00237 var=[]
00238
00239 s=0.0
00240 l=range(0, len(data[ky]))
00241 nn=0
00242 for q in l:
00243 v0=d0[q]
00244
00245 xx=dx[q]
00246 x=float(xx)
00247
00248 if str(x) in singularities:
00249 y=singularities[str(x)]
00250 else:
00251 if x<cx1: y=lm(x, x1, a1, k1)
00252 elif x<cx2: y=lm(x, x2, a2, k2)
00253 elif x<cx3: y=lm(x, x3, a3, k3)
00254 else: y=lm(x, x4, a4, k4)
00255
00256 v1=str(y)
00257
00258 if tp=='float' or tp=='integer':
00259 nn+=1
00260 if tp=='float':
00261 dv0=float(v0)
00262 dv1=float(v1)
00263 else:
00264 dv0=int(v0)
00265 dv1=int(v1)
00266 s+=(dv0-dv1)*(dv0-dv1)
00267 diff=abs(dv1-dv0)/dv0
00268 c1=''
00269 if diff>dmvp:
00270 c1=' ***'
00271 var.append(q)
00272 print "%7s" % data[dim1[0]][q], "%7.3f" % dv0, "%7.3f" % dv1, "%7.3f" % s, "%5.3f" % diff,c1
00273
00274
00275
00276
00277 d0[q]=v1
00278
00279 rmse=math.sqrt(s/nn)
00280
00281 rr['rmse']=str(rmse)
00282 rr['max_var']=var
00283
00284 print ''
00285 print 'Model RMSE='+str(rmse)
00286
00287 return rr