Welcome    Usage    Browse    Find CID    Search     Log in

cM API Documentation

module.py

Go to the documentation of this file.
00001 #
00002 # Collective Mind
00003 #
00004 # See cM LICENSE.txt for licensing details.
00005 # See cM Copyright.txt for copyright details.
00006 #
00007 # Developer(s): (C) Grigori Fursin, started on 2011.09
00008 #
00009 
# Should always be here.
# Module-level placeholders: the functions below read ini['cfg'] and
# ini['cm_module_uid'] and call cm_kernel.access(...), but nothing in this
# file assigns them -- presumably the cM kernel fills both in when it loads
# the module (TODO confirm).
ini = dict()
cm_kernel = None
00013 
00014 # Local settings
00015 import os
00016 import time
00017 import urllib
00018 import urllib2
00019 import json
00020 import threading
00021 import time
00022 import re
00023 import copy
00024 
00025 # ============================================================================
def init(i):

    """
    Module initialization hook

    Input:  {} - not used

    Output: {
              cm_return  - always 0
            }
    """

    status = dict(cm_return=0)
    return status
00028 
00029 # ============================================================================
def server(i):

    """
    Start the index server described by an index configuration entry

    Input:  {
              (cm_index_uoa)  - uoa with description of indexer; if empty,
                                'cm_default_index_uoa' from the kernel default
                                configuration is used, then 'default'
              (os_uoa)        - uoa with OS description (needed for script extension, for example);
                                if empty, 'cm_default_os_uoa' from the kernel default
                                configuration is used
              (cm_index_port) - web port (NOTE: not read by this function;
                                ports are taken from the index configuration)
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
            }
    """

    cm_kernel.print_for_con("For now we can only start server indefinitely")
    cm_kernel.print_for_con("but we should add a nice start/stop/resume support one day")

    dcfg=cm_kernel.ini['dcfg']

    # Check (default) index configuration.
    # 'default' is the last resort, so index_uoa is never empty here.
    index_uoa='default'
    if 'cm_index_uoa' in i and i['cm_index_uoa']!='': index_uoa=i['cm_index_uoa']
    elif 'cm_default_index_uoa' in dcfg and dcfg['cm_default_index_uoa']!='':
        index_uoa=dcfg['cm_default_index_uoa']

    # Load index
    r=cm_kernel.access({'cm_run_module_uoa':ini['cm_module_uid'],
                        'cm_action':'load',
                        'cm_data_uoa':index_uoa})
    if r['cm_return']>0: return r
    index_cfg=r['cm_data_obj']['cfg']

    # Ports are only used as text below; str() keeps numeric config values working
    port=str(index_cfg.get('port','9200'))
    support_port=str(index_cfg.get('support_port','9300'))
    host=index_cfg['host']+':'+port

    # Without 'code_uoa' in the index configuration nothing is launched
    if 'code_uoa' not in index_cfg or index_cfg['code_uoa']=='':
        return {'cm_return':0}

    # We need then OS to load properly environment script

    # Check (default) OS configuration
    os_uoa=''
    if 'os_uoa' in i and i['os_uoa']!='': os_uoa=i['os_uoa']
    elif 'cm_default_os_uoa' in dcfg and dcfg['cm_default_os_uoa']!='':
        os_uoa=dcfg['cm_default_os_uoa']

    if os_uoa=='':
        return {'cm_return':1, 'cm_error':'"os_uoa" is not defined'}

    # Get environment
    r=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['code'],
                        'cm_action':'get_env',
                        'cm_data_uoa':index_cfg['code_uoa'],
                        'os_uoa':os_uoa})
    if r['cm_return']>0: return r
    code_env=r['cm_string']

    # Load OS
    r=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['os'],
                        'cm_action':'load',
                        'cm_data_uoa':os_uoa})
    if r['cm_return']>0: return r
    os_cfg=r['cm_data_obj']['cfg']

    if 'name' not in os_cfg:
        return {'cm_return':1, 'cm_error':'"name" is not defined in os configuration'}
    os_name=os_cfg['name']

    # The start command is selected per OS name from the index configuration
    run_cmds=index_cfg.get('run_cmd',{})
    if os_name not in run_cmds:
        return {'cm_return':1, 'cm_error':'"run_cmd" for os "'+os_name+'" is not defined in index configuration'}

    # Substitute placeholders in the command template
    run_cmd=run_cmds[os_name]
    for key, value in (('code_env', code_env),
                       ('index_port', port),
                       ('index_support_port', support_port)):
        run_cmd=run_cmd.replace(cm_kernel.convert_str_to_special(key), value)

    cm_kernel.print_for_con('')
    cm_kernel.print_for_con('Starting cM index server: '+host+' ...')
    cm_kernel.print_for_con('')
    cm_kernel.print_for_con(run_cmd)

    rc=os.system(run_cmd)

    # Any non-zero status is a failure (os.system may also return -1)
    if rc!=0:
        return {'cm_return':1, 'cm_error':'problem starting cM index server (return code='+str(rc)+')'}

    # TBD (from the original author): wait for the server to come up and
    # add one entry to the index right after start.

    return {'cm_return':0}
00138 
00139 # ============================================================================
def flush(i):

    """
    Flush index

    Depending on 'use_curl_for_indexing' in the kernel default configuration,
    either runs 'curl -XDELETE <host>' or requests '<host>/_flush' and expects
    a JSON answer with a true "ok" field. Afterwards this module's own entry
    is indexed through index().

    Input:  {
              (cm_index_uoa)  - uoa with description of indexer; if empty,
                                'cm_default_index_uoa' from the kernel default
                                configuration is used
              (os_uoa)        - uoa with OS description (NOTE: not read by this function)
              (cm_index_port) - web port (NOTE: not read by this function)
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
            }
    """

    dcfg=cm_kernel.ini['dcfg']

    if dcfg.get('use_indexing','')!='yes':
        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}

    cm_kernel.print_for_con("Flushing index ...")
    cm_kernel.print_for_con("")

    # Check (default) index configuration
    index_uoa=''
    if 'cm_index_uoa' in i and i['cm_index_uoa']!='': index_uoa=i['cm_index_uoa']
    elif 'cm_default_index_uoa' in dcfg and dcfg['cm_default_index_uoa']!='':
        index_uoa=dcfg['cm_default_index_uoa']

    if index_uoa=='':
        return {'cm_return':1, 'cm_error':'"cm_index_uoa" is not defined'}

    # Load index
    r=cm_kernel.access({'cm_run_module_uoa':ini['cm_module_uid'],
                        'cm_action':'load',
                        'cm_data_uoa':index_uoa})
    if r['cm_return']>0: return r
    index_cfg=r['cm_data_obj']['cfg']

    # str() keeps a numeric port from the configuration working
    host=index_cfg['host']+':'+str(index_cfg.get('port','9200'))

    if dcfg.get('use_curl_for_indexing','')=='yes':
        # First delete index
        # NOTE(review): this sends DELETE to the server root while the branch
        # below requests '/_flush' -- confirm both are meant to be equivalent.
        os.system('curl -XDELETE '+host)
    else:
        request=urllib2.Request(host+'/_flush')

        try:
            f=urllib2.urlopen(request, None)
        except urllib2.URLError as e:
            return {'cm_return':1, 'cm_error':'problem accessing index server ('+format(e)+')'}

        # Always release the connection, also when reading fails
        try:
            s=str(f.read())
        finally:
            f.close()

        try:
            j=json.loads(s)
        except ValueError:
            cm_kernel.print_for_con('cM error: can\'t parse output from index server ('+s+')')
            return {'cm_return':1, 'cm_error':'can\'t parse output from index server ('+s+')'}

        # A missing "ok" field is reported as a failure instead of raising KeyError
        if not isinstance(j, dict) or not j.get('ok', False):
            cm_kernel.print_for_con('cM error: flushing was not successful ('+s+')')
            return {'cm_return':1, 'cm_error':'flushing was not successful ('+s+')'}

        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Output from the index server:')
        cm_kernel.print_for_con(s)

    # Index at least yourself (module)
    ii={'cm_module_uoa':ini['cfg']['cm_modules']['cm-module'],
        'cm_data_uoa':ini['cm_module_uid'],
        'mode': 'add',
        'cm_console':''}
    return index(ii)
00218 
00219 # ============================================================================
def test(i):

    """
    Test index server

    Depending on 'use_curl_for_indexing' in the kernel default configuration,
    either runs 'curl <host>' or requests the server root and expects a JSON
    answer with a true "ok" field.

    Input:  {
              (cm_index_uoa) - uoa with description of indexer; if empty,
                               'cm_default_index_uoa' from the kernel default
                               configuration is used
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
            }
    """

    dcfg=cm_kernel.ini['dcfg']

    if dcfg.get('use_indexing','')!='yes':
        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}

    cm_kernel.print_for_con("Testing index server ...")
    cm_kernel.print_for_con("")

    # Check (default) index configuration
    index_uoa=''
    if 'cm_index_uoa' in i and i['cm_index_uoa']!='': index_uoa=i['cm_index_uoa']
    elif 'cm_default_index_uoa' in dcfg and dcfg['cm_default_index_uoa']!='':
        index_uoa=dcfg['cm_default_index_uoa']

    if index_uoa=='':
        return {'cm_return':1, 'cm_error':'"cm_index_uoa" is not defined'}

    # Load index
    r=cm_kernel.access({'cm_run_module_uoa':ini['cm_module_uid'],
                        'cm_action':'load',
                        'cm_data_uoa':index_uoa})
    if r['cm_return']>0: return r
    index_cfg=r['cm_data_obj']['cfg']

    # str() keeps a numeric port from the configuration working
    host=index_cfg['host']+':'+str(index_cfg.get('port','9200'))

    if dcfg.get('use_curl_for_indexing','')=='yes':
        # Query the server root; curl prints the answer itself
        os.system('curl '+host)
    else:
        request=urllib2.Request(host)

        try:
            f=urllib2.urlopen(request, None)
        except urllib2.URLError as e:
            return {'cm_return':1, 'cm_error':'problem accessing index server ('+format(e)+')'}

        # Always release the connection, also when reading fails
        try:
            s=str(f.read())
        finally:
            f.close()

        try:
            j=json.loads(s)
        except ValueError:
            cm_kernel.print_for_con('cM error: can\'t parse output from index server ('+s+')')
            return {'cm_return':1, 'cm_error':'can\'t parse output from index server ('+s+')'}

        # A missing "ok" field is reported as a failure instead of raising KeyError
        if not isinstance(j, dict) or not j.get('ok', False):
            cm_kernel.print_for_con('cM error: index server test was not successful ('+s+')')
            return {'cm_return':1, 'cm_error':'index server test was not successful ('+s+')'}

        cm_kernel.print_for_con('')
        cm_kernel.print_for_con('Output from the index server:')
        cm_kernel.print_for_con(s)

    return {'cm_return':0}
00287 
00288 # ============================================================================
def all(i):

    """
    ReIndex all repos

    Walks repositories -> modules -> data entries and calls index() with
    mode 'add' for every selected entry. Repositories whose configuration
    has path_type=='remote_url' are skipped. Errors and warnings returned by
    index() are printed and do not stop the walk.

    Input:  {
              (index_repo_uoa)   - repo uoa to prune reindexing
              (index_module_uoa) - module uoa to prune reindexing
              (index_data_uoa)   - data uoa to prune reindexing (can include pattern * and ?);
                                   a pattern is matched against the end of the entry
                                   uoa or of its 'cm_display_as_alias'
              (delay)            - delay in seconds after indexing individual entry (default 0.05)
              (sequential)       - if 'yes' perform sequential index (very slow, but reports errors)
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
            }
    """

    if cm_kernel.ini['dcfg'].get('use_indexing','')!='yes':
        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}

    # Check for pattern
    idu=i.get('index_data_uoa','')

    matcher=None
    if '*' in idu or '?' in idu:
        # Escape first so that only * and ? act as wildcards
        # (a literal '.' in the uoa must not match any character)
        pattern1=re.escape(idu).replace('\\*','.*').replace('\\?','.?')
        matcher=re.compile('.*'+pattern1+'$')
        idu=''

    indexes=0

    # Check delay
    d=0.05
    if 'delay' in i and i['delay']!='':
        try:
            d=float(i['delay'])
        except (TypeError, ValueError):
            return {'cm_return':1, 'cm_error':'"delay" is not a number ('+str(i['delay'])+')'}

    # Start timer
    t0=time.time()

    cm_kernel.print_for_con('Reindexing all entries in all repositories ...')
    cm_kernel.print_for_con('')

    repo_module=ini['cfg']['cm_modules']['cm-repo']

    # Search through repositories
    if 'index_repo_uoa' in i:
        # Load repo
        r2=cm_kernel.access({'cm_run_module_uoa':repo_module,
                             'cm_action':'load',
                             'cm_data_uoa':i['index_repo_uoa']})
        if r2['cm_return']>0: return r2
        rlist=[{'cm_uoa':r2['cm_uoa'], 'cm_uid':r2['cm_uid']}]
    else:
        r2=cm_kernel.access({'cm_run_module_uoa':repo_module,
                             'cm_action':'list'})
        if r2['cm_return']>0: return r2
        rlist=r2['cm_mixed']

    for repo1 in rlist:
        repo_uoa=repo1['cm_uoa']
        repo_uid=repo1['cm_uid']

        # Load repo to exclude remote
        r2=cm_kernel.access({'cm_run_module_uoa':repo_module,
                             'cm_action':'load',
                             'cm_data_uoa':repo_uid})
        if r2['cm_return']>0: return r2
        if r2['cm_data_obj']['cfg'].get('path_type','')=='remote_url':
            continue

        # Search through modules
        if 'index_module_uoa' in i:
            # Load module
            r1=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['cm-module'],
                                 'cm_action':'load',
                                 'cm_data_uoa':i['index_module_uoa']})
            if r1['cm_return']>0: return r1
            mlist=[{'cm_uoa':r1['cm_uoa'], 'cm_uid':r1['cm_uid']}]
        else:
            r1=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['cm-core'],
                                 'cm_action':'list',
                                 'cm_repo_uoa':repo_uoa,
                                 'cm_only_available_modules':'yes'})
            if r1['cm_return']>0: return r1
            mlist=r1['cm_mixed']

        for module1 in mlist:
            module_uoa=module1['cm_uoa']
            module_uid=module1['cm_uid']

            # List data
            if idu!='':
                # A concrete data uoa was requested: load just that entry.
                # (The original tested "idu in i", i.e. whether the *value*
                # is a key of the input, so this branch was never taken.)
                r3=cm_kernel.access({'cm_run_module_uoa':module_uoa,
                                     'cm_action':'load',
                                     'cm_repo_uoa':repo_uoa,
                                     'cm_data_uoa':idu})
                if r3['cm_return']>0:
                    # The load failed for this repo/module pair (presumably the
                    # entry does not exist there); pruning means skipping it,
                    # not aborting the whole walk.
                    continue
                dlist=[{'cm_uoa':r3['cm_uoa'], 'cm_uid':r3['cm_uid']}]
            else:
                r3=cm_kernel.access({'cm_run_module_uoa':module_uoa,
                                     'cm_repo_uoa':repo_uoa,
                                     'cm_action':'list'})
                if r3['cm_return']>0: return r3
                dlist=r3['cm_mixed']

            for data1 in dlist:
                data_uoa=data1['cm_uoa']
                data_uid=data1['cm_uid']
                data_daa=data1.get('cm_display_as_alias','')

                # Check for pattern (no pattern selects every entry)
                if matcher is not None and not matcher.match(data_uoa) and not matcher.match(data_daa):
                    continue

                cm_kernel.print_for_con('Indexing '+repo_uoa+':'+module_uoa+':'+data_uoa+' entry (total entries='+str(indexes)+', elapsed time='+str("%.1f" % (time.time()-t0))+' sec.)')
                indexes+=1

                # Add index
                ii={'cm_repo_uoa':repo_uoa,
                    'cm_repo_uid':repo_uid,
                    'cm_module_uoa':module_uoa,
                    'cm_module_uid':module_uid,
                    'cm_data_uoa':data_uoa,
                    'cm_data_uid':data_uid,
                    'mode': 'add'}
                if 'sequential' in i: ii['sequential']=i['sequential']
                r=index(ii)
                if r['cm_return']>0:
                    cm_kernel.print_for_con('cM error: '+r['cm_error'])
                elif r['cm_return']<0:
                    cm_kernel.print_for_con('cM warning: '+r['cm_warning'])

                time.sleep(d)

    return {'cm_return':0}
00446 
00447 # ============================================================================
def index(i):

    """
    Add or delete index

    The entry is addressed on the index server as
    <host>/<module uid>/<data uid>/1. With 'use_curl_for_indexing'=='yes'
    in the kernel default configuration the requests are made with curl
    (DELETE first, then PUT when mode is 'add'); otherwise index_seq() is
    used, in a background thread unless 'sequential' is 'yes'.

    Input:  {
              cm_module_uoa       - module UOA
              (cm_module_uid)
              cm_data_uoa         - data UOA
              (cm_data_uid)
              (cm_repo_uoa)       - repo UOA
              (cm_repo_uid)
              mode                - 'delete' | 'add'
              (cm_index_uoa)      - index configuration; if empty,
                                    'cm_default_index_uoa' from the kernel
                                    default configuration is used
              (sequential)        - if 'yes' perform sequential index (very slow, but reports errors)
            }

    Output: {
              cm_return    - return code >0 if error,
                             <0 if warning (json size limit exceeded)
              (cm_error)   - error text if cm_return>0
              (cm_warning) - warning text if cm_return<0
            }
    """

    dcfg=cm_kernel.ini['dcfg']

    if dcfg.get('use_indexing','')!='yes':
        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}

    # Report a missing mode through the usual error dictionary
    if 'mode' not in i:
        return {'cm_return':1, 'cm_error':'"mode" is not defined'}
    mode=i['mode']

    # Check (default) index configuration
    index_uoa=''
    if 'cm_index_uoa' in i and i['cm_index_uoa']!='': index_uoa=i['cm_index_uoa']
    elif 'cm_default_index_uoa' in dcfg and dcfg['cm_default_index_uoa']!='':
        index_uoa=dcfg['cm_default_index_uoa']

    if index_uoa=='':
        return {'cm_return':1, 'cm_error':'"cm_index_uoa" is not defined'}

    # Load index
    r=cm_kernel.access({'cm_run_module_uoa':ini['cm_module_uid'],
                        'cm_action':'load',
                        'cm_data_uoa':index_uoa})
    if r['cm_return']>0: return r
    index_cfg=r['cm_data_obj']['cfg']

    # str() keeps a numeric port from the configuration working
    host=index_cfg['host']+':'+str(index_cfg.get('port','9200'))

    # Load data if incomplete UOA/UID
    data_uoa=i.get('cm_data_uoa','')
    data_uid=i.get('cm_data_uid','')

    repo_uoa=i.get('cm_repo_uoa','')
    repo_uid=i.get('cm_repo_uid','')

    module_uoa=i.get('cm_module_uoa','')
    module_uid=i.get('cm_module_uid','')

    data={}
    if mode=='add' or data_uid=='' or repo_uid=='' or module_uid=='':
        if 'cm_module_uoa' not in i or 'cm_data_uoa' not in i:
            return {'cm_return':1, 'cm_error':'"cm_module_uoa" and "cm_data_uoa" must be defined to load entry'}

        ii={'cm_run_module_uoa':i['cm_module_uoa'],
            'cm_data_uoa':i['cm_data_uoa'],
            'cm_action':'load'}
        if 'cm_repo_uoa' in i: ii['cm_repo_uoa']=i['cm_repo_uoa']
        rx=cm_kernel.access(ii)
        if rx['cm_return']>0: return rx

        data=rx['cm_data_obj']['cfg']

        # The loaded entry is authoritative for all identifiers
        data_uoa=rx['cm_uoa']
        data_uid=rx['cm_uid']

        repo_uoa=rx['cm_repo_uoa']
        repo_uid=rx['cm_repo_uid']

        module_uoa=rx['cm_module_uoa']
        module_uid=rx['cm_module_uid']

    index_entry={}
    jindex=''
    if mode=='add':
        # Work on a copy so if we remove keys from index, other code is not affected
        tdata=copy.deepcopy(data)

        # We force removal of not-needed keys to avoid saturations of repository
        for q in ini['cfg'].get('remove_keys_from_data_if_exceed_size',[]):
            if q in tdata:
                del(tdata[q])

        # Flatten array
        ri=cm_kernel.flatten_array({'cm_array': tdata})
        if ri['cm_return']>0: return ri
        flat_data=ri['cm_array']

        # Prepare index
        index_entry={'cm_module_uoa':module_uoa,
                     'cm_module_uid':module_uid,
                     'cm_data_uoa':data_uoa,
                     'cm_data_uid':data_uid,
                     'cfg': tdata,
                     'flat_cfg':flat_data}

        if repo_uoa!='': index_entry['cm_repo_uoa']=repo_uoa
        if repo_uid!='': index_entry['cm_repo_uid']=repo_uid

        jindex=json.dumps(index_entry)
        if 'json_size_limit' in index_cfg and len(jindex)>int(index_cfg['json_size_limit']):
            return {'cm_return':-1, 'cm_warning': 'json size exceeded limit ('+str(len(jindex))+')'}

    # Prepare URL
    url=host+'/'+module_uid+'/'+data_uid+'/1'

    r={'cm_return':0}

    if dcfg.get('use_curl_for_indexing','')=='yes':
        # Every temporary file is removed in 'finally', also on early returns
        tmp_files=[]
        try:
            # Generate output file for the delete request
            rx=cm_kernel.gen_cm_tmp_file({})
            if rx['cm_return']>0: return rx
            tfo1=rx['cm_path']
            tmp_files.append(tfo1)

            # First delete index (output is ignored)
            os.system('curl -XDELETE '+url+' -s -o '+tfo1)

            # Second, if mode=='add', add index
            if mode=='add':
                # Generate output file for the put request
                rx=cm_kernel.gen_cm_tmp_file({})
                if rx['cm_return']>0: return rx
                tfo2=rx['cm_path']
                tmp_files.append(tfo2)

                # Save json to temporary file
                rx=cm_kernel.gen_cm_tmp_file({})
                if rx['cm_return']>0: return rx
                tf=rx['cm_path']
                tmp_files.append(tf)

                r=cm_kernel.save_array_to_file_as_json({'cm_filename':tf, 'cm_array':index_entry})
                if r['cm_return']>0: return r

                os.system('curl -XPUT '+url+' -d @'+tf+' -s -o '+tfo2)

                # Read output
                r1=cm_kernel.load_json_file({'cm_filename':tfo2})
                if r1['cm_return']>0: return r1

                # An answer without "ok" is treated as success here
                if not r1['cm_array'].get('ok',True):
                    return {'cm_return':1, 'cm_error': 'indexing failed'}
        finally:
            for path in tmp_files:
                if os.path.isfile(path): os.remove(path)

    else:
        if i.get('sequential','')=='yes':
            r=index_seq(url, jindex, mode, module_uid, data_uid)
        else:
            t=threading.Thread(target=index_seq, args=(url, jindex, mode, module_uid, data_uid))
            t.start()
            # TBD: need to add check if was success or not!

    return r
00639 
00640 # ============================================================================
def index_seq(url, jindex, mode, module_uid, data_uid):

    """
    Add or delete index

    For mode 'add' or 'delete' a DELETE request is sent to url; a failure of
    that request is ignored. For mode 'add' the function then calls itself
    with mode '' to send jindex as request data without overriding the
    method; failures of that second request are reported.

    Input:  {
              url        - Elastic search URL
              jindex     - index in json to add
              mode       - 'delete' | 'add' ('' is used internally for the add step)
              module_uid - module UID, only used in error messages
              data_uid   - data UID, only used in error messages
            }

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return>0
            }
    """

    if cm_kernel.ini['dcfg'].get('use_indexing','')!='yes':
        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}

    request=urllib2.Request(url)
    if mode=='add' or mode=='delete':
        request.get_method=lambda: 'DELETE'

    try:
        f=urllib2.urlopen(request, jindex)
    except urllib2.URLError as e:
        if mode=='':
            cm_kernel.print_for_con('cM error: can\'t index ('+format(e)+')')
            return {'cm_return':1, 'cm_error':'can\'t index ('+format(e)+')'}

        # If not found, should ignore; but otherwise TBD - should print error (such as connection refused)
    else:
        # Always release the connection, also when reading fails
        try:
            s=str(f.read())
        finally:
            f.close()

        # Only module_uid/data_uid are known here; the original messages used
        # the undefined names module_uoa/data_uoa and raised NameError.
        entry=module_uid+':'+data_uid

        try:
            j=json.loads(s)
        except ValueError:
            cm_kernel.print_for_con('cM error: can\'t parse output during indexing '+entry)
            return {'cm_return':1, 'cm_error':'can\'t parse output during indexing'}

        # A missing "ok" field is reported as a failure instead of raising KeyError
        if not isinstance(j, dict) or not j.get('ok', False):
            cm_kernel.print_for_con('cM error: indexing was not successful during indexing '+entry)
            return {'cm_return':1, 'cm_error':'indexing was not successful indexing '+entry}

    if mode=='add':
        # repeat add after delete
        return index_seq(url, jindex, '', module_uid, data_uid)

    return {'cm_return':0}
00690 
00691 # ============================================================================
00692 def search(i):
00693 
00694     """
00695     Search
00696 
00697     Input:  {
00698               cm_es_string   - simple search
00699               cm_es_query    - elastic search query
00700               (cm_index_uoa) - index configuration
00701               (size)         - limit return array
00702             }
00703 
00704     Output: {
00705               cm_return  - return code >0 if error
00706               cm_array   - array of UIDs
00707             }
00708     """
00709 
00710     if cm_kernel.ini['dcfg'].get('use_indexing','')!='yes':
00711        return {'cm_return':1, 'cm_error':'indexing is OFF in the default kernel configuration'}
00712 
00713     size=i.get('size','10000')
00714 
00715     if 'cm_es_string' not in i and 'cm_es_query' not in i:
00716        return {'cm_return':1, 'cm_error':'neither "cm_es_string" nor "cm_es_query" is defined'}
00717 
00718     all=[]
00719 
00720     # Check (default) index configuration
00721     index_uoa=''
00722     if 'cm_index_uoa' in i and i['cm_index_uoa']!='': index_uoa=i['cm_index_uoa']
00723     elif 'cm_default_index_uoa' in cm_kernel.ini['dcfg'] and cm_kernel.ini['dcfg']['cm_default_index_uoa']!='':
00724        index_uoa=cm_kernel.ini['dcfg']['cm_default_index_uoa']
00725 
00726     if index_uoa=='':
00727        return {'cm_return':1, 'cm_error':'"cm_index_uoa" is not defined'}
00728 
00729     # Load index
00730     r=cm_kernel.access({'cm_run_module_uoa':ini['cm_module_uid'],
00731                         'cm_action':'load',
00732                         'cm_data_uoa':index_uoa})
00733     if r['cm_return']>0: return r
00734     index_cfg=r['cm_data_obj']['cfg']
00735 
00736     host=index_cfg['host']+':'+index_cfg.get('port','9200')
00737 
00738     # Check query
00739     jindex=None
00740     if 'cm_es_string' in i:
00741        url=host+'/_search?q='+urllib.quote_plus(i['cm_es_string'].encode('utf-8'))+'&size='+size
00742     else:
00743        url=host+'/_search?size='+size
00744        jindex = json.dumps({"query":i['cm_es_query']})
00745 
00746     try:
00747        if jindex==None:
00748           f=urllib2.urlopen(url)
00749        else:
00750           f=urllib2.urlopen(url, jindex)
00751 
00752     except urllib2.URLError, e:
00753        x='error while accessing index server ('+format(e)
00754        if jindex!=None:
00755          x+='; '+json.dumps(jindex, indent=2)
00756        x+=')'
00757        return {'cm_return':1, 'cm_error':x}
00758     else:
00759        s=str(f.read())
00760        try:
00761           j=json.loads(s)
00762        except:
00763           return {'cm_return':1, 'cm_error':'can\'t parse output during search'}
00764 
00765        f.close()
00766 
00767        # Cache repo and module
00768        repo={}
00769        module={}
00770 
00771        if 'hits' in j and 'total' in j['hits'] and long(j['hits']['total'])>0:
00772           for x in j['hits']['hits']:
00773               y=x['_source']
00774 
00775               d=y['cfg']
00776 
00777               # Adding to the list
00778               ii={'cm_module_uoa':y['cm_module_uoa'],
00779                   'cm_module_uid':y['cm_module_uid'],
00780                   'cm_data_uoa':y['cm_data_uoa'],
00781                   'cm_data_uid':y['cm_data_uid']}
00782               if 'cm_repo_uoa' in y: ii.update({'cm_repo_uoa':y['cm_repo_uoa']})
00783               if 'cm_repo_uid' in y: ii.update({'cm_repo_uid':y['cm_repo_uid']})
00784               if 'cm_display_as_alias' in d: ii.update({'cm_display_as_alias':d['cm_display_as_alias']})
00785 
00786               repo_uoa=y['cm_repo_uoa']
00787               if repo_uoa not in repo:
00788                  # Load repo to check if should ignore external repos for search
00789                  r9=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['cm-repo'],
00790                                       'cm_data_uoa':repo_uoa,
00791                                       'cm_action':'load'})
00792                  if r9['cm_return']>0: 
00793                     repo[repo_uoa]={'access':'no'}
00794                  else:
00795                     r9d=r9['cm_data_obj']['cfg']
00796                     if r9d.get('exclude_from_search','')=='yes':
00797                        repo[repo_uoa]={'access':'no'}
00798                     else:
00799                        repo[repo_uoa]={'access':'yes', 'cm_display_as_alias':r9d.get('cm_display_as_alias','')}
00800                        ii['cm_repo_display_as_alias']=r9d.get('cm_display_as_alias','')
00801               else:
00802                  ii['cm_repo_display_as_alias']=repo[repo_uoa].get('cm_display_as_alias','')
00803 
00804               if repo[repo_uoa]['access']=='yes':
00805 
00806                  module_uoa=y['cm_module_uoa']
00807                  if module_uoa not in module:
00808                     # Load repo to check if should ignore external repos for search
00809                     r8=cm_kernel.access({'cm_run_module_uoa':ini['cfg']['cm_modules']['cm-module'],
00810                                          'cm_data_uoa':module_uoa,
00811                                          'cm_action':'load'})
00812                     if r8['cm_return']>0: 
00813                        module[module_uoa]={'access':'no'}
00814                     else:
00815                        r8d=r8['cm_data_obj']['cfg']
00816                        module[module_uoa]={'access':'yes', 'cm_display_as_alias':r8d.get('cm_display_as_alias','')}
00817                        ii['cm_module_display_as_alias']=r8d.get('cm_display_as_alias','')
00818                  else:
00819                     ii['cm_module_display_as_alias']=module[module_uoa].get('cm_display_as_alias','')
00820 
00821                  # Check fine-grain access
00822                  ac=cm_kernel.ini['dcfg'].get('access_control',{})
00823                  if i.get('cm_admin','')!='yes' and cm_kernel.ini['web']=='yes' and ac.get('use','')=='yes':
00824                     rx=cm_kernel.access_control({'dcfg':d, 'module_uoa':y['cm_module_uoa'], \
00825                                                  'module_uid':y['cm_module_uid'], 'data_uoa':y['cm_data_uoa'], 'key':'read'})
00826                     if rx['cm_return']==0:
00827                        all.append(ii)
00828                  else:
00829                     all.append(ii)
00830 
00831     if i.get('cm_console','')=='txt':
00832        for x in all:
00833            cm_kernel.print_for_con(x['cm_repo_uoa']+':'+x['cm_module_uoa']+':'+x['cm_data_uoa'])
00834 #       cm_kernel.print_for_con(json.dumps(all, indent=4))
00835 
00836     return {'cm_return':0, 'cm_array':all}
00837 
00838 # ============================================================================
def clean(i):

    """
    Clean index: delete the indexed entries that match the given
    repository, module and/or data UOA

    Input:  {
              (index_repo_uoa)   - Repository UOA (or UID) to clean indexes
              (index_module_uoa) - Module UOA (or UID) to clean indexes
              (index_data_uoa)   - Data UOA (or UID) to clean indexes
            }

            At least one of the keys must be non-empty; when several are
            given, an entry has to match all of them.

    Output: {
              cm_return  - return code >0 if error
              (cm_error) - error text if cm_return >0
            }
    """

    # Prepare search string: one clause per supplied filter, where the value
    # may match either the UID field or the UOA field of the indexed entry
    clauses=[]
    for input_key, field in (('index_repo_uoa',   'cm_repo'),
                             ('index_module_uoa', 'cm_module'),
                             ('index_data_uoa',   'cm_data')):
        value=i.get(input_key,'')
        if value=='':
            continue

        # The value is embedded in a quoted phrase, so escape backslash and
        # double quote; otherwise a value containing '"' could terminate the
        # phrase and change which entries this destructive action matches
        value=value.replace('\\','\\\\').replace('"','\\"')

        # Leading space is kept so that the generated query text stays the
        # same as before for values that need no escaping
        clauses.append(' (('+field+'_uid:"'+value+'") OR ('+field+'_uoa:"'+value+'"))')

    if not clauses:
        return {'cm_return':1,'cm_error':'empty query - if you would like to clean the whole index, use "flush"'}

    es_string=' AND '.join(clauses)

    # NOTE(review): search() adds a "size" parameter to its request, so one
    # call may return only part of the matching entries - confirm whether
    # clean should repeat until nothing is found
    r=search({'cm_es_string':es_string})
    if r['cm_return']>0: return r

    entries=r['cm_array']
    total=len(entries)
    for num, q in enumerate(entries, 1):
        cm_kernel.print_for_con('Cleaning '+q['cm_repo_uoa']+':'+q['cm_module_uoa']+':'+q['cm_data_uoa']+' entry ('+str(num)+' of '+str(total)+')')

        ii={'cm_repo_uoa':q['cm_repo_uoa'],
            'cm_module_uoa':q['cm_module_uoa'],
            'cm_module_uid':q['cm_module_uid'],
            'cm_data_uoa':q['cm_data_uoa'],
            'cm_data_uid':q['cm_data_uid'],
            'mode':'delete'}

        # search() copies cm_repo_uid only when the indexed entry has it,
        # so pass it on only if present instead of failing with KeyError
        if 'cm_repo_uid' in q:
            ii['cm_repo_uid']=q['cm_repo_uid']

        r=index(ii)
        if r['cm_return']>0: return r

    return {'cm_return':0}

Generated on Wed May 28 02:49:02 2014 for Collective Mind Framework by DoxyGen 1.6.1
Concept, design and coordination: Grigori Fursin (C) 1993-2013