Package pypln :: Package stores :: Module filestore
[hide private]

Source Code for Module pypln.stores.filestore

  1  # coding: utf-8 
  2  """ 
  3  Configure an interface for accessing files in MongoDB's GridFS 
  4  """ 
  5   
  6  import os 
  7  from hashlib import md5 
  8  from logging import Logger, StreamHandler, Formatter 
  9  from sys import stdout 
 10  from pymongo import Connection 
 11  from gridfs import GridFS 
 12  from gridfs.errors import FileExists 
 13  from bson.errors import InvalidStringData 
 14   
 15   
class FS(object):
    """
    Thin wrapper around a MongoDB GridFS file store.

    The constructor sets the following attributes:

    - ``conn``: the pymongo ``Connection`` to the server
    - ``db``: the database handle that holds the GridFS collections
    - ``fs``: the ``GridFS`` instance bound to ``db``
    - ``logger``: logger used to report skipped or renamed files
    """

    def __init__(self, database, host='127.0.0.1', port=27017, usr=None,
                 pw=None, create=False, logger=None):
        """
        Sets up a connection to a GridFS on a given database.
        If the database does not exist and create=True,
        the database is also created.

        :param database: name of the database on which to look for GridFS
        :param host: host of the MongoDB server
        :param port: port of the MongoDB server
        :param usr: user authorized for the connection
        :param pw: password for the authorized user
        :param create: whether to create a file storage if it doesn't exist
        :param logger: logger to use; when None, a 'filestore' logger that
            writes to stdout is created
        :raises NameError: if the database does not exist and create is False
        """
        self.conn = Connection(host=host, port=port)
        if database not in self.conn.database_names():
            if not create:
                raise NameError('Database does not exist (if you want to '
                                'create, use create=True')
            # New database: register the credentials before authenticating.
            self.db = self.conn[database]
            if usr and pw:
                self.db.add_user(usr, pw)
        self.db = self.conn[database]
        if usr and pw:
            self.db.authenticate(usr, pw)
        self.fs = GridFS(self.db)
        if logger is None:
            # A standalone Logger (not logging.getLogger) so that repeated
            # instantiation does not stack handlers on a shared logger.
            logger = Logger('filestore')
            handler = StreamHandler(stdout)
            formatter = Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        self.logger = logger

    def _rename(self, fn):
        """
        Get a filename to rename (try variations to find an available name).

        The candidates are ``fn`` itself, then ``<stem>_1<ext>``,
        ``<stem>_2<ext>``, ... where stem and extension come from
        ``os.path.splitext``; the first one not present in the store wins.

        :param fn: desired filename (without directory)
        :return: filename that does not yet exist in the file store
        """
        stem, ext = os.path.splitext(fn)
        candidate = fn
        i = 0
        while self.fs.exists(filename=candidate):
            i += 1
            # Always rebuild from the original stem so suffixes never stack
            # (e.g. "a_2.txt", not "a_2_1.txt").
            candidate = '{0}_{1}{2}'.format(stem, i, ext)
        return candidate

    @staticmethod
    def _file_md5(path, chunk_size=1 << 20):
        """
        Return the hex MD5 digest of the file at ``path``.

        The file is read as bytes, in chunks, so binary content is hashed
        unmodified and large files are not loaded into memory at once.
        """
        digest = md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def add_file(self, fname):
        """
        Add a file to the collection.

        If a file with the same base name is already stored, its MD5 is
        compared with the local file: identical content is skipped, different
        content is stored under a new name obtained from ``_rename``.

        :param fname: path of the file to store
        :return: id of the stored file, or None if the file was skipped or
            could not be stored
        """
        fn = os.path.split(fname)[1]
        if self.fs.exists(filename=fn):
            # Check md5 to see if the files are really the same
            ef = self.fs.get_version(filename=fn)
            if ef.md5 == self._file_md5(fname):
                self.logger.warning('File {} has already been stored, '
                                    'skipping.'.format(fn))
                return
            fn_new = self._rename(fn)
            self.logger.warning('Name collision. Renaming {0} to {1}'
                                .format(fn, fn_new))
        else:
            fn_new = fn

        # Binary mode: GridFS stores raw bytes, and text mode would alter
        # the data on platforms that translate line endings.
        with open(fname, 'rb') as f:
            try:
                fid = self.fs.put(f, filename=fn_new)
            except FileExists:
                self.logger.warning('File {} has already been processed, '
                                    'skipping.'.format(fn))
                fid = None
            except InvalidStringData:
                self.logger.error(
                    'Invalid string data for file {0}'.format(fn))
                fid = None
        return fid

    def drop(self):
        """
        Drop the file storage.

        ``self.fs`` is created with GridFS's default root collection ``fs``,
        whose data lives in the ``fs.files`` and ``fs.chunks`` collections,
        so those are dropped. The bare ``fs`` collection is dropped as well
        to keep the previous behaviour.
        """
        for name in ('fs.files', 'fs.chunks', 'fs'):
            self.db.drop_collection(name)
102