1
2 """
3 Configure an interface for accessing files in MongoDB's GridFS
4 """
5
6 import os
7 from hashlib import md5
8 from logging import Logger, StreamHandler, Formatter
9 from sys import stdout
10 from pymongo import Connection
11 from gridfs import GridFS
12 from gridfs.errors import FileExists
13 from bson.errors import InvalidStringData
14
15
def __init__(self, database, host='127.0.0.1', port=27017, usr=None,
             pw=None, create=False, logger=None):
    """
    Set up a connection to the GridFS of a given database.

    If the database is not listed by the server and ``create`` is true,
    no error is raised and, when both ``usr`` and ``pw`` are given, that
    user is first added to the database. Whenever both ``usr`` and ``pw``
    are given, the connection is then authenticated with them.

    :param database: name of the database on which to look for GridFS
    :param host: host of the MongoDB
    :param port: port of MongoDB
    :param usr: user authorized for the connection
    :param pw: password for the authorized user
    :param create: whether to create a file storage if it doesn't exist
    :param logger: logger used for reporting; when None, a logger named
        'filestore' that writes to stdout is created
    :raises NameError: if the database does not exist and ``create`` is
        false
    """
    self.conn = Connection(host=host, port=port)

    # Check existence before touching the database handle so a missing
    # database is reported instead of being used silently.
    is_new = database not in self.conn.database_names()
    if is_new and not create:
        raise NameError('Database does not exist (if you want to '
                        'create, use create=True)')

    self.db = self.conn[database]
    if usr and pw:
        if is_new:
            # A freshly created database has no such user yet; register
            # it so the authenticate() call below can succeed.
            self.db.add_user(usr, pw)
        self.db.authenticate(usr, pw)
    self.fs = GridFS(self.db)

    if logger is None:
        # Default: a standalone logger that prints to stdout.
        logger = Logger('filestore')
        handler = StreamHandler(stdout)
        formatter = Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    self.logger = logger
50
52 """
53 Get a filename to rename (try variations to find an available name)
54 :return: filename
55 """
56 i = 0
57 pos = fn.find('.',-5)
58 while self.fs.exists(filename=fn):
59 fn = fn.strip(str(i))
60 i += 1
61 fn = fn[:pos]+"_"+str(i)+fn[pos:]
62 return fn
63
65 """
66 Add a file to the collection
67 :Parameters:
68 - fname: filename
69 """
70 fn = os.path.split(fname)[1]
71 if self.fs.exists(filename=fn):
72
73 ef = self.fs.get_version(filename=fn)
74 with open(fname, 'r') as f:
75 filemd5 = md5(f.read()).hexdigest()
76 if ef.md5 == filemd5:
77 self.logger.warning('File {} has already been stored, '
78 'skipping.'.format(fn))
79 return
80 else:
81 fn_new = self._rename(fn)
82 self.logger.warning('Name collision. Renaming {0} to {1}'\
83 .format(fn, fn_new))
84 else:
85 fn_new = fn
86
87 with open(fname, 'r') as f:
88 try:
89 fid = self.fs.put(f, filename=fn_new)
90 except FileExists:
91 self.logger.warning('File {} has already been processed, '
92 'skipping.'.format(fn))
93 fid = None
94 except InvalidStringData as err:
95 self.logger.error('Invalid string data for file {0}'.format(fn))
96 fid = None
97 return fid
98
100 """ Drop the file storage """
101 self.db.drop_collection('fs')
102