FastDFS非常適合儲存大量的小檔案,遺憾的是本身不支援自定義檔名,檔名是儲存成功以後根據儲存位置生成的一個file_id。很多應用場景不得不使用自定義檔名,在不修改其原始碼的情況下,可以在儲存客戶端fdfs_client增加一個用來儲存自定義檔名和fastdfs的file_id之間的對映關係的資料庫間接實現自定義檔名的存取和訪問,在這裡我們選用了redis。順便說一下,淘寶也有一個類似於FastDFS的檔案儲存系統TFS,對於自定義檔名,它是用mysql來儲存對映關係的,我認為在高併發訪問下mysql本身就是瓶頸,因此在這個方案中採用了redis。
準備工作:
fastdfs環境安裝...略...(官方:https://code.google.com/p/fastdfs/)
redis環境安裝...略...(官方:http://redis.io/)
用python實現,因此需要安裝fastdfs的python客戶端(下載:https://fastdfs.googlecode.com/files/fdfs_client-py-1.2.6.tar.gz)
python的redis客戶端,到https://pypi.python.org/pypi/redis下載
# -*- coding: utf-8 -*-
"""FastDFS storage client with custom-filename support.

FastDFS itself only returns a generated file_id; this module keeps a
filename -> metadata mapping in one or more redis databases so files can
be stored and fetched under user-chosen names.
"""
import setting
from fdfs_client.client import *
from fdfs_client.exceptions import *
from fdfs_client.connection import *
import redis
import time
import logging
import random

logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


class RedisError(Exception):
    """Raised when persisting file metadata to redis fails."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class fastdfsClient(Fdfs_client):
    """Fdfs_client configured from the `setting` module instead of a conf file."""

    def __init__(self):
        # Bypass Fdfs_client.__init__ (which parses a conf file) and build
        # the tracker pool straight from the settings dict.
        self.tracker_pool = ConnectionPool(**setting.fdfs_tracker)
        self.timeout = setting.fdfs_tracker['timeout']

    def __del__(self):
        # BUG FIX: the original destroyed `self.pool`, an attribute that is
        # never created (__init__ sets `self.tracker_pool`), so the pool was
        # silently leaked behind a bare except.
        try:
            self.tracker_pool.destroy()
            self.tracker_pool = None
        except Exception:
            pass


class fastdfs(object):
    """Facade combining a FastDFS client with redis-backed filename metadata."""

    def __init__(self):
        self.fdfs_client = fastdfsClient()
        # One redis connection per configured (host, port, db) triple; every
        # write is replicated to all of them, reads pick one at random.
        self.fdfs_redis = []
        for i in setting.fdfs_redis_dbs:
            self.fdfs_redis.append(redis.Redis(host=i[0], port=i[1], db=i[2]))

    def store_by_buffer(self, buf, filename=None, file_ext_name=None):
        '''Store a file from an in-memory buffer.

        Args:
            buf: file content (bytes/str buffer).
            filename: custom file name; if omitted, the remote file_id is used.
            file_ext_name: optional extension; FastDFS guesses it otherwise.

        Returns:
            {'group': group name, 'file_id': file id without group,
             'size': uploaded size, 'upload_time': upload time in ms}
            or None on failure.
        '''
        if filename:
            # Use ONE redis connection for both the existence test and the
            # read; the original called random.choice twice and could check
            # one replica but read from another.
            fdb = random.choice(self.fdfs_redis)
            if fdb.exists(filename):
                logger.info('File(%s) exists.' % filename)
                return fdb.hgetall(filename)
        t1 = time.time()
        try:
            ret_dict = self.fdfs_client.upload_by_buffer(buf, file_ext_name)
        except Exception as e:
            # Restored: this handler was commented out, so any upload error
            # propagated to the caller instead of returning None.
            logger.error('Error occurred while uploading: %s' % e)
            return None
        t2 = time.time()
        logger.info('Upload file(%s) by buffer, time consume: %fs' % (filename, (t2 - t1)))
        for key in ret_dict:
            logger.debug('[+] %s : %s' % (key, ret_dict[key]))
        stored_filename = ret_dict['Remote file_id']
        # Strip the leading "group/" prefix from the remote file id.
        stored_filename_without_group = stored_filename[stored_filename.index('/') + 1:]
        if not filename:
            filename = stored_filename_without_group
        vmp = {'group': ret_dict['Group name'],
               'file_id': stored_filename_without_group,
               'size': ret_dict['Uploaded size'],
               'upload_time': int(time.time() * 1000)}
        try:
            # Replicate the metadata to every configured redis db; any
            # failure rolls back the FastDFS upload.
            for i in self.fdfs_redis:
                if not i.hmset(filename, vmp):
                    raise RedisError('Save Failure')
            logger.info('Store file(%s) by buffer successful' % filename)
        except Exception:
            logger.error('Save info to Redis failure. rollback...')
            try:
                self.fdfs_client.delete_file(stored_filename)
            except Exception as e:
                logger.error('Error occurred while deleting: %s' % e)
            return None
        return vmp

    def remove(self, filename):
        '''Delete a file by its custom filename.

        Returns True on success, False otherwise.
        '''
        fileinfo = random.choice(self.fdfs_redis).hgetall(filename)
        if not fileinfo:
            # Robustness: the original raised KeyError for unknown names.
            logger.error('%s not found in redis' % filename)
            return False
        stored_filename = '%s/%s' % (fileinfo['group'], fileinfo['file_id'])
        try:
            self.fdfs_client.delete_file(stored_filename)
            logger.info('Remove stored file successful')
        except Exception as e:
            logger.error('Error occurred while deleting: %s' % e)
            return False
        for i in self.fdfs_redis:
            if not i.delete(filename):
                logger.error('Remove fileinfo in redis failure')
        logger.info('%s removed.' % filename)
        return True

    def download(self, filename):
        '''Download a file by custom filename; returns its bytes or None.'''
        finfo = self.getInfo(filename)
        if finfo:
            ret = self.fdfs_client.download_to_buffer('%s/%s' % (finfo['group'], finfo['file_id']))
            return ret['Content']
        else:
            logger.debug('%s is not exists' % filename)
            return None

    def list(self, pattern='*'):
        '''List stored custom filenames matching a redis glob pattern.'''
        return random.choice(self.fdfs_redis).keys(pattern)

    def getInfo(self, filename):
        '''Return the stored metadata hash for a filename.

        Returns: {'group': ..., 'file_id': ..., 'size': ..., 'upload_time': ...}
        (empty dict if the filename is unknown).
        '''
        return random.choice(self.fdfs_redis).hgetall(filename)
配置:
# -*- coding: utf-8 -*-
"""Settings for the FastDFS + redis storage client."""

# FastDFS tracker pool; several trackers may be listed in host_tuple.
fdfs_tracker = dict(
    host_tuple=('192.168.2.233', '192.168.2.234'),
    port=22122,
    timeout=30,
    name='Tracker Pool',
)

# Redis databases holding the filename -> file_id metadata,
# as (host, port, db) triples; several databases may be listed.
fdfs_redis_dbs = (
    ('192.168.2.233', 6379, 0),
    ('192.168.2.233', 6379, 1),
)