Storing Massive Numbers of Files with Custom Filenames Using FastDFS and Redis

Posted by pythontab on 2013-08-08

FastDFS is well suited to storing large numbers of small files. Unfortunately, it does not support custom filenames: the name you get back is a file_id generated from the storage location after a successful upload. Many applications need custom filenames. Without modifying the FastDFS source code, you can add a database on the client side (alongside fdfs_client) that stores the mapping between custom filenames and FastDFS file_ids, which indirectly gives you storage and retrieval by custom filename; here we use Redis for that mapping. Incidentally, Taobao has a similar file storage system, TFS, which uses MySQL to store this mapping for custom filenames. Under highly concurrent access, MySQL itself becomes the bottleneck, which is why this solution uses Redis instead.
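
The core idea fits in a few lines: keep one Redis hash per custom filename whose fields point back to the FastDFS location. Below is a minimal sketch of that mapping only; the Redis address, the key and the file_id are made-up examples, and the full client later in this article uses the same hash fields.

# Minimal sketch of the filename -> file_id mapping; the address, key and
# file_id values here are illustrative examples only.
import redis

r = redis.Redis(host='127.0.0.1', port=6379, db=0)

# After FastDFS accepts an upload it returns a generated remote file_id such as
# 'group1/M00/00/00/wKgC6QABcdefGHIJ.jpg'; record it under the custom filename.
r.hmset('reports/2013-08.pdf', {
    'group': 'group1',
    'file_id': 'M00/00/00/wKgC6QABcdefGHIJ.jpg',
})

# Later, resolve the custom filename back to the FastDFS location.
print(r.hgetall('reports/2013-08.pdf'))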

Preparation:

Install the FastDFS environment ... omitted ... (official site: https://code.google.com/p/fastdfs/)

Install the Redis environment ... omitted ... (official site: http://redis.io/)

The implementation is in Python, so the FastDFS Python client is required (download: https://fastdfs.googlecode.com/files/fdfs_client-py-1.2.6.tar.gz)

The Python Redis client is available from https://pypi.python.org/pypi/redis
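
Before moving on, a quick check like the following confirms that both client libraries import and connect. This is only a sketch: the client.conf path and the Redis address are assumptions, so adjust them to your environment.

# Quick check that both client libraries are installed and reachable.
# The conf path and Redis address are assumptions; adjust to your environment.
import redis
from fdfs_client.client import Fdfs_client

r = redis.Redis(host='127.0.0.1', port=6379, db=0)
print(r.ping())                                  # True if Redis is answering

client = Fdfs_client('/etc/fdfs/client.conf')    # stock fdfs_client-py usage with a conf file
print(client.tracker_pool)                       # tracker connection pool built from the conf

With the environment verified, the wrapper module follows: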

# -*- coding: utf-8 -*-
import setting
from fdfs_client.client import *
from fdfs_client.exceptions import *
 
from fdfs_client.connection import *
 
import redis
import time
import logging
import random
 
logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
 
 
class RedisError(Exception):
    '''Raised when writing the filename mapping to Redis fails.'''
    def __init__(self, value):
        self.value = value
    def __str__(self):
        return repr(self.value)
 
class fastdfsClient(Fdfs_client):
    '''
    Fdfs_client subclass that builds its tracker connection pool from the
    setting module instead of reading a FastDFS client.conf file.
    '''
    def __init__(self):
        self.tracker_pool = ConnectionPool(**setting.fdfs_tracker)
        self.timeout = setting.fdfs_tracker['timeout']
        return None

    def __del__(self):
        try:
            # Release tracker connections when the client is garbage collected.
            self.tracker_pool.destroy()
            self.tracker_pool = None
        except:
            pass
 
class fastdfs(object):
    def __init__(self):
        '''
        Build the FastDFS client plus one Redis connection per entry in
        setting.fdfs_redis_dbs; each entry is a (host, port, db) tuple.
        '''
        self.fdfs_client = fastdfsClient()
        self.fdfs_redis = []
        for i in setting.fdfs_redis_dbs:
            self.fdfs_redis.append(redis.Redis(host=i[0], port=i[1], db=i[2]))
 
    def store_by_buffer(self, buf, filename=None, file_ext_name=None):
        '''
        Store a file from a buffer.
        Arguments:
        filename: custom filename; if not given, the remote file_id is used as the filename
        file_ext_name: file extension (optional), passed straight through to FastDFS
        Returns:
        {
        'group': group name,
        'file_id': file ID without the group name,
        'size': file size,
        'upload_time': upload time (ms since epoch)
        }
        '''
        if filename:
            rds = random.choice(self.fdfs_redis)  # every instance holds the same mapping
            if rds.exists(filename):
                logger.info('File(%s) exists.' % filename)
                return rds.hgetall(filename)
        t1 = time.time()
        try:
            ret_dict = self.fdfs_client.upload_by_buffer(buf, file_ext_name)
        except Exception as e:
            logger.error('Error occurred while uploading: %s' % e)
            return None
        t2 = time.time()
        logger.info('Upload file(%s) by buffer, time consume: %fs' % (filename, (t2 - t1)))
        for key in ret_dict:
            logger.debug('[+] %s : %s' % (key, ret_dict[key]))
        stored_filename = ret_dict['Remote file_id']
        stored_filename_without_group = stored_filename[stored_filename.index('/') + 1:]
        if not filename:
            filename = stored_filename_without_group
        vmp = {'group': ret_dict['Group name'],
               'file_id': stored_filename_without_group,
               'size': ret_dict['Uploaded size'],
               'upload_time': int(time.time() * 1000)}
        try:
            # Write the mapping to every Redis instance; roll the upload back if any write fails.
            for i in self.fdfs_redis:
                if not i.hmset(filename, vmp):
                    raise RedisError('Save Failure')
                logger.info('Store file(%s) by buffer successful' % filename)
        except Exception as e:
            logger.error('Save info to Redis failure. rollback...')
            try:
                ret_dict = self.fdfs_client.delete_file(stored_filename)
            except Exception as e:
                logger.error('Error occurred while deleting: %s' % e)
            return None
        return vmp
 
    def remove(self, filename):
        '''
        Delete a file.
        filename is the user-defined filename.
        return True|False
        '''
        fileinfo = random.choice(self.fdfs_redis).hgetall(filename)
        if not fileinfo:
            logger.error('%s does not exist' % filename)
            return False
        stored_filename = '%s/%s' % (fileinfo['group'], fileinfo['file_id'])
        try:
            ret_dict = self.fdfs_client.delete_file(stored_filename)
            logger.info('Remove stored file successful')
        except Exception as e:
            logger.error('Error occurred while deleting: %s' % e)
            return False
        for i in self.fdfs_redis:
            if not i.delete(filename):
                logger.error('Remove fileinfo in redis failure')
        logger.info('%s removed.' % filename)
        return True
 
    def download(self, filename):
        '''
        Download a file.
        Returns the file content as a binary buffer, or None if the file is unknown.
        '''
        finfo = self.getInfo(filename)
        if finfo:
            ret = self.fdfs_client.download_to_buffer('%s/%s' % (finfo['group'], finfo['file_id']))
            return ret['Content']
        else:
            logger.debug('%s does not exist' % filename)
            return None

    def list(self, pattern='*'):
        '''
        List stored custom filenames matching a pattern.
        '''
        return random.choice(self.fdfs_redis).keys(pattern)
 
    def getInfo(self, filename):
        '''
        Get file info.
        return: {
        'group': group name,
        'file_id': file ID without the group name,
        'size': file size,
        'upload_time': upload time (ms since epoch)
        }
        '''
        return random.choice(self.fdfs_redis).hgetall(filename)


Configuration (the setting module imported above):

# -*- coding: utf-8 -*-
# fastdfs tracker, multiple trackers supported
fdfs_tracker = {
    'host_tuple': ('192.168.2.233', '192.168.2.234'),
    'port': 22122,
    'timeout': 30,
    'name': 'Tracker Pool'
}
# fastdfs meta db, multiple redis dbs supported; each entry is (host, port, db)
fdfs_redis_dbs = (
    ('192.168.2.233', 6379, 0),
    ('192.168.2.233', 6379, 1)
)
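
Finally, a minimal usage sketch. The module name fastdfs_redis, the demo filename and the buffer content are assumptions for illustration; save the wrapper class above next to setting.py under whatever name you prefer.

# Illustrative usage of the wrapper above; assumes the class is importable as
# fastdfs from a module saved alongside setting.py. Filenames and content here
# are made up for the example.
from fastdfs_redis import fastdfs

fs = fastdfs()

# Store a buffer under a custom filename.
info = fs.store_by_buffer('hello fastdfs', filename='docs/hello.txt', file_ext_name='txt')
print(info)            # {'group': ..., 'file_id': ..., 'size': ..., 'upload_time': ...}

# Look up metadata and download by the custom filename.
print(fs.getInfo('docs/hello.txt'))
content = fs.download('docs/hello.txt')

# List mapped filenames and remove the file again.
print(fs.list('docs/*'))
fs.remove('docs/hello.txt')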

