Connecting to databases in Scrapy pipelines

Posted by weixin_34146805 on 2018-06-15

Add the following to settings.py in your Scrapy project:

# MongoDB connection settings
MONGODB_HOST = '127.0.0.1'
MONGODB_PORT = 27017
MONGODB_DBNAME = 'data'
MONGODB_DOCNAME = 'chinadata'

ITEM_PIPELINES = {
   # 'chinaico.pipelines.ChinaicoPipeline': 300,
   'chinaico.pipelines.RedisPipeline': 301,
   'chinaico.pipelines.MongoPipeline': 302,  # distinct priorities give a deterministic order
}
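
The RedisPipeline below hardcodes its connection parameters, but they can live in settings.py alongside the MongoDB ones and be read the same way. A minimal sketch, where the REDIS_* names are my own convention rather than Scrapy built-ins:

# Redis connection settings (custom names, read via settings['REDIS_HOST'] etc.)
REDIS_HOST = '127.0.0.1'
REDIS_PORT = 6379
REDIS_DB = 3
REDIS_PASSWORD = ''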

Then set up pipelines.py in the Scrapy project:

import datetime
import json

import pymongo
import redis
from scrapy.utils.project import get_project_settings

class ChinaicoPipeline(object):
    # Default pass-through pipeline (disabled in ITEM_PIPELINES above)
    def process_item(self, item, spider):
        return item

class RedisPipeline(object):
    def __init__(self):
        # StrictRedis connection; db=3 selects the fourth logical database
        self.r = redis.StrictRedis(host='127.0.0.1', port=6379, db=3, password='')

    def process_item(self, item, spider):
        # Serialize the item to JSON and add it to a Redis set
        # (sadd deduplicates identical payloads automatically)
        self.r.sadd("webchinadata", json.dumps(dict(item), ensure_ascii=False))
        return item
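
To check what was stored, the set can be read back outside of Scrapy. A minimal sketch, assuming the same Redis instance and key as above:

import json
import redis

r = redis.StrictRedis(host='127.0.0.1', port=6379, db=3, password='')
for raw in r.smembers("webchinadata"):
    item = json.loads(raw)  # json.loads accepts bytes on Python 3.6+
    print(item)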


class MongoPipeline(object):

    def __init__(self):
        settings = get_project_settings()
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        db_name = settings['MONGODB_DBNAME']
        client = pymongo.MongoClient(host=host, port=port)
        db = client[db_name]
        self.post = db[settings['MONGODB_DOCNAME']]

    def process_item(self, item, spider):
        china_data = dict(item)
        china_data["time"] = datetime.datetime.now()
        # Insert data:
        # self.post.insert_one(china_data)
        # Update data: match on "name" and upsert, so a re-crawled item
        # overwrites the existing document instead of creating a duplicate
        self.post.update_one({"name": item["name"]}, {"$set": china_data}, upsert=True)
        return item
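
Note that scrapy.conf (used in the original code) was deprecated and later removed, which is why the version above reads settings through get_project_settings(). The idiomatic alternative in current Scrapy is the from_crawler classmethod, which receives the running crawler and its settings. A minimal sketch in that style (the class name is mine, not from the original post):

class MongoFromCrawlerPipeline(object):
    def __init__(self, host, port, db_name, doc_name):
        client = pymongo.MongoClient(host=host, port=port)
        self.post = client[db_name][doc_name]

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy calls this hook when building the pipeline,
        # passing the crawler so settings can be read here
        s = crawler.settings
        return cls(s['MONGODB_HOST'], s['MONGODB_PORT'],
                   s['MONGODB_DBNAME'], s['MONGODB_DOCNAME'])

    def process_item(self, item, spider):
        data = dict(item)
        data["time"] = datetime.datetime.now()
        self.post.update_one({"name": item["name"]}, {"$set": data}, upsert=True)
        return item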
