提取獎勵辦資料中人員資訊(自用)

右介發表於2018-08-07

2016年

#coding=utf-8
import re
from pymongo import MongoClient

# Connect to the local MongoDB server and authenticate against "nosta".
# Project documents are read from "nosta_2016"; one flattened record per
# listed person is written to "2016_list".
client = MongoClient("localhost", 27017)
db = client["nosta"]
collection1 = db["nosta_2016"]
collection2 = db["2016_list"]
db.authenticate("zty", "zty")

# Output key -> candidate patterns, tried in order. The first pattern that
# matches supplies group(1) (the "label:value" text); if none match the
# field is stored as ''. The patterns contain no backslashes, so plain
# unicode literals hold exactly the same text as raw ones would.
person_fields = (
    ('name', (u'(姓名:.*?) .*',)),
    ('duty', (u'.* (行政職務:.*?) .*',)),
    ('title', (u'.* (技術職稱:.*?) .*',)),
    ('unit', (u'.* (工作單位:.*?) .*',)),
    # The contribution appears under one of two labels; try both.
    ('contribution', (u'.* (對本專案技術創造性貢獻:.*?) .*',
                      u'.* (對本專案主要學術貢獻:.*?) .*')),
    ('award', (u'.* (曾獲國家科技獎勵情況:.*)',)),
)

n = 0
projects = collection1.find(
    {}, {"project_name": 1, "project_content": 1, "_id": 0})
for n, item in enumerate(projects, 1):
    print(n)  # running count of processed projects

    content = item['project_content']
    if u'主要完成人' not in content:
        continue
    people = content[u'主要完成人']
    if not people:
        continue

    for entry in people:
        dc = {'project_name': item['project_name']}
        for key, patterns in person_fields:
            value = ''
            for pattern in patterns:
                found = re.search(pattern, entry)
                if found:
                    value = found.group(1)
                    break
            dc[key] = value
        collection2.insert(dc)

 

2017、2018年

#coding=utf-8
import re
from pymongo import MongoClient

# Connect to the local MongoDB server and authenticate against "nosta".
# Project documents are read from "nosta_2017"; one flattened record per
# listed person is written to "2017_list".
client = MongoClient("localhost", 27017)
db = client["nosta"]
collection1 = db["nosta_2017"]
collection2 = db["2017_list"]
db.authenticate("zty", "zty")

# Field labels that get a space inserted in front of them, so that the
# space-delimited patterns below can find where each field starts and ends.
# Order is the order in which the replacements are applied.
spaced_labels = (
    u'排名:',
    u'行政職務:',
    u'技術職稱:',
    u'工作單位:',
    u'完成專案時所在單位:',
    u'對本專案技術創造性貢獻:',
    u'對本專案主要學術貢獻:',
    u'曾獲國家科技獎勵情況:',
)

# Output key -> candidate patterns, tried in order. The first pattern that
# matches supplies group(1) (the "label:value" text); if none match the
# field is stored as ''. The patterns contain no backslashes, so plain
# unicode literals hold exactly the same text as raw ones would.
person_fields = (
    ('name', (u'(姓名:.*?) .*',)),
    ('duty', (u'.* (行政職務:.*?) .*',)),
    ('title', (u'.* (技術職稱:.*?) .*',)),
    ('unit', (u'.* (工作單位:.*?) .*',)),
    # The contribution appears under one of two labels; try both.
    ('contribution', (u'.* (對本專案技術創造性貢獻:.*?) .*',
                      u'.* (對本專案主要學術貢獻:.*?) .*')),
    ('award', (u'.* (曾獲國家科技獎勵情況:.*)',)),
)

n = 0
projects = collection1.find(
    {}, {"project_name": 1, "project_content": 1, "_id": 0})
for n, item in enumerate(projects, 1):
    print(n)  # running count of processed projects

    content = item['project_content']
    if u'主要完成人:' not in content:
        continue
    choice = content[u'主要完成人:']
    if choice == []:
        continue

    # Everything before the first name label is dropped; each remaining
    # piece is one person's text with the leading name label stripped.
    for piece in choice.split(u'姓名:')[1:]:
        for label in spaced_labels:
            piece = piece.replace(label, u' ' + label)
        # Put back the name label that split() consumed.
        piece = u'姓名:' + piece

        dc = {'project_name': item['project_name']}
        for key, patterns in person_fields:
            value = ''
            for pattern in patterns:
                found = re.search(pattern, piece)
                if found:
                    value = found.group(1)
                    break
            dc[key] = value
        collection2.insert(dc)

 

相關文章