百度地圖POI爬蟲(Python3)

風靈使發表於2018-09-07

示例一

# -*- coding:utf-8 -*-
import csv
import json
import codecs
import os
import string
import sys
import time
import urllib.request
from urllib.parse import quote
from builtins import object, float, range, int, len, open, list, str


class BaiDuPOI(object):
    """Query the Baidu Place API (v2 ``search`` endpoint) for one keyword in one box.

    Args:
        itemy: Search keyword placed in the ``query`` URL parameter.
        loc: Bounding box string placed verbatim in the ``bounds`` URL
            parameter (the caller in this file passes four comma-separated
            numbers).
        api_key: Baidu API key ("ak"). When omitted, the module-level
            ``baidu_api`` variable is used, which is how this class was
            originally wired up.
        page_count: Number of result pages (20 items each) to request.
    """

    def __init__(self, itemy, loc, api_key=None, page_count=2):
        self.itemy = itemy
        self.loc = loc
        self.api_key = api_key
        self.page_count = page_count

    def urls(self):
        """Return the request URLs, one per result page, as unescaped strings."""
        if self.api_key is not None:
            api_key = self.api_key
        else:
            # Backward-compatible fallback: the key is expected to have been
            # assigned to the module global ``baidu_api`` by the calling script.
            api_key = baidu_api
        urls = []
        for page in range(0, self.page_count):
            url = ('http://api.map.baidu.com/place/v2/search?query=' + self.itemy
                   + '&bounds=' + self.loc
                   + '&page_size=20&page_num=' + str(page)
                   + '&output=json&ak=' + api_key)
            urls.append(url)
        return urls

    def baidu_search(self):
        """Fetch every URL from :meth:`urls` and return the decoded JSON documents.

        Returns:
            A list with one parsed JSON object per requested page, in page order.

        Raises:
            urllib.error.URLError: If a request fails.
            ValueError: If a response body is not valid JSON.
        """
        data = []
        for url in self.urls():
            # Percent-encode only the non-ASCII characters (e.g. a Chinese
            # keyword); every printable ASCII character is left untouched.
            s = quote(url, safe=string.printable)
            # Close the HTTP response deterministically instead of leaking it.
            with urllib.request.urlopen(s) as response:
                json_obj = response.read().decode('utf-8')
            data.append(json.loads(json_obj))
            time.sleep(1)  # pause 1 second between requests (presumably to respect API rate limits)
        return data


class LocaDiv(object):
    """Split a rectangular region into a grid of smaller bounding boxes.

    Args:
        loc_all: Four comma-separated numbers. Fields 0 and 2 are read as the
            south-west / north-east latitude, fields 1 and 3 as the
            south-west / north-east longitude.
        lat_step: Grid cell height in latitude units (default 0.2).
        lng_step: Grid cell width in longitude units (default 0.3).

    Raises:
        ValueError: If either step is not positive.
    """

    # Slack added to each span before dividing by the step, so that a span
    # that is an exact multiple of the step is not truncated by float error.
    _EPSILON = 0.0001

    def __init__(self, loc_all, lat_step=0.2, lng_step=0.3):
        if lat_step <= 0 or lng_step <= 0:
            raise ValueError("lat_step and lng_step must be positive")
        self.loc_all = loc_all
        self.lat_step = lat_step
        self.lng_step = lng_step

    def _axis_points(self, sw_index, ne_index, step):
        """Return the grid line coordinates along one axis, low to high.

        Points are ``low + step * i`` for every whole step that fits in the
        span, followed by the upper bound itself, so the last cell may be
        larger than ``step``.
        """
        parts = self.loc_all.split(',')
        low = float(parts[sw_index])
        high = float(parts[ne_index])
        whole_steps = int((high - low + self._EPSILON) / step)
        points = [low + step * i for i in range(whole_steps)]
        points.append(high)
        return points

    def lat_all(self):
        """Return the latitude grid lines as a list of floats."""
        return self._axis_points(0, 2, self.lat_step)

    def lng_all(self):
        """Return the longitude grid lines as a list of floats."""
        return self._axis_points(1, 3, self.lng_step)

    def ls_com(self):
        """Return every grid node as a ``"lat,lng"`` string, latitude-major."""
        lngs = self.lng_all()
        return [str(lat) + ',' + str(lng)
                for lat in self.lat_all()
                for lng in lngs]

    def ls_row(self):
        """Return one ``"lat_sw,lng_sw,lat_ne,lng_ne"`` string per grid cell.

        Cells are listed latitude-major. Each cell is built directly from
        adjacent grid lines rather than by index arithmetic on the flattened
        node list; the previous arithmetic used the latitude count as the row
        stride and therefore skipped or mis-paired cells whenever the two
        axes had a different number of grid lines.
        """
        lats = self.lat_all()
        lngs = self.lng_all()
        cells = []
        for lat_sw, lat_ne in zip(lats, lats[1:]):
            for lng_sw, lng_ne in zip(lngs, lngs[1:]):
                cells.append(','.join(
                    [str(lat_sw), str(lng_sw), str(lat_ne), str(lng_ne)]))
        return cells


if __name__ == '__main__':
    # Crawl every grid cell of the region for one keyword and append each
    # returned POI as a row of NTPOI.csv.

    # Baidu API key ("ak"). BaiDuPOI.urls() reads this module-level name.
    baidu_api = "*********************"  # fill in your own Baidu API ak here
    print("開始爬取資料,請稍等...")
    start_time = time.time()
    loc = LocaDiv('31.69,120.54,32.65,121.95')
    locs_to_use = loc.ls_row()

    # newline='' is what the csv module expects from its file object (avoids
    # blank rows on Windows); utf-8 keeps non-ASCII POI names writable
    # regardless of the system locale. The with-block closes the file even
    # if a request raises part-way through.
    with open('NTPOI.csv', 'a+', encoding='utf-8', newline='') as doc:
        writer = csv.writer(doc)
        for loc_to_use in locs_to_use:
            par = BaiDuPOI('學校', loc_to_use)  # change the keyword to crawl a different category
            for page in par.baidu_search():
                # A response may lack "results" (presumably on an API error);
                # skip it instead of aborting the whole crawl with KeyError.
                for poi in page.get("results", []):
                    writer.writerow(list(poi.values()))

    end_time = time.time()
    print("學校爬取完畢,用時%.2f秒" % (end_time - start_time))

示例二

# 提取城市的POI點資訊並將其儲存至CSV
import csv
import string
import urllib
import json
from urllib.parse import quote

# `import urllib` alone does not load the `request` submodule, so
# `urllib.request.urlopen` would raise AttributeError without this import.
import urllib.request

# Region corners in the Baidu coordinate system. Index 0 is the x value and
# index 1 the y value (presumably [lng, lat]); the request below emits them
# as "y,x" pairs in the `bounds` parameter.
left_bottom = [120.89, 31.83]  # lower-left corner of the region
right_top = [121.40, 32.47]  # upper-right corner of the region
part_n = 8  # the region is split into part_n * part_n tiles
url0 = 'http://api.map.baidu.com/place/v2/search?'
x_item = (right_top[0] - left_bottom[0]) / part_n  # tile width
y_item = (right_top[1] - left_bottom[1]) / part_n  # tile height
query = '學校'  # search keyword
ak = 'OGTkT2pv********************mm9mek'  # Baidu Map API key
n = 0  # number of tiles processed so far

# newline="" is what the csv module expects from its file object; the
# with-block guarantees the file is flushed and closed.
with open("baidu.csv", "a+", encoding="utf-8", newline="") as datacsv:
    csvwriter = csv.writer(datacsv, dialect="excel")

    for i in range(part_n):
        for j in range(part_n):
            # Lower-left corner of tile (i, j).
            left_bottom_part = [left_bottom[0] + i * x_item,
                                left_bottom[1] + j * y_item]
            # Upper-right corner of tile (i, j): one step beyond its own
            # lower-left corner. Offsetting from `right_top` instead made
            # every tile overlap its neighbours and extend past the region.
            right_top_part = [left_bottom[0] + (i + 1) * x_item,
                              left_bottom[1] + (j + 1) * y_item]
            for k in range(20):
                url = (url0 + 'query=' + query
                       + '&page_size=20&page_num=' + str(k)
                       + '&scope=1&bounds=' + str(left_bottom_part[1])
                       + ',' + str(left_bottom_part[0])
                       + ',' + str(right_top_part[1])
                       + ',' + str(right_top_part[0])
                       + '&output=json&ak=' + ak)
                # Percent-encode only the non-ASCII keyword characters.
                s = quote(url, safe=string.printable)
                with urllib.request.urlopen(s) as data:
                    hjson = json.loads(data.read().decode('utf-8'))
                if hjson.get('message') != 'ok':
                    continue
                results = hjson.get('results', [])
                if not results:
                    # An empty page means this tile has no further results
                    # (assumed: later pages are empty too), so stop paging.
                    break
                for poi in results:
                    csvwriter.writerow(list(poi.values()))
            n += 1
            print('第', str(n), '個切片入庫成功')

相關文章