# 示例一 (Example 1)
import csv
import json
import codecs
import os
import string
import sys
import time
import urllib.request
from urllib.parse import quote
from builtins import object, float, range, int, len, open, list, str
class BaiDuPOI(object):
    """Query the Baidu Place search API for one keyword inside one bounding box.

    Attributes (stored verbatim from the constructor arguments):
        itemy: search keyword, placed in the ``query`` URL parameter.
        loc: text placed in the ``bounds`` URL parameter.
    """

    def __init__(self, itemy, loc):
        self.itemy = itemy
        self.loc = loc

    def urls(self):
        """Return the request URLs for result pages 0 and 1 (20 items each).

        The API key is read from the module-level name ``baidu_api``, which
        must be defined before this method is called.
        """
        api_key = baidu_api
        return [
            'http://api.map.baidu.com/place/v2/search?query=' + self.itemy
            + '&bounds=' + self.loc
            + '&page_size=20&page_num=' + str(page)
            + '&output=json&ak=' + api_key
            for page in range(0, 2)
        ]

    def baidu_search(self):
        """Fetch every URL from ``urls()`` and return the decoded JSON pages.

        Returns:
            list: one parsed JSON object per request, in request order.
        """
        data = []
        for url in self.urls():
            # All printable ASCII is marked safe, so only the remaining
            # characters (e.g. a non-ASCII keyword) get percent-encoded.
            s = quote(url, safe=string.printable)
            # The context manager closes the HTTP response even if reading
            # or decoding fails.
            with urllib.request.urlopen(s) as response:
                json_obj = response.read().decode('utf-8')
            data.append(json.loads(json_obj))
            # Pause after each request (presumably to stay under the API's
            # rate limit -- confirm against the account quota).
            time.sleep(1)
        return data
class LocaDiv(object):
    """Split a bounding box into a grid of smaller bounding boxes.

    ``loc_all`` is a comma-separated string of four numbers. The first and
    third are used as the lower and upper latitude, the second and fourth as
    the lower and upper longitude.

    Args:
        loc_all: bounding box text, e.g. ``'31.69,120.54,32.65,121.95'``.
        lat_step: spacing between latitude grid lines (default 0.2).
        lng_step: spacing between longitude grid lines (default 0.3).

    Raises:
        ValueError: if either step is not strictly positive.
    """

    def __init__(self, loc_all, lat_step=0.2, lng_step=0.3):
        if lat_step <= 0 or lng_step <= 0:
            raise ValueError("lat_step and lng_step must be positive")
        self.loc_all = loc_all
        self.lat_step = lat_step
        self.lng_step = lng_step

    def _field(self, index):
        """Return field ``index`` of ``loc_all`` converted to float."""
        return float(self.loc_all.split(',')[index])

    @staticmethod
    def _axis(low, high, step):
        """Return ``low, low+step, ...`` followed by ``high`` as the last point.

        The 0.0001 added to the span is a small tolerance (presumably so float
        rounding does not drop a step when the span is a near-exact multiple
        of ``step``).
        """
        count = int((high - low + 0.0001) / step)
        points = [low + step * i for i in range(0, count)]
        points.append(high)
        return points

    def lat_all(self):
        """Return the latitude grid lines, lowest first, ending at the upper bound."""
        return self._axis(self._field(0), self._field(2), self.lat_step)

    def lng_all(self):
        """Return the longitude grid lines, lowest first, ending at the upper bound."""
        return self._axis(self._field(1), self._field(3), self.lng_step)

    def ls_com(self):
        """Return every grid point as a ``'lat,lng'`` string.

        Points are ordered latitude-major: all longitudes for the first
        latitude, then all longitudes for the second, and so on.
        """
        lats = self.lat_all()
        lngs = self.lng_all()
        return [str(lat) + ',' + str(lng) for lat in lats for lng in lngs]

    def ls_row(self):
        """Return one ``'lat,lng,lat,lng'`` string per grid cell.

        Each cell joins a grid point with its diagonal neighbour (next
        latitude line, next longitude line). Cells are indexed directly by
        (latitude row, longitude column), so the result is correct even when
        the two axes have a different number of grid lines.
        """
        lats = self.lat_all()
        lngs = self.lng_all()
        cells = []
        for row in range(0, len(lats) - 1):
            for col in range(0, len(lngs) - 1):
                south_west = str(lats[row]) + ',' + str(lngs[col])
                north_east = str(lats[row + 1]) + ',' + str(lngs[col + 1])
                cells.append(south_west + ',' + north_east)
        return cells
if __name__ == '__main__':
    # Script entry point: split the bounding box into grid cells, query the
    # search API for '學校' in each cell, and append one CSV row per result.

    # BaiDuPOI.urls() reads this module-level name as the API key.
    baidu_api = "*********************"
    # newline='' is what the csv module requires so it can control line
    # endings itself; the ``with`` block closes the file even if a request
    # fails part-way through the crawl.
    with open('NTPOI.csv', 'a+', newline='') as doc:
        writer = csv.writer(doc)
        print("開始爬取資料,請稍等...")
        start_time = time.time()
        loc = LocaDiv('31.69,120.54,32.65,121.95')
        locs_to_use = loc.ls_row()
        for loc_to_use in locs_to_use:
            par = BaiDuPOI('學校', loc_to_use)
            # baidu_search() returns one parsed JSON page per request; each
            # page's "results" list holds one dict per place.
            for page in par.baidu_search():
                for poi in page["results"]:
                    writer.writerow(list(poi.values()))
    end_time = time.time()
    print("學校爬取完畢,用時%.2f秒" % (end_time - start_time))
# 示例二 (Example 2)
import csv
import string
import urllib
import json
from urllib.parse import quote
# Crawl the search API for one keyword over an evenly divided grid and
# append every returned place to baidu.csv.

# ``import urllib`` alone does not load the ``request`` submodule, so it is
# imported explicitly before ``urllib.request.urlopen`` is used below.
import urllib.request

# Corners of the whole region; the request below sends index 1 before
# index 0, so index 0 is treated as longitude and index 1 as latitude.
left_bottom = [120.89, 31.83]
right_top = [121.40, 32.47]
part_n = 8  # number of tiles along each axis
url0 = 'http://api.map.baidu.com/place/v2/search?'
x_item = (right_top[0] - left_bottom[0]) / part_n  # tile width
y_item = (right_top[1] - left_bottom[1]) / part_n  # tile height
query = '學校'
ak = 'OGTkT2pv********************mm9mek'
n = 0  # number of tiles processed so far

# newline='' is what the csv module requires; the ``with`` block guarantees
# the file is flushed and closed even if a request raises.
with open("baidu.csv", "a+", encoding="utf-8", newline="") as datacsv:
    csvwriter = csv.writer(datacsv, dialect=("excel"))
    for i in range(part_n):
        for j in range(part_n):
            left_bottom_part = [left_bottom[0] + i * x_item,
                                left_bottom[1] + j * y_item]
            # The tile's upper corner is one step beyond its lower corner.
            # (Offsetting from ``right_top`` instead would make every tile
            # as large as the whole region and push it outside the area.)
            right_top_part = [left_bottom[0] + (i + 1) * x_item,
                              left_bottom[1] + (j + 1) * y_item]
            for k in range(20):
                url = (url0 + 'query=' + query
                       + '&page_size=20&page_num=' + str(k)
                       + '&scope=1&bounds='
                       + str(left_bottom_part[1]) + ',' + str(left_bottom_part[0]) + ','
                       + str(right_top_part[1]) + ',' + str(right_top_part[0])
                       + '&output=json&ak=' + ak)
                # Printable ASCII is left as-is; only other characters
                # (the non-ASCII keyword) are percent-encoded.
                s = quote(url, safe=string.printable)
                with urllib.request.urlopen(s) as data:
                    hjson = json.loads(data.read().decode('utf-8'))
                if hjson['message'] == 'ok':
                    results = hjson['results']
                    for poi in results:
                        csvwriter.writerow(list(poi.values()))
                    if not results:
                        # An empty page means later pages are empty too, so
                        # skip the remaining requests for this tile.
                        break
            n += 1
            print('第', str(n), '個切片入庫成功')