python爬取58同城一頁資料

fan_lulu發表於2018-08-04

程式碼如下:

from bs4 import BeautifulSoup
import requests

def get_link_from(whos_sells=0):
    """Collect item detail links from page 1 of the 58.com tablet listing.

    Args:
        whos_sells: Seller category inserted into the listing URL path.
            ``0`` reads the ``td.t a.t`` anchors and drops each link's
            query string; any other value reads the ``div.left a.title.t``
            anchors and keeps the links as they appear in the page.

    Returns:
        A list of href strings in document order. Anchors with a missing
        or empty ``href`` are skipped.
    """
    list_view = 'http://xa.58.com/pingbandiannao/{}/pn1/'.format(str(whos_sells))
    # A timeout keeps a stalled connection from hanging the script forever.
    wb_data = requests.get(list_view, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    if whos_sells == 0:
        hrefs = (link.get('href') for link in soup.select('td.t a.t'))
        # Tag.get returns None when the attribute is absent; skip those
        # anchors instead of crashing on None.split.
        return [href.split('?')[0] for href in hrefs if href]

    hrefs = (link.get('href') for link in soup.select('div.left a.title.t'))
    return [href for href in hrefs if href]

def get_views_from(url):
    """Fetch the view counter for one item page.

    The item id is taken from the last path segment of ``url`` (for
    example ``.../123456x.shtml`` gives ``123456``) and sent to the
    ``jst1.58.com`` counter endpoint.

    Args:
        url: Detail-page URL of the item; a trailing query string is ignored.

    Returns:
        The part of the counter response after its last ``=`` sign, as a
        string.
    """
    # Drop any query string first, otherwise the extension is not at the
    # end of the segment and would be left inside the id.
    page = url.split('?', 1)[0].split('/')[-1]

    # str.strip('x.shtml') removes a *set of characters* from both ends,
    # not the literal suffix, so remove the suffix parts explicitly.
    info_id = page
    if info_id.endswith('.shtml'):
        info_id = info_id[:-len('.shtml')]
    if info_id.endswith('x'):
        info_id = info_id[:-1]

    api = 'http://jst1.58.com/counter?infoid={}'.format(info_id)
    # A timeout keeps a stalled connection from hanging the script forever.
    js = requests.get(api, timeout=10)
    views = js.text.split('=')[-1]
    return views

def get_info_item(whos_sells):
    """Scrape every item linked from the listing page and print its details.

    Prints the list of collected URLs first, then one dict per item with
    the keys ``title``, ``price``, ``addr``, ``cate`` and ``views``.

    Args:
        whos_sells: Seller category forwarded to ``get_link_from``; ``0``
            labels items as ``'person'``, any other value as ``'seller'``.

    Returns:
        None. Results are only printed.
    """
    urls = get_link_from(whos_sells)
    print(urls)
    for url in urls:
        # A timeout keeps a stalled connection from hanging the script forever.
        wb_data = requests.get(url, timeout=10)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        # Test the exact selector that gets indexed: the outer container
        # can exist without the inner <i>, which would raise IndexError.
        price_tags = soup.select('span.price_now i')
        addr_tags = soup.select('div.palce_li span i')

        data = {
            # soup.title is None when the page has no <title> element.
            'title' : soup.title.text if soup.title is not None else None,
            'price' : price_tags[0].text if price_tags else None,
            'addr'  : list(addr_tags[0].stripped_strings) if addr_tags else None,
            'cate'  : 'person' if whos_sells == 0 else 'seller',
            'views' : get_views_from(url)
        }
        print(data)

# Script entry: scrape and print page 1 for category 0 (labelled 'person').
get_info_item(0)
#get_link_from(1)
#get_views_from(url)

當 get_info_item() 傳入引數為 0 時,可爬取個人第一頁資料;當傳入引數為 1 時,按道理說應該爬取商家第一頁資訊,但是結果卻出錯,可能是因為商家頁面的標籤(HTML tag)不一樣,導致爬取失敗,後續將會進行程式碼優化。

相關文章