用BeautifulSoup简单爬取BOSS直聘网岗位
import sys

import requests
from bs4 import BeautifulSoup

# Headers sent with every request: a desktop-browser User-Agent string.
_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 '
                  'Core/1.70.3676.400 QQBrowser/10.4.3469.400',
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_TIMEOUT = 10

# Records are appended to this file, one per line.
_OUTPUT_FILE = 'boss直聘.txt'


def _format_job(item):
    """Build the pipe-separated record for one listing ``<li>`` element.

    The record consists of, in order: the text of the ``div.job-title``
    element, the text of the ``span.red`` element, the text of the first
    two ``<p>`` elements, and the text of the second ``<a>`` element.

    Returns:
        The record as a string (without trailing newline), or ``None`` when
        the element lacks any of the pieces above.
    """
    job = item.find(name='div', attrs={'class': 'job-title'})
    red = item.find(name='span', attrs={'class': 'red'})
    links = item.find_all(name='a')
    paragraphs = item.find_all(name='p')
    # An <li> that does not have the expected structure is not a usable
    # listing; skip it instead of failing on None / a short list.
    if job is None or red is None or len(links) < 2 or len(paragraphs) < 2:
        return None
    return '|'.join(
        (job.text, red.text, paragraphs[0].text, paragraphs[1].text, links[1].text)
    )


def fun(path):
    """Fetch one listing page and append its job records to the output file.

    The page is parsed with ``html.parser``; every ``<li>`` inside
    ``div#main`` -> ``div.job-list`` is turned into one line of
    ``boss直聘.txt`` (UTF-8, append mode). Items missing an expected element
    are skipped. If the page has no ``div#main`` / ``div.job-list`` container
    at all, a message is printed to stderr and nothing is written.

    Args:
        path: URL of the listing page to fetch.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    response = requests.get(url=path, headers=_HEADERS, timeout=_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')

    main = soup.find(name='div', id='main')
    job_list = None
    if main is not None:
        job_list = main.find(name='div', attrs={'class': 'job-list'})
    if job_list is None:
        # The response is not a listing page (layout differs from what this
        # script expects); report it rather than crash on None.
        print('no job list found, page skipped: %s' % path, file=sys.stderr)
        return

    items = job_list.find_all(name='li')
    # The context manager guarantees the file is closed even if a write fails.
    with open(_OUTPUT_FILE, 'a', encoding='utf-8') as out:
        for item in items:
            line = _format_job(item)
            if line is not None:
                out.write(line + '\n')


# Scrape result pages 1 through 10 of the listing URL.
for i in range(1, 11):
    path = 'https://www.zhipin.com/c101010100-p100109/?page=%d&ka=page-%d' % (i, i)
    fun(path)
此代码仅供学习使用。