import requests
from bs4 import BeautifulSoup
# Download the 163.com home page and print the text and target of every
# link that sits directly inside a list item.
url = "https://www.163.com/"

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page as if it were
# the real content.
response.raise_for_status()
wbdata = response.text

soup = BeautifulSoup(wbdata, 'html.parser')

# Child-combinator selector: an <a> that is a direct child of an <li>, inside
# a <ul>, inside a <div>.
news_titles = soup.select("div>ul>li>a")

for n in news_titles:
    title = n.get_text()
    link = n.get("href")  # None when the anchor has no href attribute
    data = {'标题': title, '链接': link}
    print(data)
然后运行，效果如下：
一个最简单的爬虫就搞定了。
|