9. Simple Web Crawler
The following code crawls the first five pages of the BBC News search results (for the search term "hong kong").
1
import requests
2
from bs4 import BeautifulSoup
3
4
# Crawl the first five result pages of the BBC search for "hong kong" and
# print the text of the link inside each <h1> element found on every page.
# range() excludes its stop value, so the stop must be 6 to include page 5.
for i in range(1, 6):
    url = 'https://www.bbc.co.uk/search/more?page=' + str(i) + '&q=hong+kong'

    # A timeout keeps the crawl from blocking forever on a stalled connection.
    html_text = requests.get(url, timeout=10).text
    html_data = BeautifulSoup(html_text, "html.parser")

    headline_list = html_data.find_all('h1')

    for headline in headline_list:
        # find() returns None when the <h1> holds no <a>; skip such elements
        # rather than failing with AttributeError on None.get_text().
        link = headline.find('a')
        if link is not None:
            print(link.get_text())
Copied!
Last modified 1yr ago
Copy link