BS4 Functions
bs4 functions.py
# Tour of common BeautifulSoup (bs4) calls, run against a small timetable page.
# Each print is followed by a comment showing the kind of output to expect.
import urllib.request

from bs4 import BeautifulSoup

url = "https://digisoln.com/python/webscraping/timetable.html"

# Open the response as a context manager so the HTTP connection is closed
# as soon as the document has been handed to the parser.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page, features="html.parser")

# --- whole document ---
print("raw:", soup)  # unindented raw with html
print("prettify:", soup.prettify())  # indents / nested data struct
print("text only:", soup.get_text())  # no html

# --- single tags by attribute access ---
print("title:", soup.title)  # <title>Timetable</title>
print("title.string:", soup.title.string)  # Timetable

# --- every tag of one type ---
for td in soup.find_all("td"):
    print(td)  # <td>English <span class="room">B2</span></td> ETC..

# --- links ---
print("a:", soup.a)  # first (only) link eg <a href="...">...</a>
for link in soup.find_all("a"):
    print(link)  # ALL links eg <a href="...">...</a>
    print(link.get("href"))  # url only eg https://www...

# --- filtering by attribute (class="room") ---
room = soup.find("span", {"class": "room"})
print(room)  # <span class="room">A1</span> = first room
for rooms in soup.find_all("span", {"class": "room"}):
    print(rooms)  # same output as previous but all rooms
    print(rooms.get_text())  # strips HTML

# more docs - https://www.crummy.com/software/BeautifulSoup/bs4/doc/