BS4 Functions

bs4 functions.py

from bs4 import BeautifulSoup
import urllib.request
# Demonstrates common BeautifulSoup calls against a sample timetable page.
url = "https://digisoln.com/python/webscraping/timetable.html"

# Parse inside a context manager so the HTTP response is closed as soon as
# BeautifulSoup has read the markup from it.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page, features="html.parser")

print("raw:", soup)  # unindented raw document, HTML tags included
print("prettify:", soup.prettify())  # indented to show the nested structure
print("text only:", soup.get_text())  # text content with the HTML removed
print("title:", soup.title)  # whole tag, e.g. <title>Timetable</title>
print("title.string:", soup.title.string)  # tag contents only, e.g. Timetable

# Every table cell, e.g. <td>English <span class="room">B2</span></td>
for td in soup.find_all("td"):
    print(td)

# Attribute access returns the first matching tag (the only link on this
# page), e.g. <a href="...">...</a>
print("a:", soup.a)

# find_all returns every match; print each whole tag, then just its URL.
for link in soup.find_all("a"):
    print(link)  # e.g. <a href="...">...</a>
    print(link.get('href'))  # URL only, e.g. https://www...

# find returns only the first match, e.g. <span class="room">A1</span>
room = soup.find("span", {"class": "room"})
print(room)

# Same lookup as above, but for every room on the page.
for rooms in soup.find_all("span", {"class": "room"}):
    print(rooms)  # whole tag
    print(rooms.get_text())  # text only, HTML stripped

# More docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
----------------------------------------------------------------------

BS4 Documentation