External Exam Download Resources Web Applications Games Recycle Bin

Combining data sources

combining data sources.py

from bs4 import BeautifulSoup
import urllib.request

# Download and parse the period-times page.
# Each response is opened in a `with` block so the HTTP connection is closed
# as soon as BeautifulSoup has finished reading it (previously it was leaked).
times_url = "https://digisoln.com/python/webscraping/times.html"
with urllib.request.urlopen(times_url) as times_req:
    times = BeautifulSoup(times_req, features="html.parser")

# Download and parse the timetable page the same way.
timetable_url = "https://digisoln.com/python/webscraping/timetable.html"
with urllib.request.urlopen(timetable_url) as timetable_req:
    timetable = BeautifulSoup(timetable_req, features="html.parser")

####----------- MORE THEORY:
# Grab the first <table> tag found in the parsed timetable page.
# NOTE(review): find() gives None when nothing matches -- the code below assumes a table exists.
ttable = timetable.find("table")
# Uncomment any of the probes below to explore how the table's tree is laid out:
#print(ttable.contents) #shows tag's children
#print(len(list(ttable.children))) #splits by next child (tr)
#print(len(list(ttable.descendants))) #splits by every tag (children of children)
#print(ttable.contents[1]) #2nd child
# NOTE(review): per the BeautifulSoup docs these iterators also yield text nodes
# (e.g. whitespace between tags), not only tags -- confirm the counts against the page.
# Placeholder loop: visits every node nested anywhere inside the table.
for every_tag in ttable.descendants:
    pass #etc
# Placeholder loop: visits only the table's direct children.
for every_tr in ttable.children:
    pass #etc
####-----------

# Collect the text of Monday's lesson cells from the timetable.
all_tds = ttable.find_all("td") #this is a ResultSet not a list
# Monday's timetable (START = 1, STEP = 6): take cell 1, then every 6th cell after it.
# NOTE(review): presumably each timetable row holds 6 <td> cells -- confirm against the page.
lessons = [cell.text for cell in all_tds[1::6]]

# Collect the period times: every 2nd <td> of the times page, starting at cell 1.
period_time_tds = times.find_all("td")
timings = [cell.text for cell in period_time_tds[1::2]]

# Print each lesson beside its time slot. zip() pairs the two lists in lockstep
# and stops at the shorter one, so a page with fewer time rows than lessons no
# longer raises IndexError partway through the output.
for lesson, timing in zip(lessons, timings):
    print(lesson, timing)

####----------- PRODUCES:
#Maths A1 0845-0945
#Maths A1 0945-1045
#Art H4 1115-1215
#Science SL1 1215-1315
#English B2 1345-1445
####-----------