Combining data sources
combining data sources.py
"""Combine two scraped pages: pair Monday's lessons with their period times.

Downloads the timetable page and the period-times page, pulls Monday's
column out of the timetable and the time column out of the times table,
then prints each lesson next to its time slot.
"""
import urllib.request

from bs4 import BeautifulSoup

times_url = "https://digisoln.com/python/webscraping/times.html"
# Parse inside the `with` so the HTTP response is closed once it has been read.
with urllib.request.urlopen(times_url) as times_req:
    times = BeautifulSoup(times_req, features="html.parser")

timetable_url = "https://digisoln.com/python/webscraping/timetable.html"
with urllib.request.urlopen(timetable_url) as timetable_req:
    timetable = BeautifulSoup(timetable_req, features="html.parser")

####----------- MORE THEORY:
ttable = timetable.find("table")
#print(ttable.contents) #shows tag's children
#print(len(list(ttable.children))) #splits by next child (tr)
#print(len(list(ttable.descendants))) #splits by every tag (children of children)
#print(ttable.contents[1]) #2nd child
for every_tag in ttable.descendants:
    pass #etc
for every_tr in ttable.children:
    pass #etc
####-----------

# find_all returns a ResultSet; it supports slicing like a list.
all_tds = ttable.find_all("td")
# Monday's timetable (START = 1, STEP = 6): every 6th cell, starting at the 2nd.
lessons = [each.text for each in all_tds[1::6]]

period_time_tds = times.find_all("td")
# Every 2nd cell, starting at the 2nd, holds the period's time range.
timings = [each.text for each in period_time_tds[1::2]]

# zip pairs the two lists in lockstep and stops at the shorter one, so a
# times page with fewer rows than the timetable no longer raises IndexError.
for lesson, timing in zip(lessons, timings):
    print(lesson, timing)

####----------- PRODUCES:
#Maths A1 0845-0945
#Maths A1 0945-1045
#Art H4 1115-1215
#Science SL1 1215-1315
#English B2 1345-1445
####-----------