Scraping web page data from source lines
My project requires data from an external supplier, which is why I became acquainted with web-scraping technology. Below is an example of reading data from a web page's source lines.
from bs4 import BeautifulSoup
import requests
from tkinter import *
import time

WD1 = ''  # last text shown in the label, used to avoid redundant redraws
page_link = 'https://XXXXXXXXXXX.com/en-IE/weather/today/l/9c9fb864198c86c3047ec4081370217d460ba0c234706c99c50697a2fedd42f7'

window = Tk()
window.title("This is GUI")
window.configure(background='black')
window.bind('<Escape>', lambda e: window.destroy())
window.geometry('700x500')

frame = Frame(window, bg='black')
frame.pack()

weat_label = Label(frame, fg='white', bg='black', font=('calibri light', 72), anchor=W)
weat_label.grid()


def weather():
    """Fetch the weather page, show current conditions in the label,
    and reschedule itself to run again in 10 seconds."""
    global WD1
    try:
        # Single request with a timeout.  The original issued an extra,
        # timeout-less requests.get() before the try block, which both
        # doubled the traffic and made the Timeout handler unreachable
        # for that first call.
        page_response = requests.get(page_link, timeout=5)
    except requests.Timeout:
        lbl_ERROR = Label(window, text="Timeout occured for requested page:" + page_link,
                          fg='white', font=("Arial Bold", 12))
        lbl_ERROR.grid(row=0, column=0)
        # Retry later instead of falling through to parsing code that
        # would raise NameError (page_content was never bound).
        frame.after(10000, weather)
        return
    if page_response.status_code != 200:
        lbl_ERROR = Label(window, text=page_response.status_code,
                          fg='white', font=("Arial Bold", 40))
        lbl_ERROR.grid(row=0, column=0)
        frame.after(10000, weather)
        return
    page_content = BeautifulSoup(page_response.content, 'html.parser')
    temp = page_content.find('div', attrs={"class": "today_nowcard-temp"}).text
    today_name = page_content.find('div', attrs={"class": "today_nowcard-phrase"}).text
    # NOTE(review): "classname" is not a standard HTML attribute; this was
    # probably meant to be "class" — confirm against the live page markup.
    feels_temp = page_content.find('span', attrs={"classname": "deg-feels"}).text
    WD = ("Today : " + temp + "\n" + today_name + "\n" + "Feels like : " + feels_temp)
    if WD != WD1:  # only touch the widget when the text actually changed
        WD1 = WD
        weat_label.config(text=WD)
    frame.after(10000, weather)  # 10 second
    # print('*')


weather()
window.mainloop()
# This copy of the script was missing `from bs4 import BeautifulSoup`
# even though BeautifulSoup() is called below — added here.
from bs4 import BeautifulSoup
import requests
from tkinter import *
import time

WD1 = ''  # last text shown in the label, used to avoid redundant redraws
page_link = 'https://XXXXXXXXXXX.com/en-IE/weather/today/l/9c9fb864198c86c3047ec4081370217d460ba0c234706c99c50697a2fedd42f7'

window = Tk()
window.title("This is GUI")
window.configure(background='black')
window.bind('<Escape>', lambda e: window.destroy())
window.geometry('700x500')

frame = Frame(window, bg='black')
frame.pack()

weat_label = Label(frame, fg='white', bg='black', font=('calibri light', 72), anchor=W)
weat_label.grid()


def weather():
    """Fetch the weather page, show current conditions in the label,
    and reschedule itself to run again in 10 seconds."""
    global WD1
    try:
        # Single request with a timeout.  The original issued an extra,
        # timeout-less requests.get() before the try block, which both
        # doubled the traffic and made the Timeout handler unreachable
        # for that first call.
        page_response = requests.get(page_link, timeout=5)
    except requests.Timeout:
        lbl_ERROR = Label(window, text="Timeout occured for requested page:" + page_link,
                          fg='white', font=("Arial Bold", 12))
        lbl_ERROR.grid(row=0, column=0)
        # Retry later instead of falling through to parsing code that
        # would raise NameError (page_content was never bound).
        frame.after(10000, weather)
        return
    if page_response.status_code != 200:
        lbl_ERROR = Label(window, text=page_response.status_code,
                          fg='white', font=("Arial Bold", 40))
        lbl_ERROR.grid(row=0, column=0)
        frame.after(10000, weather)
        return
    page_content = BeautifulSoup(page_response.content, 'html.parser')
    temp = page_content.find('div', attrs={"class": "today_nowcard-temp"}).text
    today_name = page_content.find('div', attrs={"class": "today_nowcard-phrase"}).text
    # NOTE(review): "classname" is not a standard HTML attribute; this was
    # probably meant to be "class" — confirm against the live page markup.
    feels_temp = page_content.find('span', attrs={"classname": "deg-feels"}).text
    WD = ("Today : " + temp + "\n" + today_name + "\n" + "Feels like : " + feels_temp)
    if WD != WD1:  # only touch the widget when the text actually changed
        WD1 = WD
        weat_label.config(text=WD)
    frame.after(10000, weather)  # 10 second
    # print('*')


weather()
window.mainloop()
Scraping web page data from a source table
When the page contains tables with many rows, the data is not as easy to extract. First: the output of the print function looks very different from the Tkinter Label output. Second: I spent a long time studying how the data was displayed, because the data was updated dynamically, and a static snapshot of the page was in no way sufficient.
Without tkinter — using the print function
import requests
from bs4 import BeautifulSoup
import sys

if sys.version_info[0] == 2:
    import Tkinter as tk  # Linux / Python 2 spelling
    # reload() and sys.setdefaultencoding() exist only on Python 2.
    # The original called them unconditionally, which raises NameError /
    # AttributeError under Python 3, so they belong inside this branch.
    reload(sys)
    sys.setdefaultencoding('utf8')
else:
    import tkinter as tk  # Python 3 spelling

# Fetch the ten-day forecast page and parse the single <table> on it.
res_table = requests.get('https://xxxxxxx.com/en-IE/weather/tenday/l/LHXX6561:1:LH')
p_content = BeautifulSoup(res_table.content, 'html.parser')
table = p_content.find('table')
thead = table.find('thead')
tbody = table.find('tbody')

# Header row: column titles come from the <th> cells.
for i in thead.findAll('tr'):
    table_name = i.findAll('th')
    day = table_name[0].text.strip()
    descrip = table_name[1].text.strip()
    hi_low = table_name[2].text.strip()
    precipl = table_name[3].text.strip()
    winds = table_name[4].text.strip()
    humidity = table_name[5].text.strip()
    print('{:8s} {:20s} {:10s} {:10s} {:10s} {:10s}'.format(day, descrip, hi_low, precipl, winds, humidity))

# Body rows: note cell[0] is skipped — presumably a checkbox/date cell;
# verify against the live markup.
for ii in tbody.findAll('tr'):
    cell = ii.findAll('td')
    day = cell[1].find(text=True)
    desc = cell[2].find(text=True)
    h_l = cell[3].text.strip()
    precip = cell[4].text.strip()
    wind = cell[5].find(text=True)
    hum = cell[6].text.strip()
    print('{:8s} {:20s} {:12s} {:8} {:12s} {:4s}'.format(day, desc, h_l, precip, wind, hum))
# This copy of the script was missing `import requests` even though
# requests.get() is called below — added here.
import requests
from bs4 import BeautifulSoup
import sys

if sys.version_info[0] == 2:
    import Tkinter as tk  # Linux / Python 2 spelling
    # reload() and sys.setdefaultencoding() exist only on Python 2.
    # The original called them unconditionally, which raises NameError /
    # AttributeError under Python 3, so they belong inside this branch.
    reload(sys)
    sys.setdefaultencoding('utf8')
else:
    import tkinter as tk  # Python 3 spelling

# Fetch the ten-day forecast page and parse the single <table> on it.
res_table = requests.get('https://xxxxxxx.com/en-IE/weather/tenday/l/LHXX6561:1:LH')
p_content = BeautifulSoup(res_table.content, 'html.parser')
table = p_content.find('table')
thead = table.find('thead')
tbody = table.find('tbody')

# Header row: column titles come from the <th> cells.
for i in thead.findAll('tr'):
    table_name = i.findAll('th')
    day = table_name[0].text.strip()
    descrip = table_name[1].text.strip()
    hi_low = table_name[2].text.strip()
    precipl = table_name[3].text.strip()
    winds = table_name[4].text.strip()
    humidity = table_name[5].text.strip()
    print('{:8s} {:20s} {:10s} {:10s} {:10s} {:10s}'.format(day, descrip, hi_low, precipl, winds, humidity))

# Body rows: note cell[0] is skipped — presumably a checkbox/date cell;
# verify against the live markup.
for ii in tbody.findAll('tr'):
    cell = ii.findAll('td')
    day = cell[1].find(text=True)
    desc = cell[2].find(text=True)
    h_l = cell[3].text.strip()
    precip = cell[4].text.strip()
    wind = cell[5].find(text=True)
    hum = cell[6].text.strip()
    print('{:8s} {:20s} {:12s} {:8} {:12s} {:4s}'.format(day, desc, h_l, precip, wind, hum))
tkinter with the Label function
#!/usr/bin/env python
import sys

if sys.version_info[0] == 2:
    import Tkinter as tk  # Linux / Python 2 spelling
    # Python-2-only encoding hack; calling these under Python 3 crashes,
    # so they are moved inside the version check.
    reload(sys)
    sys.setdefaultencoding('utf8')
else:
    import tkinter as tk  # Windows / Python 3 spelling

from bs4 import BeautifulSoup
import requests
import feedparser
from itertools import cycle
import csv
import itertools
import time  # was missing although time.sleep() is used below
# -----------------------GUI---------------------------------------------
from tkinter import *
# -----------------------URL link---------------------------------------
url = ('https://xxxxxxx.com/en-GB/weather/5day/l/LHXX6561:1:LH')
b = 'black'  # original had a stray trailing comma, which made b a tuple and broke bg=b
w = 'white'
# -------------------------------------------------------------------------
table_frame_o = ''
format_lines1 = ''  # last CSV line written, used to skip duplicate rows
# ----------GUI manipulation-----------------------------------------------
window = Tk()
window.configure(background='black')
window.bind('<Escape>', lambda e: window.destroy())
window.geometry('950x350')
window.title("Weather 5 day")
####################################################################################
table_frame = Frame(window)
table_frame.grid()
frame = Frame(table_frame, bg='black')
frame.grid()


def weather_10():
    """Scrape the 5-day forecast table, append new rows to weather_data.csv,
    render the CSV as a grid of Labels, and reschedule itself (20 min)."""
    global table_frame_o
    global format_lines1
    try:
        page_table = requests.get(url, timeout=5)
    except requests.Timeout:  # requests has no TimeoutError attribute
        # Keep the widget reference separate from .grid(): grid() returns
        # None, so the original `Label(...).grid()` assignment made the
        # later .destroy() call raise AttributeError.
        lbl_ERROR2 = Label(window, text="Uups....time out", fg='white', bg=b,
                           font=('calibrilight 22'))
        lbl_ERROR2.grid()
        window.update()
        time.sleep(10)
        lbl_ERROR2.destroy()
        table_frame.after(1200000, weather_10)
        return  # p_content was never bound; retry later instead of crashing
    if page_table.status_code != 200:
        # str(status_code): the original concatenated the Response object
        # itself into the message, raising TypeError.
        lbl_ERROR = Label(window, text="Uups..error load page" + str(page_table.status_code),
                          fg='white', bg=b, font=('calibrilight 22'))
        lbl_ERROR.grid()
        window.update()
        time.sleep(10)
        lbl_ERROR.destroy()
        table_frame.after(1200000, weather_10)
        return
    p_content = BeautifulSoup(page_table.content, 'html.parser')
    table = p_content.find('table')
    tbody = table.find('tbody')
    for i in tbody.findAll('tr'):
        cell = i.findAll('td')
        day = cell[1].find(text=True)
        desc = cell[2].text.strip()
        hl = cell[3].text.strip()
        precip = cell[4].text.strip()
        wind = cell[5].text.strip()
        hum = cell[6].text.strip()
        format_lines = (day + "," + desc + "," + hl + "," + precip + "," + wind + "," + hum + "\n")
        if format_lines != format_lines1:  # skip consecutive duplicate rows
            format_lines1 = format_lines
            # Context manager guarantees the file is closed even on error.
            with open('weather_data.csv', 'a') as data:
                data.write(format_lines)
    # Re-read the accumulated CSV and lay it out as a Label grid.
    with open("weather_data.csv") as file:
        reader = csv.reader(file)
        r = 0
        for col in reader:
            c = 0
            for row in col:
                lbl = Label(frame, fg='white', bg='black', text=row, width=12,
                            height=1, relief=FLAT, font=('calibrilight 18'))
                lbl.grid(row=r, column=c)
                c += 1
            r += 1
    table_frame.after(1200000, weather_10)  # 20 minutes
    deleteContent()


def deleteContent():
    """Truncate weather_data.csv so the next run starts from an empty file."""
    with open('weather_data.csv', "w"):
        pass


deleteContent()
weather_10()
window.mainloop()
The solution is:
Retrieve the data and put it in a file;
Open the file and form strings from it;
Assemble the output with tkinter Labels;
Clear the tkinter cache;
Delete the data from the file before the next run of the function.
Retrieve the data and put it in a file;
Open the file and form strings from it;
Assemble the output with tkinter Labels;
Clear the tkinter cache;
Delete the data from the file before the next run of the function.

scrape_table.txt | |
File Size: | 3 kb |
File Type: | txt |