INTELLIGENT ENVIRONMENT
  • Home
  • About
  • PROJECTS
  • Contact

Python

Scraping web page data from a source line

My project requires data from an external provider, which is why I got acquainted with web scraping. Below is an example of reading data from individual lines of a web page's source code.
Picture
from bs4 import BeautifulSoup
import requests

from tkinter import *
import time

# Text shown by the previous refresh; weather() compares new text against it.
WD1 = ''
# Page that weather() downloads and parses.
page_link = 'https://XXXXXXXXXXX.com/en-IE/weather/today/l/9c9fb864198c86c3047ec4081370217d460ba0c234706c99c50697a2fedd42f7'

# Root window: black background, fixed start size, Escape closes it.
window = Tk()
window.geometry('700x500')
window.configure(background='black')
window.title("This is GUI")
window.bind('<Escape>', lambda _event: window.destroy())

# Container for the weather label (packed into the root window).
frame = Frame(window, bg='black')
frame.pack()

# Large white-on-black label that weather() fills with the current text.
weat_label = Label(
    frame,
    anchor=W,
    font=('calibri light', 72),
    bg='black',
    fg='white',
)
weat_label.grid()

      
def weather():
    """Refresh the weather text in ``weat_label`` and schedule the next refresh.

    Downloads ``page_link`` (5 second timeout), extracts the current
    temperature, the weather phrase and the "feels like" temperature, and
    shows them in ``weat_label``.  If the download fails, the server answers
    with a non-200 status, or the expected elements are missing, a short
    error message is shown in the same label instead of raising.

    The function re-arms itself through ``frame.after`` every 10 seconds,
    whatever the outcome of the current refresh.
    """
    global WD1

    # Error messages are long, so they use a smaller font than the forecast.
    font = ("Arial Bold", 12)

    try:
        # Single request, always with a timeout so the GUI cannot hang forever.
        page_response = requests.get(page_link, timeout=5)
    except requests.Timeout:
        WD = "Timeout occured for requested page:" + page_link
    except requests.RequestException as error:
        # Connection errors, invalid URLs, etc. must not kill the refresh loop.
        WD = str(error)
    else:
        if page_response.status_code == 200:
            page_content = BeautifulSoup(page_response.content, 'html.parser')
            nodes = (
                page_content.find('div', attrs={"class": "today_nowcard-temp"}),
                page_content.find('div', attrs={"class": "today_nowcard-phrase"}),
                page_content.find('span', attrs={"classname": "deg-feels"}),
            )
            if any(node is None for node in nodes):
                # find() returns None when the page layout has changed.
                WD = "Weather data not found on page"
            else:
                temp, today_name, feels_temp = (node.text for node in nodes)
                WD = ("Today : " + temp + "\n" + today_name + "\n"
                      + "Feels like : " + feels_temp)
                font = ('calibri light', 72)
        else:
            WD = str(page_response.status_code)
            font = ("Arial Bold", 40)

    # Only touch the widget when the displayed text actually changes.
    if WD != WD1:
        WD1 = WD
        # Reuse the existing label: adding grid-managed widgets to ``window``
        # would clash with ``frame``, which is managed there by pack.
        weat_label.config(text=WD, font=font)

    frame.after(10000, weather)  # 10 seconds

# First refresh happens immediately; weather() re-schedules itself via frame.after().
weather()
# Hand control to the Tk event loop (runs until the window is destroyed).
window.mainloop()

Web page data scrape from source table

Picture
Reading a few individual lines was easy, but tables turned out to be harder to extract. First, the output of the print function looks very different from the result shown in a Tkinter Label. Second, it took me a long time to work out how to display the data, because the data changes dynamically and a static picture would not be sufficient.

Without tkinter, using the print function

import requests
from bs4 import BeautifulSoup

import sys
if sys.version_info[0] == 2:
    import Tkinter as tk  # Python 2 module name
    # reload() and sys.setdefaultencoding() exist only on Python 2; running
    # them unconditionally raises NameError on Python 3.
    reload(sys)
    sys.setdefaultencoding('utf8')
else:
    import tkinter as tk  # Python 3 module name

# A timeout keeps the script from hanging forever on a stalled connection.
res_table = requests.get('https://xxxxxxx.com/en-IE/weather/tenday/l/LHXX6561:1:LH', timeout=10)
# Fail with a clear HTTP error instead of trying to parse an error page.
res_table.raise_for_status()
p_content = BeautifulSoup(res_table.content, 'html.parser')

table = p_content.find('table')
if table is None:
    # find() returns None when the page contains no table at all.
    sys.exit('No table element found in the downloaded page')
thead = table.find('thead')
tbody = table.find('tbody')

# Header: print the six column titles of every header row.
for i in (thead.find_all('tr') if thead is not None else []):
    table_name = i.find_all('th')
    if len(table_name) < 6:
        continue  # not a complete header row

    day = table_name[0].text.strip()
    descrip = table_name[1].text.strip()
    hi_low = table_name[2].text.strip()
    precipl = table_name[3].text.strip()
    winds = table_name[4].text.strip()
    humidity = table_name[5].text.strip()

    print('{:8s} {:20s} {:10s} {:10s} {:10s} {:10s}'.format(day,descrip,hi_low, precipl, winds, humidity))

# Body: cell 0 is skipped, cells 1-6 hold the values that are printed.
for ii in (tbody.find_all('tr') if tbody is not None else []):
    cell = ii.find_all('td')
    if len(cell) < 7:
        continue  # separator or otherwise incomplete row

    # find(text=True) yields None for an empty cell; '' keeps format() working.
    day = cell[1].find(text=True) or ''
    desc = cell[2].find(text=True) or ''
    h_l = cell[3].text.strip()
    precip = cell[4].text.strip()
    wind = cell[5].find(text=True) or ''
    hum = cell[6].text.strip()

    print ('{:8s} {:20s} {:12s} {:8} {:12s} {:4s}'.format(day,desc,h_l,precip,wind,hum))


With tkinter, using the Label widget


#!/usr/bin/env python
import sys
if sys.version_info[0] == 2:
    import Tkinter as tk  # Python 2 module name
    # reload() and sys.setdefaultencoding() exist only on Python 2; running
    # them unconditionally raises NameError on Python 3.
    reload(sys)
    sys.setdefaultencoding('utf8')
else:
    import tkinter as tk  # Python 3 module name

from bs4 import BeautifulSoup
import requests

import feedparser
from itertools import cycle

import csv
import itertools
#-----------------------GUI---------------------------------------------
from tkinter import *
#-----------------------URL link---------------------------------------
# Page whose forecast table weather_10() downloads.
url = ('https://xxxxxxx.com/en-GB/weather/5day/l/LHXX6561:1:LH')
# Colour shortcuts.  ``b`` must be a plain string: a trailing comma here
# would silently turn it into a 1-tuple.
b = 'black'
w = 'white'
#-------------------------------------------------------------------------
# Module-level state declared ``global`` inside weather_10().
table_frame_o =''
format_lines1=''
#----------GUI manipulation-----------------------------------------------

# Root window: black background, fixed start size, Escape closes it.
window = Tk()
window.configure(background = 'black')
window.bind('<Escape>', lambda e: window.destroy())
window.geometry ('950x350')
window.title ("Weather 5 day")
####################################################################################
# Outer container; weather_10() schedules its refresh timer on it.
table_frame = Frame(window)
table_frame.grid()

# Inner container that receives one Label per table cell.
frame = Frame(table_frame, bg='black')
frame.grid()

def weather_10 ():
    """Download the forecast table, show it as a grid of Labels, and re-arm.

    Steps performed on every call:

    1. Destroy the Labels created by the previous refresh.
    2. Download ``url`` (10 second timeout) and collect cells 1-6 of every
       ``tr`` found in the table body.
    3. Write the rows to ``weather_data.csv``, read the file back and create
       one Label per value inside ``frame``.
    4. Schedule the next refresh in 1 200 000 ms (20 minutes) and empty the
       file again with ``deleteContent()``.

    Download and parsing problems are reported in a temporary Label (removed
    after 10 seconds) instead of raising, so the refresh cycle keeps running.
    """
    global format_lines1

    def show_error(message):
        # Label(...).grid() returns None, so keep the widget before gridding it.
        lbl_ERROR = Label(frame, text=message, fg='white', bg=b, font=('calibrilight 22'))
        lbl_ERROR.grid()
        # Remove the message after 10 s without blocking the Tk event loop
        # (a time.sleep() here would freeze the whole window).
        lbl_ERROR.after(10000, lbl_ERROR.destroy)

    # Without this every refresh would stack new Labels on top of the old ones.
    for old_widget in frame.winfo_children():
        old_widget.destroy()

    p_content = None
    try:
        # requests only raises Timeout when a timeout is actually requested.
        page_table = requests.get(url, timeout=10)
    except requests.Timeout:
        show_error("Uups....time out")
    except requests.RequestException as error:
        show_error("Uups..error load page" + str(error))
    else:
        if page_table.status_code == 200:
            p_content = BeautifulSoup(page_table.content, 'html.parser')
        else:
            show_error("Uups..error load page" + str(page_table.status_code))

    # find() returns None for missing elements; guard every step.
    table = p_content.find('table') if p_content is not None else None
    tbody = table.find('tbody') if table is not None else None
    if p_content is not None and tbody is None:
        show_error("Uups..no weather table on page")

    rows = []
    if tbody is not None:
        for i in tbody.find_all('tr'):
            cell = i.find_all('td')
            if len(cell) < 7:
                continue  # separator or otherwise incomplete row

            day = cell[1].find(text=True) or ''
            row = [day] + [td.text.strip() for td in cell[2:7]]

            # Skip a row that merely repeats the one written just before it.
            format_lines = ",".join(row) + "\n"
            if format_lines != format_lines1:
                format_lines1 = format_lines
                rows.append(row)

    # csv.writer quotes values containing commas, so columns stay aligned
    # when the file is read back; ``with`` guarantees the file is closed.
    with open('weather_data.csv', 'w', newline='', encoding='utf-8') as data:
        csv.writer(data, lineterminator='\n').writerows(rows)

    with open("weather_data.csv", newline='', encoding='utf-8') as file:
        for r, record in enumerate(csv.reader(file)):
            for c, value in enumerate(record):
                lbl = Label(frame, fg= 'white',bg='black',text = value, width = 12, height = 1,relief = FLAT, font=('calibrilight 18'))
                lbl.grid(row = r, column = c)

    table_frame.after(1200000, weather_10)
    deleteContent()
   
def deleteContent():
    """Empty ``weather_data.csv`` (the file is created if it does not exist)."""
    # Opening in "w" mode truncates the file; nothing is written to it.
    open('weather_data.csv', "w").close()
# Start with an empty weather_data.csv (weather_10() opens it in append mode).
deleteContent()

# First refresh happens immediately; weather_10() re-schedules itself via table_frame.after().
weather_10()
# Hand control to the Tk event loop (runs until the window is destroyed).
window.mainloop()

The solution is:
Retrieve the data and put it in a file;
Open the file and read it back row by row;
Display each value with a tkinter Label;
Clear the tkinter cache;
Delete the data from the file, ready for the next run of the function.

crape_table.txt
File Size: 3 kb
File Type: txt
Download File

Result:

Picture
Powered by Create your own unique website with customizable templates.
  • Home
  • About
  • PROJECTS
  • Contact