I can't write data to CSV on a schedule - Python

I set the schedule to 30 seconds, but data is written to the CSV every 1-2 seconds...
I've tried a lot of ways to fix it, with no luck.
I want the scraped data written to the CSV every 30 seconds.
Hope somebody can help me solve it, thanks a lot!
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email
from datetime import datetime as dt
import csv

stock_no = input('Please insert stock no:')
set_price = '%.2f' % float(input('Please set notification price:'))

def get_stock_price():
    links = 'https://histock.tw/stock/%s' % stock_no
    response = requests.get(links)
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    tittle = soup.find('h3').get_text().strip()
    li = soup.find('span', id="Price1_lbTPrice").span.get_text()
    time_now = dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    with open('C:/Python workspace/stock_value_notification/index.csv', 'a', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow([tittle, li, time_now])
    return li, tittle, time_now

schedule.every(10).seconds.do(get_stock_price)

while True:
    try:
        schedule.run_pending()
        time.sleep(1)
        current_price = get_stock_price()[0]
        if set_price == current_price:
            msg_text = get_stock_price()[1] + \
                'stock value is ' + current_price
            send_email(msg_text)
            toaster = ToastNotifier()
            toaster.show_toast("Stock value notification",
                               msg_text,
                               duration=10)
    except:
        print('It is not working...')
        break
This CSV file is the result my program writes.

You are using schedule the wrong way: your while loop calls get_stock_price() directly on every iteration, once per time.sleep(1), so the CSV is written every second or two no matter what interval you schedule. Only schedule.run_pending() should trigger the job.
I hope this pseudocode example will help you:
def scrape_and_save():
    res = scrape()
    save_to_csv(res)

schedule.every(10).seconds.do(scrape_and_save)

while True:
    schedule.run_pending()
    time.sleep(1)

You have to move the notification section either into a separate function or into get_stock_price(). Here is a working refactored version: schedule calls get_stock_price() at every interval, and the notification runs after the data is written to the output file.
Adjust it further to your needs.
import bs4
import requests
import schedule
import time
from win10toast import ToastNotifier
from datetime import datetime as dt
import csv
import sys

stock_no = input('Please insert stock no:')
set_price = '%.2f' % float(input('Please set notification price:'))

def get_stock_price():
    try:
        links = 'https://histock.tw/stock/%s' % stock_no
        response = requests.get(links)
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        tittle = soup.find('h3').get_text().strip()
        li = soup.find('span', id="Price1_lbTPrice").span.get_text()
        time_now = dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        with open('index.csv', 'a', newline='', encoding="utf-8") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow([tittle, li, time_now])
        notify(li, tittle)
    except Exception as e:
        print('It is not working...')
        print(e)
        sys.exit()

def notify(current_price, stock_title):
    if set_price == current_price:
        msg_text = stock_title + \
            ' stock value is ' + current_price
        toaster = ToastNotifier()
        toaster.show_toast("Stock value notification",
                           msg_text,
                           duration=10)

schedule.every(10).seconds.do(get_stock_price)

while True:
    schedule.run_pending()
    time.sleep(1)
PS: the web scraping itself can sometimes add delay on top of the scheduled interval, lengthening the execution time.
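
One caveat the answer does not raise: set_price == current_price compares two strings character for character, so a scraped value such as '1,234.00' or one with stray whitespace will never match. A minimal sketch of a numeric comparison instead (the helper name and tolerance are my own, not from the answer):

def prices_match(set_price_str, scraped_price_str, tol=0.005):
    # Normalize both values to floats before comparing; the scraped text
    # may carry a thousands separator or surrounding whitespace.
    try:
        scraped = float(scraped_price_str.replace(',', '').strip())
        return abs(float(set_price_str) - scraped) < tol
    except ValueError:
        return False

notify() could then test prices_match(set_price, current_price) rather than string equality.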

Web scraping code using BS4 + requests not refreshing

I have a problem with code that scrapes a weather website. It's supposed to update hourly, but for some reason the data returned is not the current data on the website; it also doesn't update, but keeps feeding the same data continuously. Please help!!!
Also, I need help scraping the weather icon from the site.
Here is my code:
from bs4 import BeautifulSoup
from plyer import notification
import requests
import time

if __name__ == '__main__':
    while True:
        def notifyMe(title, message):
            notification.notify(
                title = title,
                message = message,
                #app_icon = icon,
                timeout = 7
            )
        try:
            # site = requests.get('https://weather.com/weather/today/l/5.02,7.97?par=google')
            site = requests.get('https://weather.com/en-NG/weather/today/l/4dce0117809bca3e9ecdaa65fb45961a9718d6829adeb72b6a670240e10bd8c9')
            # site = requests.get('http://localhost/weather.com/weather/today/l/5.02,7.97.html')
            soup = BeautifulSoup(site.content, 'html.parser')
            day = soup.find(class_='CurrentConditions--CurrentConditions--14ztG')
            location = day.find(class_='CurrentConditions--location--2_osB').get_text()
            timestamp = day.find(class_='CurrentConditions--timestamp--3_-CV').get_text()
            tempValue = day.find(class_='CurrentConditions--tempValue--1RYJJ').get_text()
            phraseValue = day.find(class_='CurrentConditions--phraseValue--17s79').get_text()
            precipValue = day.find(class_='CurrentConditions--precipValue--1RgXi').get_text()
            #icon = day.find(id ='svg-symbol-cloud').get_icon()
            weather = timestamp + "\n" + tempValue + " " + phraseValue + "\n" + precipValue
        except requests.exceptions.ConnectionError:
            location = "Couldn't get a location."
            weather = "Error connecting to website."
        except AttributeError:
            weather = timestamp + "\n" + tempValue + " " + phraseValue
        # print (weather)
        notifyMe( location, weather )
        time.sleep(30)
Expected output:
Uyo, Akwa Ibom Weather
As of 13:28 WAT
30° Mostly Cloudy
55% chance of rain until 14:00
import requests
from bs4 import BeautifulSoup

def main(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    # Collect every text node under the current-conditions card,
    # then drop the items that are not part of the summary.
    x = list(soup.select_one('.card').stripped_strings)
    del x[4:8]
    print(x)

main('https://weather.com/en-NG/weather/today/l/4dce0117809bca3e9ecdaa65fb45961a9718d6829adeb72b6a670240e10bd8c9')
Output:
['Uyo, Akwa Ibom Weather', 'As of 8:03 WAT', '24°', 'Cloudy', '47% chance of rain until 9:00']
It appears the error might have been on the site's end, because it's working now without the issue. Thank you all for the suggestions. @Ahmed American, your code is beautiful; I've learnt from it. @furas, I'll try to construct the SVG as you suggested.
That's the output.

Python scheduling - how can I pull data out of HTML every 5 min

I'm building a practice project. I want to get the stock value every 5 minutes, and if the price matches the price I set, I should get an e-mail and a desktop notification, but now I have some trouble... I don't know how to fix this...
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email

stock_no = input('Please insert stock no:')
set_price = input('Please set notification price:')

def job():
    links = 'https://histock.tw/stock/%s' % stock_no
    response = requests.get(links)
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    tittle = soup.find('h3').get_text().strip()
    li = soup.find('span', id="Price1_lbTPrice").span.get_text()
    msg_text = tittle + 'stock value is ' + li

schedule.every(5).minutes.do(job)

while True:
    schedule.run_pending()
    time.sleep(1)
    if set_price is li:
        send_email(msg_text)
        toaster = ToastNotifier()
        toaster.show_toast("Stock value notification",
                           msg_text,
                           duration=10)
There's something wrong when I run it; the error shown in the screenshot (omitted here) is my problem.
You have declared "li" and "msg_text" inside the "job" function, which means these variables are only available inside "job"; referencing them in the outer loop raises a NameError.
There are many ways to solve this problem; I will just propose one to try to help you:
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email

stock_no = input('Please insert stock no:')
set_price = input('Please set notification price:')

def get_stock_price():
    links = 'https://histock.tw/stock/%s' % stock_no
    response = requests.get(links)
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    tittle = soup.find('h3').get_text().strip()
    li = soup.find('span', id="Price1_lbTPrice").span.get_text()
    return li, tittle

schedule.every(5).minutes.do(get_stock_price)

while True:
    schedule.run_pending()
    time.sleep(1)
    current_price, tittle = get_stock_price()
    if set_price == current_price:
        msg_text = tittle + ' stock value is ' + current_price
        send_email(msg_text)
        toaster = ToastNotifier()
        toaster.show_toast("Stock value notification",
                           msg_text,
                           duration=10)
I didn't test the code above, but it may help you understand the error that you posted.
Good luck and happy coding!
Finally, I fixed this based on Tito's answer. I'm posting the code in the hope it helps somebody with the same question.
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email

stock_no = input('Please insert stock no:')
set_price = '%.2f' % float(input('Please set notification price:'))

def get_stock_price():
    links = 'https://histock.tw/stock/%s' % stock_no
    response = requests.get(links)
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    tittle = soup.find('h3').get_text().strip()
    li = soup.find('span', id="Price1_lbTPrice").span.get_text()
    return li, tittle

schedule.every(5).minutes.do(get_stock_price)

while True:
    try:
        schedule.run_pending()
        time.sleep(1)
        current_price = get_stock_price()[0]
        if set_price == current_price:
            msg_text = get_stock_price()[1] + \
                'stock value is ' + current_price
            send_email(msg_text)
            toaster = ToastNotifier()
            toaster.show_toast("Stock value notification",
                               msg_text,
                               duration=10)
    except:
        print('It is not working...')

Call a class from another script

I have found a piece of code I can use in my own script. I am not quite sure how it all works, but it does :-). As a newbie, I don't know how to call it. It looks like this:
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import unittest
import time
import util
from random import randint
class Test(unittest.TestCase):
""" Demonstration: Get Chrome to generate fullscreen screenshot """
def setUp(self):
self.driver = webdriver.Chrome()
def tearDown(self):
self.driver.quit()
def test_fullpage_screenshot(self):
''' Generate document-height screenshot '''
url = "https://www.(a login page)
# Login on Stockopedia
self.driver.find_element_by_id('username').send_keys('XXXXXXXXXXXXX')
self.driver.find_element_by_id('password').send_keys('XXXXXXXXXXXXX')
self.driver.find_element_by_id('auth_submit').click()
time.sleep(5)
# Indsæt tickerkode
self.driver.find_element_by_name('searchQuery').send_keys(var1, Keys.ENTER)
time.sleep(5)
self.driver.find_element_by_name('searchQuery').send_keys('', Keys.ENTER)
time.sleep(randint(10, 60))
util.fullpage_screenshot(self.driver, "test.jpg")
if __name__ == "__main__":
unittest.main(argv=[sys.argv[0]])
Can anybody help me call it from another script with the variable var1 as an argument?
I have now added the script that calls the class; the call is between the two rows of stars.
But as I see it, not even an instance of the class is created. What am I doing wrong?
import bs4 as bs
import datetime as dt
import os
import logging
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import re
import test
import fix_yahoo_finance as yf
import time

yf.pdr_override()

# ticker_indexes contains: the URL of the website to fetch the ticker codes
# from, the class name of the table the ticker codes are in, the number of
# the table column the ticker codes are in, and the name of the file the
# program should store the ticker codes in.
# ticker_indexes = [['dk_large_cap', 'http://www.nasdaqomxnordic.com/index/index_info?Instrument=SE0001776667',
#                    'tablesorter tablesorter-default', 1, 'dk_large_cap_tickers.pickle']]
ticker_indexes = [['c25', 'https://en.wikipedia.org/wiki/OMX_Copenhagen_25',
                   'wikitable sortable', 2, 'c25_tickers.pickle'],
                  ['dax', 'https://en.wikipedia.org/wiki/DAX',
                   'wikitable sortable', 3, 'dax_tickers.pickle'],
                  ['sto30', 'https://da.wikipedia.org/wiki/OMXS30',
                   'wikitable sortable', 2, 'sto30_tickers.pickle'],
                  ['obx25', 'https://en.wikipedia.org/wiki/OBX_Index',
                   'wikitable sortable', 2, 'obx25_tickers.pickle'],
                  ['nasdaq100', 'https://www.cnbc.com/nasdaq-100/',
                   'data quoteTable', 0, 'nasdaq100.pickle']]

logging.basicConfig(filename='Share prices logfile.log', level=logging.INFO,
                    format='%(asctime)s: %(levelname)s: %(message)s')

def save_index_tickers(indexname, tickerpath, table_class_id, tickercol,
                       tickerlist):
    try:
        resp = requests.get(tickerpath)
        soup = bs.BeautifulSoup(resp.text, 'lxml')
        table = soup.find('table', {'class': table_class_id})
        tickers = []
        for row in table.findAll('tr')[1:]:
            ticker = row.findAll('td')[tickercol].text.replace('.', '-')
            ticker = ticker.strip('\n')
            if (indexname == 'sto30') or (indexname == 'obx25'):
                ticker = ticker[1:]
            tickers.append(ticker)
            print(ticker)
        with open('C:\\Users\\Johnn\\Desktop\\FA Sheet\\pickle/' + tickerlist, "wb") as f:
            pickle.dump(tickers, f)
        logging.info(str(indexname) + ' ' + str(tickerlist) + ' OK')
        return tickers
    except Exception as e:
        logging.warning(str(indexname) + str(tickerlist) + str(e))

# save__screendump
def get_scrdump_from_stop(indexname, tickerpath, table_class_id, tickercol,
                          tickerlist, reload=False):
    try:
        if reload:
            logging.info('RELOAD ' + str(indexname) + str(tickerlist))
            tickers = save_index_tickers(indexname, tickerpath, table_class_id,
                                         tickercol, tickerlist)
        else:
            with open('C:\\Users\\Johnn\\Desktop\\FA Sheet\\pickle/' + tickerlist, "rb") as f:
                tickers = pickle.load(f)
        if not os.path.exists('C:\\Users\\Johnn\\Desktop\\FA Sheet\\Sheet'):
            os.makedirs('C:\\Users\\Johnn\\Desktop\\FA Sheet\\Sheet')
        # **********************************************************************
        for ticker in tickers:
            obj = test.Test(var1)
            obj.setUp()
            obj.test_fullpage_screenshot()
            obj.tearDown()
        # **********************************************************************
        logging.info(str(indexname) + ' Sheet downloaded OK')
    except Exception as e:
        logging.warning(str(indexname) + str(tickerlist) + str(e))

def main(ticker_indexes):
    for ticker_index in ticker_indexes:
        print('*****')
        print(ticker_index[0])
        print('*****')
        save_index_tickers(ticker_index[0], ticker_index[1], ticker_index[2],
                           ticker_index[3], ticker_index[4])
        get_scrdump_from_stop(ticker_index[0], ticker_index[1], ticker_index[2],
                              ticker_index[3], ticker_index[4])
    logging.info('Finished')

main(ticker_indexes)
Import your file. If the class you are importing is in another directory:
import dir1.dir2.filename
If you are calling the class from a file in the same directory:
import filename
To create an object of your file's class:
obj = filename.Test(var1)
Then the rest of the code will look like this:
obj.setUp()
obj.test_fullpage_screenshot()
obj.tearDown()
If you're wondering what self means in the code: to execute a method of the class, the first argument must be the instance itself, self. This is why you instantiate an object first, rather than calling the method on the class without an instance, like this:
filename.Test.setUp()
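
One caveat about the snippet above: unittest.TestCase's constructor takes the name of a test method, not an arbitrary value, so filename.Test(var1) will not actually hand var1 to the test. A minimal sketch of one workaround, assuming test_fullpage_screenshot is edited to read self.var1 instead of the bare name var1 (the attribute name is my own):

import test  # the module that defines class Test

def run_screenshot(ticker):
    # TestCase's first argument is the test method name to bind.
    case = test.Test('test_fullpage_screenshot')
    case.var1 = ticker  # hypothetical attribute the test method would read
    case.setUp()
    try:
        case.test_fullpage_screenshot()
    finally:
        case.tearDown()  # always quit the browser, even if the test fails

run_screenshot('AAPL')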

How to speed up my process

I wrote a script that will web scrape data for a list of stocks. The scraper has to get the data from 2 separate pages so each stock symbol must scrape 2 different pages. If I run the process on a list that is 1000 items long it will take around 30 minutes to complete. It's not horrible, I can set it and forget it, but I'm wondering if there is a way to speed up the process. Maybe store the data and wait to write it all at the end instead of on each loop? Any other ideas appreciated.
import requests
from BeautifulSoup import BeautifulSoup
from progressbar import ProgressBar
import csv

symbols = {'AMBTQ','AABA','AAOI','AAPL','AAWC','ABEC','ABQQ','ACFN','ACIA','ACIW','ACLS'}
pbar = ProgressBar()

with open('industrials.csv', "ab") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(['Symbol','5 Yr EPS','EPS TTM'])
    for s in pbar(symbols):
        try:
            url1 = 'https://research.tdameritrade.com/grid/public/research/stocks/fundamentals?symbol='
            full1 = url1 + s
            response1 = requests.get(full1)
            html1 = response1.content
            soup1 = BeautifulSoup(html1)
            for hist_div in soup1.find("div", {"data-module-name": "HistoricGrowthAndShareDetailModule"}):
                EPS5yr = hist_div.find('label').text
        except Exception as e:
            EPS5yr = 'Bad Data'
            pass
        try:
            url2 = 'https://research.tdameritrade.com/grid/public/research/stocks/summary?symbol='
            full2 = url2 + s
            response2 = requests.get(full2)
            html2 = response2.content
            soup2 = BeautifulSoup(html2)
            for div in soup2.find("div", {"data-module-name": "StockSummaryModule"}):
                EPSttm = div.findAll("dd")[11].text
        except Exception as e:
            EPSttm = "Bad data"
            pass
        writer.writerow([s,EPS5yr,EPSttm])
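
No answer is included here, but since the 30 minutes are almost entirely network wait, the usual remedy is to fetch the two pages per symbol concurrently and write all rows at the end, as the question itself suggests. A minimal Python 3 sketch, ported to bs4 and untested against the live site:

import csv
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

FUND = 'https://research.tdameritrade.com/grid/public/research/stocks/fundamentals?symbol='
SUMM = 'https://research.tdameritrade.com/grid/public/research/stocks/summary?symbol='

def scrape_symbol(s):
    # Each call does two HTTP requests; threads overlap the network wait.
    try:
        soup1 = BeautifulSoup(requests.get(FUND + s, timeout=10).content, 'html.parser')
        hist = soup1.find('div', {'data-module-name': 'HistoricGrowthAndShareDetailModule'})
        eps5yr = hist.find('label').text
    except Exception:
        eps5yr = 'Bad Data'
    try:
        soup2 = BeautifulSoup(requests.get(SUMM + s, timeout=10).content, 'html.parser')
        summ = soup2.find('div', {'data-module-name': 'StockSummaryModule'})
        epsttm = summ.findAll('dd')[11].text
    except Exception:
        epsttm = 'Bad data'
    return s, eps5yr, epsttm

symbols = ['AMBTQ', 'AABA', 'AAOI', 'AAPL', 'AAWC', 'ABEC']
with ThreadPoolExecutor(max_workers=10) as pool:
    rows = list(pool.map(scrape_symbol, symbols))

# Write everything in one pass at the end instead of once per loop iteration.
with open('industrials.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Symbol', '5 Yr EPS', 'EPS TTM'])
    writer.writerows(rows)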

Python script running via Task Scheduler dies ~30 min after logging off Windows

I have set my Windows power settings so that my computer never turns off and never goes to sleep. I've set up the following Python script to run as a scheduled task, and it runs fine until almost half an hour after I've logged off my computer, when it mysteriously stops. There is no error message in the Events log, and the memory used by this process doesn't appear to be spiking. I don't know what's going on. Please help!
import datetime
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
import logging
import shutil

def get_soup(theurl):
    html = urlopen(theurl)
    return BeautifulSoup(html.read(), "lxml")

def output_data(soup, filename, fieldnames, rows_with_info, supervision_row_count):
    with open(filename, 'a', newline='') as csvfile:
        mywriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
        mydict = {}
        # Scraping first table
        offender_table = soup.find(lambda tag: tag.name=='table' and tag.has_attr('id') and tag['id']=="offender-search-details")
        rows = offender_table.findAll(lambda tag: tag.name=='tr')
        mydict[fieldnames[0]] = clean_data(name_and_id[0])
        mydict[fieldnames[1]] = clean_data(name_and_id[1])
        # lots of similar code removed for sake of brevity
        mywriter.writerow(mydict)

start_id = 10
max_id = 199999
for the_id in range(start_id, max_id):
    logger.info('running with id-' + str(the_id))
    theurl = thebaseurl + str(the_id)
    soup = get_soup(theurl)
    sentence_table = soup.find(lambda tag: tag.name=='table' and tag.has_attr('id') and tag['id']=="offender-search-sentence-info")
    if sentence_table:
        logger.info('found data for id-' + str(the_id))
        sentence_rows = sentence_table.findAll(lambda tag: tag.name=='tr')
        supervision_row_count = 0
        for the_row_index in range(0, len(sentence_rows)):
            col_count = sentence_rows[the_row_index].find_all('td')
            if (len(col_count) == 2):
                supervision_row_count = supervision_row_count + 1
        supervision_row_count = supervision_row_count - 1
        rows_with_info = len(sentence_rows) - 4 - supervision_row_count
        output_data(soup, filename, fieldnames, rows_with_info, supervision_row_count)

logger.info('finished-' + str(datetime.datetime.now()))
