I have found a peace of code I can use in my own script, I am not quite sure how the code all works, but it does :-), but as a newbee I dont know how to call it, it look like this:
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import unittest
import time
import util
from random import randint
class Test(unittest.TestCase):
""" Demonstration: Get Chrome to generate fullscreen screenshot """
def setUp(self):
self.driver = webdriver.Chrome()
def tearDown(self):
self.driver.quit()
def test_fullpage_screenshot(self):
''' Generate document-height screenshot '''
url = "https://www.(a login page)
# Login on Stockopedia
self.driver.find_element_by_id('username').send_keys('XXXXXXXXXXXXX')
self.driver.find_element_by_id('password').send_keys('XXXXXXXXXXXXX')
self.driver.find_element_by_id('auth_submit').click()
time.sleep(5)
# Indsæt tickerkode
self.driver.find_element_by_name('searchQuery').send_keys(var1, Keys.ENTER)
time.sleep(5)
self.driver.find_element_by_name('searchQuery').send_keys('', Keys.ENTER)
time.sleep(randint(10, 60))
util.fullpage_screenshot(self.driver, "test.jpg")
if __name__ == "__main__":
unittest.main(argv=[sys.argv[0]])
Can anybody help me, so I can call it from another script with variable var1 as a argument
I have now add the script that call the Class, the call is between the 2 row´s of stars *.
But as I see it, not even an instance of the class is created, what do I do wrong ?
import bs4 as bs
import datetime as dt
import os
import logging
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import re
import test
import fix_yahoo_finance as yf
import time
yf.pdr_override
# ticker_index indeholder: stien til det website den skal hente tickerkoderne,
# klassenavnet på den tabel tickerkoderne i, nummer på den kolonne i tabellen
# tickerkoderne ligger i, og navnet på den fil programmet skal ligge
# tickerkoderne i
# ticker_indexes = [['dk_large_cap', 'http://www.nasdaqomxnordic.com/index/index_info?Instrument=SE0001776667',
# 'tablesorter tablesorter-default', 1, 'dk_large_cap_tickers.pickle']]
ticker_indexes = [['c25', 'https://en.wikipedia.org/wiki/OMX_Copenhagen_25',
'wikitable sortable', 2, 'c25_tickers.pickle'],
['dax', 'https://en.wikipedia.org/wiki/DAX',
'wikitable sortable', 3, 'dax_tickers.pickle'],
['sto30', 'https://da.wikipedia.org/wiki/OMXS30',
'wikitable sortable', 2, 'sto30_tickers.pickle'],
['obx25', 'https://en.wikipedia.org/wiki/OBX_Index',
'wikitable sortable', 2, 'obx25_tickers.pickle'],
['nasdaq100', 'https://www.cnbc.com/nasdaq-100/',
'data quoteTable', 0, 'nasdaq100.pickle']]
logging.basicConfig(filename='Share prices logfile.log', level=logging.INFO,
format='%(asctime)s: %(levelname)s: %(message)s')
def save_index_tickers(indexname, tickerpath, table_class_id, tickercol,
tickerlist):
try:
resp = requests.get(tickerpath)
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': table_class_id})
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[tickercol].text.replace('.', '-')
ticker = ticker.strip('\n')
if (indexname == 'sto30') or (indexname == 'obx25'):
ticker = ticker[1:]
tickers.append(ticker)
print(ticker)
with open('C:\\Users\\Johnn\\Desktop\\FA Sheet\\pickle/' + tickerlist, "wb") as f:
pickle.dump(tickers, f)
logging.info(str(indexname) + ' ' + str(tickerlist) + ' OK')
return tickers
except Exception as e:
logging.warning(str(indexname) + str(tickerlist) + str(e))
# save__screendump
def get_scrdump_from_stop(indexname, tickerpath, table_class_id, tickercol,
tickerlist, reload=False):
try:
if reload:
logging.info('RELOAD ' + str(indexname) + str(tickerlist))
tickers = save_index_tickers(indexname, tickerpath, table_class_id,
tickercol, tickerlist)
else:
with open('C:\\Users\\Johnn\\Desktop\\FA Sheet\\pickle/' + tickerlist, "rb") as f:
tickers = pickle.load(f)
if not os.path.exists('C:\\Users\\Johnn\\Desktop\\FA Sheet\\Sheet'):
os.makedirs('C:\\Users\\Johnn\\Desktop\\FA Sheet\\Sheet')
# ******************************************************************************
for ticker in tickers:
obj = test.Test(var1)
obj.setUp()
obj.test_fullpage_screenshot()
obj.tearDown()
#*******************************************************************************
logging.info(str(indexname) + ' Sheet downloaded OK')
except Exception as e:
logging.warning(str(indexname) + str(tickerlist) + str(e))
def main(ticker_indexes):
for ticker_index in ticker_indexes:
print('*****')
print(ticker_index[0])
print('*****')
save_index_tickers(ticker_index[0], ticker_index[1], ticker_index[2],
ticker_index[3], ticker_index[4])
get_scrdump_from_stop(ticker_index[0], ticker_index[1], ticker_index[2],
ticker_index[3], ticker_index[4])
logging.info('Finished')
main(ticker_indexes)
import your file as such
If the importing class is in another dir.
import dir1.dir2.filename
if you are calling the class from a file created in the same dir
import filename
To create an object of your file's class.
obj = filename.Test(var1)
Then the rest of the code will look like this.
obj.setUp()
obj.test_fullpage_screenshot()
obj.tearDown()
If you're wondering what self means in the code.
To execute the function in the class, you need the first argument which is the own class object self.
why we do this is so that you won't call the class like this, without instantiating the object.
filename.Test.setUp()
Related
Help what to do. When you try to search, it often displays a name with an encoding error in the squeak. That is, for example (УкÑаинÑкий VOD поÑÑаÐ)
Code
from base64 import encode
import requests
from lxml.html import fromstring
from googlesearch import search
from time import sleep as wait
import os
os.system('cls || clear')
query = input('Уведіть ключові слова : ')
list_url = []
while 1:
try:
col = int(input('Количество запросов : '))
break
except ValueError:
print('Введите число')
for j in search(query, tld="co.in", num=col, stop=col, pause=2):
list_url.append(j)
if list_url != []:
for i in list_url:
wait(0.1)
r = requests.get(i)
tree = fromstring(r.content)
Title = tree.findtext('.//title')
print(f'\r[{Title}] - {i}\n')
try:
os.remove('.google-cookie')
except FileNotFoundError:
pass
else:
print('Empty')
input('\nExit\n')
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
I made a python function to get all of the categories and their child node until the last one. I want the output to be like this: {'https://www.amazon.ae/gp/bestsellers/appliances/': ['Heating And Cooling', 'https://www.amazon.ae/gp/bestsellers/appliances/12134072031']['Air Conditioners', 'https://www.amazon.ae/gp/bestsellers/kitchen/15298093031']['Cabinet Air Conditioners', 'https://www.amazon.ae/gp/bestsellers/kitchen/15298093031']
My code:
import requests
from bs4 import BeautifulSoup as bs
import time
from tqdm import tqdm
_seen_categories = []
def crawl(url):
r = requests.get(url)
time.sleep(2)
s = bs(r.text, "html.parser")
try:
treeitems = s.find("span", class_="_p13n-zg-nav-tree-all_style_zg-selected__1SfhQ").find_next("div", {"role": "group"}).find_all("div", {"role": "treeitem"})
except:
treetiems = None
fullDict = []
for treeitem in tqdm(treeitems):
a = treeitem.find_next("a")
d = {url:[a.text.strip(), a["href"]]}
fullDict.append(d)
print(a.text.strip())
print(a["href"])
if treeitems is not None:
next_url = "https://www.amazon.ae"+a['href']
try:
if next_url not in _seen_categories:
crawl(next_url)
except:
pass
else:
_seen_categories.append(next_url)
time.sleep(2)
crawl("https://www.amazon.ae/gp/bestsellers/appliances")
This function is not formatting as expected. Need help to complete this.
I set schedule 30 seconds, but data will write in csv every 1-2 seconds...
I try a lot of ways to fix it, but there is no use.
Hope my get data write in csv every 30 seconds.
Hope somebody can help me solve it, thanks a lot!
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email
from datetime import datetime as dt
import csv
stock_no = input('Please insert stock no:')
set_price = '%.2f' % float(input('Please set notification price:'))
def get_stock_price():
links = 'https://histock.tw/stock/%s' % stock_no
response = requests.get(links)
soup = bs4.BeautifulSoup(response.text, 'lxml')
tittle = soup.find('h3').get_text().strip()
li = soup.find('span', id="Price1_lbTPrice").span.get_text()
time_now = dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
with open('C:/Python workspace/stock_value_notification/index.csv', 'a', newline='') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([tittle, li, time_now])
return li, tittle, time_now
schedule.every(10).seconds.do(get_stock_price)
while True:
try:
schedule.run_pending()
time.sleep(1)
current_price = get_stock_price()[0]
if set_price == current_price:
msg_text = get_stock_price()[1] + \
'stock value is ' + current_price
send_email(msg_text)
toaster = ToastNotifier()
toaster.show_toast("Stock value notification",
msg_text,
duration=10)
except:
print('It is not working...')
break
this csv file is the result I program
You are using the schedule the wrong way.
I hope this pesudo code example will help you:
def scrape_and_save():
res = scrape()
save_to_csv(res)
schedule.every(10).seconds.do(scrape_and_save)
while True:
schedule.run_pending()
time.sleep(1)
You have to move the notify section into either a separate function or into get_stock_price(). Here is a working refactored code.
Schedule will call get_stock_price every interval and notification is later called once data is written into the o/p file.
Adjust your it according to your need further.
import bs4
import requests
import schedule
import time
from win10toast import ToastNotifier
from datetime import datetime as dt
import csv
import sys
stock_no = input('Please insert stock no:')
set_price = '%.2f' % float(input('Please set notification price:'))
def get_stock_price():
try:
links = 'https://histock.tw/stock/%s' % stock_no
response = requests.get(links)
soup = bs4.BeautifulSoup(response.text, 'lxml')
tittle = soup.find('h3').get_text().strip()
li = soup.find('span', id="Price1_lbTPrice").span.get_text()
time_now = dt.utcnow().strftime('%Y-%m-%d %H:%M:%S')
with open('index.csv', 'a', newline='', encoding="utf-8") as csv_file:
writer = csv.writer(csv_file)
writer.writerow([tittle, li, time_now])
notify(li, tittle)
except Exception as e:
print('It is not working...')
print(e)
sys.exit()
def notify(current_price, stock_title):
if set_price == current_price:
msg_text = stock_title + \
' stock value is ' + current_price
toaster = ToastNotifier()
toaster.show_toast("Stock value notification",
msg_text,
duration=10)
schedule.every(10).seconds.do(get_stock_price)
while True:
schedule.run_pending()
time.sleep(1)
PS: webscraping activity may cause additional delay sometimes adding to the execution time.
I'm practicing to build a project. I want get the stock value every 5 min, if the price is the same with setting price, I will got an e-mail and desktop notification, but now I have some trouble...I don't know how to fix this...
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email
stock_no = input('Plesae insert stock no:')
set_price = input('Please set notification price:')
def job():
links = 'https://histock.tw/stock/%s' % stock_no
response = requests.get(links)
soup = bs4.BeautifulSoup(response.text, 'lxml')
tittle = soup.find('h3').get_text().strip()
li = soup.find('span', id="Price1_lbTPrice").span.get_text()
msg_text = tittle + 'stock value is ' + li
schedule.every(5).minutes.do(job)
while True:
schedule.run_pending()
time.sleep(1)
if set_price is li:
send_email(msg_text)
toaster = ToastNotifier()
toaster.show_toast("Stock value notification",
msg_text,
duration=10)
there's something wrong...like this
This is my problem
You have declared "li" and "msg_text" inside of the "job" function, that means that these variables are just available inside the "job" function.
There are many ways to solve this problem, I will just propose one to try to help you:
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email
stock_no = input('Please insert stock no:')
set_price = input('Please set notification price:')
def get_stock_price():
links = 'https://histock.tw/stock/%s' % stock_no
response = requests.get(links)
soup = bs4.BeautifulSoup(response.text, 'lxml')
tittle = soup.find('h3').get_text().strip()
li = soup.find('span', id="Price1_lbTPrice").span.get_text()
return li
schedule.every(5).minutes.do(job)
while True:
schedule.run_pending()
time.sleep(1)
current_price = get_stock_price()
if set_price == current_price:
msg_text = tittle + 'stock value is ' + current_price
send_email(msg_text)
toaster = ToastNotifier()
toaster.show_toast("Stock value notification",
msg_text,
duration=10)
I didn't test the code above but it may be useful to you understand the error that you posted.
Good luck and happy coding!
Finally, I fix this question from Tito's answer. I post these code, hope it can help somebody have the same question.
import bs4
import requests
import schedule
import time
import smtplib
import email.message
from win10toast import ToastNotifier
from function import send_email
stock_no = input('Please insert stock no:')
set_price = '%.2f' % int(input('Please set notification price:'))
def get_stock_price():
links = 'https://histock.tw/stock/%s' % stock_no
response = requests.get(links)
soup = bs4.BeautifulSoup(response.text, 'lxml')
tittle = soup.find('h3').get_text().strip()
li = soup.find('span', id="Price1_lbTPrice").span.get_text()
return li, tittle
schedule.every(5).minutes.do(get_stock_price)
while True:
try:
schedule.run_pending()
time.sleep(1)
current_price = get_stock_price()[0]
if set_price == current_price:
msg_text = get_stock_price()[1] + \
'stock value is ' + current_price
send_email(msg_text)
toaster = ToastNotifier()
toaster.show_toast("Stock value notification",
msg_text,
duration=10)
except:
print('It is not working...')
I am trying to create a python script that complets th form on this page http://segas.gr/index.php/el/2015-best-athlete by selecting the radio button with label "Γιώργος Μηλιαράς (ΣΑΚΑ) 800 μ./1,500 μ./3,000 μ" aka id="male_kids_3".
Here is my code:
import urllib
import urllib2
import webbrowser
url = "http://segas.gr/index.php/el/2015-best-athlete"
data = urllib.urlencode({'male_kids_3': 'checked'})
results = urllib2.urlopen(url, data)
with open("results.html", "w") as f:
f.write(results.read())
webbrowser.open("results.html")
I found a solution using selinium
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
def malakia():
#Get url
browser.get("http://segas.gr/index.php/el/2015-best-athlete")
miliaras = browser.find_element_by_id("male_kids_3")
miliaras.click()
validate = browser.find_element_by_name("input_submit_4")
validate.click()
if __name__ == "__main__": #Because we are bad-ass and we know python
#Let's make sme magiKKK
times = int(input("Πόσες φορές θέλεις να ψηφίσεις τον G #babas ??\n"))
#create brwoser object
browser = webdriver.Chrome()
for i in range(times):
malakia()