I'm doing some web scraping from Yahoo Finance (NVIDIA stock), and I'm wondering why I always get the same value when I run my code, even though refreshing the page in my browser gives different values (as it should). How can I fix it?
import requests
from datetime import datetime
import time
def Is_Number(string):
    # accept a digit or the decimal point of the price
    try:
        int(string)
        return True
    except ValueError:
        return string == '.'
session = requests.Session()
for i in range(10):
Response = session.get("https://finance.yahoo.com/quote/NVDA?p=NVDA")
KeyWord = 'data-pricehint'
Index = Response.text.find(KeyWord) + 26
GoOn = True
CurrentPrice = ""
while(GoOn == True):
if ( Is_Number(Response.text[Index])):
CurrentPrice = CurrentPrice + Response.text[Index]
Index = Index + 1
else:
GoOn = False
CurrentTime = datetime.now().strftime('%H:%M:%S')
print("# Price:",CurrentPrice,"at",CurrentTime)
time.sleep(10)
Why don't you try yfinance instead?
pip install yfinance
import yfinance as yf
import time
def get_price() -> float:
return yf.Ticker("NVDA").info.get("regularMarketPrice")
def run():
for i in range(10):
print(f"{i}: {get_price()}")
time.sleep(10)
if __name__ == '__main__':
run()
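As a follow-up: Ticker.info downloads a fairly large metadata blob on every call, so if you only need the latest price, fast_info may be lighter. This is a sketch assuming a recent yfinance release where fast_info.last_price is available; fall back to .info if your version doesn't have it.
import yfinance as yf

def get_price_fast() -> float:
    # fast_info skips the heavier .info request (attribute name assumed
    # from recent yfinance versions)
    return yf.Ticker("NVDA").fast_info.last_price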
I have a small script (see below) that fetches JSON data from a web URL. The goal is to print out any new data in the JSON. Is there any way I can continuously check the URL every 5 seconds and report back any changes? I'm sure I'm not doing it right, but what I have tried is creating a first list from the JSON items, waiting 5 seconds, creating a second list, and then comparing the two. This is obviously not the way to do it, because I still have to run the script myself each time. I just want to run the script once and have it 'listen' or 'poll' the URL and report back any changes in the data. My code is below; any assistance is greatly appreciated, as are any other optimizations you would suggest for my script.
import json, requests
import time
urls=["https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=1&sparkline=false", "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=2&sparkline=false"]
def get_data(url):
url = requests.get(url)
text = url.text
data = json.loads(text)
coins = [coin['id'] for coin in data]
return coins
def check_new_coins():
first_list = get_data(url)
time.sleep(5)
second_list= get_data(url)
new_coins = list(set(second_list).difference(first_list))
if len(new_coins) > 0:
for new_coin in new_coins:
print(new_coin)
else:
print("No new coins")
for url in urls:
check_new_coins()
I think this is what you're looking for:
import json, requests
import time
class Checker:
def __init__(self, urls, wait_time):
self.wait_time = wait_time
self.urls = urls
self.coins = self.get_coins()
self.main_loop()
    @staticmethod
def get_data(url):
url = requests.get(url)
text = url.text
data = json.loads(text)
coins = [coin['id'] for coin in data]
return coins
def get_coins(self):
coins = set()
for url in self.urls:
coins.update(Checker.get_data(url))
return coins
def check_new_coins(self):
new_coins = self.get_coins()
coins_diff = list(new_coins.difference(self.coins))
current_time = time.strftime("%H:%M:%S", time.localtime())
if len(coins_diff) > 0:
print(current_time, coins_diff)
else:
print(current_time, "No new coins")
self.coins = new_coins
def main_loop(self):
while True:
time.sleep(self.wait_time)
self.check_new_coins()
if __name__ == '__main__':
urls=[
"https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=1&sparkline=false",
"https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=2&sparkline=false"
]
Checker(urls, 5)
sample output:
18:57:20 No new coins
18:57:25 No new coins
18:57:30 No new coins
18:57:35 No new coins
18:57:41 No new coins
18:57:46 No new coins
18:57:51 No new coins
18:57:56 No new coins
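One small hardening note, not in the original answer: if a CoinGecko request fails or returns something other than JSON, the loop above will crash. A minimal sketch of a guarded fetch (returning an empty list so a failed poll is simply skipped; note that a failed poll will then make every coin look "new" on the next successful one):
import requests

def get_data_safe(url):
    # Return an empty list on network or decoding errors so the polling
    # loop keeps running instead of raising.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return [coin['id'] for coin in response.json()]
    except (requests.RequestException, ValueError, KeyError):
        return []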
I'm very new to Python, but I've made a lot of progress over the last few days. The script below works fine, but I just can't figure out how to implement code that would print an incremented number every time 'avail' is equal to NO. I'd like it to print something like 'None Available 1' on the first loop, 'None Available 2' on the second loop, 'None Available 3' on the third, and so on.
import requests
import time
import subprocess
from bs4 import BeautifulSoup
def get_page(url):
response = requests.get(url)
if not response.ok:
print('Server responded:', response.status_code)
else:
soup = BeautifulSoup(response.text, 'lxml')
return soup
def get_detail_data(soup):
avail = soup.find('span', id='availability').text.strip()
if avail == "YES":
return True
elif avail == "NO":
print('None Available')
return False
else:
print("Unexpected value")
return None
def main():
url ='https://www.blahblah.com'
while True:
is_available = get_detail_data(get_page(url))
if is_available:
subprocess.call(["C:\\temp\\filename.bat"], shell=False)
break
time.sleep(2)
if __name__ == '__main__':
main()
The following would probably work, but there might be a better way to structure it.
_not_avail_counter = 0
def get_detail_data(soup):
    global _not_avail_counter  # needed so the += below rebinds the module-level counter
    avail = soup.find('span', id='availability').text.strip()
if avail == "YES":
return True
elif avail == "NO":
_not_avail_counter += 1
print('None Available ' + str(_not_avail_counter))
return False
else:
print("Unexpected value")
return None
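If you would rather not keep a module-level counter (purely a matter of taste), one alternative sketch wraps the counter in a small closure and calls it from the "NO" branch:
def make_counter():
    count = 0
    def bump():
        nonlocal count   # rebind the enclosing variable on each call
        count += 1
        return count
    return bump

not_avail_count = make_counter()
# inside get_detail_data, in the avail == "NO" branch:
#     print('None Available', not_avail_count())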
I would suggest changing your while True loop into a for loop on an itertools.count iterator. You can pass the value from the count to the get_detail_data function with an argument.
import itertools
def get_detail_data(soup, count): # take the count as an argument
avail = soup.find('span', id='availability').text.strip()
if ...
# ...
elif avail == "NO":
print('None Available', count) # include count here (and anywhere else you want)
# ...
def main():
url ='https://www.blahblah.com'
for c in itertools.count(): # produce the count in a loop
is_available = get_detail_data(get_page(url), c)
# ...
Note that itertools.count starts counting at zero. If you want to start at 1 (like a human usually would when counting things), pass 1 as the start argument: for c in itertools.count(1).
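Putting the pieces together, here is a small self-contained sketch of the counting behaviour (the scraped availability value is faked with a list so the example runs on its own; in the real script it comes from the soup lookup above):
import itertools

def get_detail_data(avail, count):
    # 'avail' stands in for soup.find('span', id='availability').text.strip()
    if avail == "YES":
        return True
    print('None Available', count)
    return False

responses = iter(["NO", "NO", "NO", "YES"])   # simulated page results
for c in itertools.count(1):                  # start at 1, not 0
    if get_detail_data(next(responses), c):
        print("Available after", c, "checks")
        break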
I've updated the Selenium script with an open-source auto-liker. I will post the social network, username, and password, because it's hard to understand what happens without seeing it live. I edited some of the scrolling code, but the result is the same: only the first post gets liked.
The only thing I want is to click the like button found with
a.find_element_by_css_selector("span#like-button.btn.btn-default.stat-item")
and keep scrolling; each like button has the same attributes.
Social network: hiall.app
Username: whatl0ol
Password: tornike123
I'm trying to update the source and will post the results.
Source code:
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver.chrome.options import Options
class FacebookBot:
def __init__(self,username,password,status_report=False):
self.username = username
self.password = password
self.status_report = status_report
options = Options()
options.add_argument("--disable-notifications")
if self.status_report: print("Opening chromedriver...")
self.wd = webdriver.Chrome(chrome_options=options)
if self.status_report: print("Logging in...")
self.login()
def login(self):
self.wd.get("https://hiall.app")
self.wd.find_element_by_name("username").send_keys(self.username)
self.wd.find_element_by_name("password").send_keys(self.password)
self.wd.find_element_by_css_selector("button.btn.btn-main").click()
def convert_to_int(self,string):
try:
return int(string)
except ValueError:
if string.lower().endswith("k"):
string = string[:-2]
try:
return int(float(string)*1000)
except ValueError:
return int(string)*1000
def get_posts(self):
articles = self.wd.find_elements_by_css_selector("span#like-button.btn.btn-default.stat-item")
data = []
for a in articles:
if a.get_attribute("id").startswith("span#like-button.btn.btn-default.stat-item"):
likeIts = [i.get_attribute("aria-label").split() for i in a.find_elements_by_css_selector("span#like-button.btn.btn-default.stat-item") if i.get_attribute("aria-label")]
likes = {"Like":0,"Love":0,"Haha":0,"Wow":0,"Sad":0,"Angry":0}
likes.update({i[1]: self.convert_to_int(i[0]) for i in likeIts})
try:
button = a.find_element_by_css_selector("span#like-button.btn.btn-default.stat-item")
except exceptions.NoSuchElementException:
continue
data.append({"likes":likes,"button":button,"article":a})
return data
def scroll(self,page_end=100):
find_elem = None
scroll_from = 0
scroll_limit = self.wd.execute_script("return document.body.scrollHeight")
i = 0
while not find_elem:
self.wd.execute_script("window.scrollTo(%d, %d);" % (scroll_from, scroll_from + scroll_limit))
scroll_from += scroll_limit
i += 1
if page_end and i >= page_end:
break
try:
find_elem = self.wd.find_element_by_css_selector("span#like-button.btn.btn-default.stat-item")
find_elem.click()
except exceptions.ElementNotVisibleException:
find_elem = None
except exceptions.NoSuchElementException:
find_elem = None
def automate(self,unlike=False,page_end=100):
if self.status_report: print("Forcing Facebook to load the posts...")
self.scroll(page_end)
if self.status_report: print("Scrolled down %s times" % page_end)
if self.status_report: print("%s posts..." % ("Unliking" if unlike else "Liking"))
self.wd.execute_script("window.scrollTo(0,0);")
posts = self.get_posts()
num = 0
for p in posts:
if p["likes"]["Angry"] == 0 and p["likes"]["Sad"] == 0 and p["likes"]["Like"] >= 5:
article = p["article"]
self.wd.execute_script("arguments[0].scrollIntoView();", article)
button = article.find_element_by_css_selector("span#like-button.btn.btn-default.stat-item")
                if True:
#button.get_attribute("aria-pressed") == ("true" if unlike else "false"):
num += 1
self.wd.execute_script("arguments[0].click();",button)
try:
p = article.find_element_by_tag_name("p").get_attribute("innerText")
p = p.replace("\n"," ").encode().decode("utf-8")
except exceptions.NoSuchElementException:
p = ""
except:
p = ""
if self.status_report: print(' - %s "%s"' % ("Unliked" if unlike else "Liked",p))
if self.status_report: print("%s %s posts" % ("Unliked" if unlike else "Liked",num))
def close(self):
self.wd.close()
username = "whatl0ol"
password = "tornike123"
pages = False
while pages is False:
inp = input("How many pages to go through? (default 100, 'all' for whole News Feed): ")
if inp.isdigit():
pages = int(inp)
elif inp == "all":
pages = None
unlike = None
while unlike is None:
inp = input("Do you want to Like (l) or Unlike (u) posts? ")
if inp == "l":
unlike = False
elif inp == "u":
unlike = True
bot = FacebookBot(username,password,status_report=True)
bot.automate(unlike=unlike, page_end=pages)
print("Finished")
print()
input("Return to exit")
try:
bot.close()
except:
pass
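For what it's worth, here is a stripped-down sketch of the behaviour the script seems to be aiming for: click every like button once, scroll so more posts load, and repeat. It reuses the selector from the question and the same (older) find_elements_by_css_selector API, and it assumes new posts are appended after the ones already seen, which is a guess about hiall.app's feed:
import time

def like_while_scrolling(wd, max_scrolls=20, pause=1.0):
    liked = 0
    for _ in range(max_scrolls):
        buttons = wd.find_elements_by_css_selector(
            "span#like-button.btn.btn-default.stat-item")
        for button in buttons[liked:]:   # only buttons we haven't clicked yet
            wd.execute_script("arguments[0].click();", button)
            liked += 1
        # scroll one viewport further so the next batch of posts loads
        wd.execute_script("window.scrollBy(0, window.innerHeight);")
        time.sleep(pause)
    return liked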
I'm making a Wikipedia crawler, but it's very slow. How can I make it faster?
I'm using the requests module and beautifulsoup4 to parse the HTML pages. I've tried implementing multithreading, but it's still slow.
import requests
from bs4 import BeautifulSoup as bs
from queue import Queue
baseURL = "https://en.wikipedia.org"
startURL = "/wiki/French_battleship_Courbet_(1911)"
endURL = "/wiki/Royal_Navy"
tovisit = Queue()
visited = []
def main():
if (not checkValid(startURL)) or (not checkValid(endURL)):
print("Invalid URLs entered.")
quit()
initCrawler(startURL)
def initCrawler(startURL):
global tovisit
global visited
tovisit.put(startURL)
finished = False
while not finished:
if tovisit.empty():
finished = True
continue
url = tovisit.get()
childlinks = linkCrawl(url)
for i in childlinks:
tovisit.put(i)
visited.append(url)
def linkCrawl(url):
global visited
global tovisit
global endURL
print("crawling "+ url + "\n")
r = requests.get(baseURL+url)
soup = bs(r.content, "html.parser")
rawlinks = soup.find_all('a', href=True)
refinedlinks = []
for rawLink in rawlinks:
i = rawLink["href"]
if i is None:
continue
# ensure what we have is a string
if not (type(i) is str):
continue
        # no point revisiting a page we've already seen
if i in visited:
continue
if i in list(tovisit.queue):
continue
if not checkValid(i):
continue
if i == endURL:
print("yay")
exit()
refinedlinks.append(i)
return refinedlinks
def checkValid(url):
if not url.startswith("/wiki/"):
return False
if url.startswith("/wiki/Special:"):
return False
if url.startswith("/wiki/Wikipedia:"):
return False
if url.startswith("/wiki/Portal:"):
return False
if url.startswith("/wiki/File:"):
return False
if url.endswith("(disambiguation)"):
return False
return True
if __name__ == "__main__":
main()
I expected the bot to run faster, but it's still slow. From what I've read, multithreading alone eventually won't be enough.
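Two changes usually give the biggest win here: keep visited as a set so membership checks are O(1) instead of scanning a growing list, and fetch a batch of pages concurrently rather than one at a time. A rough, generic sketch (the checkValid filtering from the question would plug in where the links are collected; this is not tuned for Wikipedia specifically):
import requests
from bs4 import BeautifulSoup as bs
from concurrent.futures import ThreadPoolExecutor

baseURL = "https://en.wikipedia.org"

def fetch_links(url):
    # download one page and return (url, links found on it)
    r = requests.get(baseURL + url, timeout=10)
    soup = bs(r.content, "html.parser")
    return url, [a["href"] for a in soup.find_all("a", href=True)]

def crawl(start_urls, rounds=3, workers=8):
    visited = set()
    frontier = list(start_urls)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        for _ in range(rounds):
            batch = list({u for u in frontier if u not in visited})
            frontier = []
            for url, links in pool.map(fetch_links, batch):
                visited.add(url)
                frontier.extend(l for l in links if l not in visited)
    return visited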
I have to run the code every second and compare whether the return value from the current execution equals the value from a second earlier. I tried threading, but I couldn't find a way to store the values to compare.
import numpy as np
import random
def ran ():
a = random.randint(1,101)
return a
OK, I wrote some similar code a few years ago; I hope it helps.
import random
import time
def ran ():
a = random.randint(1,101)
print(a)
return a
def chk(a, b):
    return a == b
while True:
x=ran()
time.sleep(1)
x3=ran()
    s = chk(x, x3)
    if s:
        print("Both numbers are the same")
    else:
        print("Not equal")
What about this:
import random
import time
def ran():
a = random.randint(1, 101)
return a
previous_rand = None
while True:
new_rand = ran()
if previous_rand and previous_rand == new_rand:
        print('Equal!')
else:
        print('Not equal!')
previous_rand = new_rand
time.sleep(1)
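One tiny design note on the guard: "if previous_rand and ..." also treats a falsy previous value as "not set yet". random.randint(1, 101) never returns 0, so it doesn't matter here, but an explicit None check is slightly safer if the generator ever changes:
if previous_rand is not None and previous_rand == new_rand:
    print('Equal!')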