I have a small script (see below) that fetches JSON data from a web URL. The goal is to print out any new data that appears in the JSON. Is there a way to check the URL continuously every 5 seconds and report back any changes? I am sure I am not doing it right, but what I have tried is creating a first list from the JSON items, waiting 5 seconds, creating a second list, and then comparing the two. This is obviously not the way to do it, because I still have to run the script myself each time. I just want to run the script once and have it 'listen' or 'poll' the URL and report back any changes in the data. My code is below; any assistance is greatly appreciated, as are any other optimizations you would suggest.
import json, requests
import time

urls = [
    "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=1&sparkline=false",
    "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=2&sparkline=false"
]

def get_data(url):
    response = requests.get(url)
    data = json.loads(response.text)
    coins = [coin['id'] for coin in data]
    return coins

def check_new_coins(url):
    first_list = get_data(url)
    time.sleep(5)
    second_list = get_data(url)
    new_coins = list(set(second_list).difference(first_list))
    if len(new_coins) > 0:
        for new_coin in new_coins:
            print(new_coin)
    else:
        print("No new coins")

for url in urls:
    check_new_coins(url)
I think this is what you're looking for:
import json, requests
import time

class Checker:

    def __init__(self, urls, wait_time):
        self.wait_time = wait_time
        self.urls = urls
        self.coins = self.get_coins()
        self.main_loop()

    @staticmethod
    def get_data(url):
        response = requests.get(url)
        data = json.loads(response.text)
        coins = [coin['id'] for coin in data]
        return coins

    def get_coins(self):
        coins = set()
        for url in self.urls:
            coins.update(Checker.get_data(url))
        return coins

    def check_new_coins(self):
        new_coins = self.get_coins()
        coins_diff = list(new_coins.difference(self.coins))
        current_time = time.strftime("%H:%M:%S", time.localtime())
        if len(coins_diff) > 0:
            print(current_time, coins_diff)
        else:
            print(current_time, "No new coins")
        self.coins = new_coins

    def main_loop(self):
        while True:
            time.sleep(self.wait_time)
            self.check_new_coins()

if __name__ == '__main__':
    urls = [
        "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=1&sparkline=false",
        "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=2&sparkline=false"
    ]
    Checker(urls, 5)
Sample output:
18:57:20 No new coins
18:57:25 No new coins
18:57:30 No new coins
18:57:35 No new coins
18:57:41 No new coins
18:57:46 No new coins
18:57:51 No new coins
18:57:56 No new coins
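One more suggestion, since the question asked for optimizations: requests.get will raise on a timeout or connection error, which would kill the polling loop. Below is a minimal sketch of a more resilient check_new_coins for the Checker class above; the try/except wrapper is an addition, not part of the original answer.

    def check_new_coins(self):
        current_time = time.strftime("%H:%M:%S", time.localtime())
        try:
            new_coins = self.get_coins()
        except requests.RequestException as err:
            # Network hiccup or timeout: report it and try again next cycle.
            print(current_time, "fetch failed:", err)
            return
        coins_diff = list(new_coins.difference(self.coins))
        if len(coins_diff) > 0:
            print(current_time, coins_diff)
        else:
            print(current_time, "No new coins")
        self.coins = new_coins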
I'm writing a script in Python that prompts you to ask a question, analyzes the AskReddit subreddit, and gives you a response. My code is:
import requests
import json
import random

# The main function that will grab a reply
def grab_reply(question):
    # Navigate to the search Reddit URL
    r = requests.get('https://www.reddit.com/r/AskReddit/search.json?q=' + question + '&sort=relevance&t=all', headers={'User-agent': 'Chrome'})
    answers = json.loads(r.text)  # Load the JSON file
    Children = answers["data"]["children"]
    ans_list = []
    for post in Children:
        if post["data"]["num_comments"] >= 5:  # 5 or more comments
            ans_list.append(post["data"]["url"])
    # If no results are found return "I have no idea"
    if len(ans_list) == 0:
        return "I have no idea"
    # Pick a random post and append .json to get its comments URL
    comment_url = ans_list[random.randint(0, len(ans_list) - 1)] + '.json?sort=top'
    # Navigate to the comments
    r = requests.get(comment_url, headers={'User-agent': 'Chrome'})
    reply = json.loads(r.text)
    Children = reply[1]['data']['children']
    reply_list = []
    for post in Children:
        reply_list.append(post["data"]["body"])  # Add comments to the list
    if len(reply_list) == 0:
        return "I have no clue"
    # Return a random comment
    return reply_list[random.randint(0, len(reply_list) - 1)]

# Main loop: always ask for a question
while 1:
    input("Ask me anything: ")
    q = q.replace(" ", "+")  # Replace spaces with + for URL encoding
    print(grab_reply(q))  # Grab and print the reply
After running the script in my terminal, I get this response:
NameError: name 'q' is not defined
I have managed to get most of the errors out of my script, but this one is driving me crazy. Help me out, Stack Overflow.
Probably this will help:
while True:
    q = input("Ask me anything: ")
input("Ask me anything: ")
should be:
q = input("Ask me anything: ")
Since you are not assigning the result of the input to any variable, q is undefined.
q is not defined yet. You should define q before using it.
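As a side note for the main loop: replacing spaces with + by hand only handles one unsafe character. The standard library can encode the whole query; a small sketch, assuming the grab_reply function from the question:

from urllib.parse import quote_plus

while True:
    q = quote_plus(input("Ask me anything: "))  # encodes spaces and any other unsafe characters
    print(grab_reply(q))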
I'm making a Wikipedia crawler, but it's very slow. How can I make it faster?
I'm using the requests module and beautifulsoup4 to parse the HTML pages. I've tried implementing multithreading, but it's still slow.
import requests
from bs4 import BeautifulSoup as bs
from queue import Queue

baseURL = "https://en.wikipedia.org"
startURL = "/wiki/French_battleship_Courbet_(1911)"
endURL = "/wiki/Royal_Navy"

tovisit = Queue()
visited = []

def main():
    if (not checkValid(startURL)) or (not checkValid(endURL)):
        print("Invalid URLs entered.")
        quit()
    initCrawler(startURL)

def initCrawler(startURL):
    global tovisit
    global visited
    tovisit.put(startURL)
    finished = False
    while not finished:
        if tovisit.empty():
            finished = True
            continue
        url = tovisit.get()
        childlinks = linkCrawl(url)
        for i in childlinks:
            tovisit.put(i)
        visited.append(url)

def linkCrawl(url):
    global visited
    global tovisit
    global endURL
    print("crawling " + url + "\n")
    r = requests.get(baseURL + url)
    soup = bs(r.content, "html.parser")
    rawlinks = soup.find_all('a', href=True)
    refinedlinks = []
    for rawLink in rawlinks:
        i = rawLink["href"]
        if i is None:
            continue
        # ensure what we have is a string
        if not (type(i) is str):
            continue
        # no point revisiting pages already seen or queued
        if i in visited:
            continue
        if i in list(tovisit.queue):
            continue
        if not checkValid(i):
            continue
        if i == endURL:
            print("yay")
            exit()
        refinedlinks.append(i)
    return refinedlinks

def checkValid(url):
    if not url.startswith("/wiki/"):
        return False
    if url.startswith("/wiki/Special:"):
        return False
    if url.startswith("/wiki/Wikipedia:"):
        return False
    if url.startswith("/wiki/Portal:"):
        return False
    if url.startswith("/wiki/File:"):
        return False
    if url.endswith("(disambiguation)"):
        return False
    return True

if __name__ == "__main__":
    main()
I expect the bot to run faster, but it's actually slow. From what I've read, multithreading eventually won't be enough on its own.
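Since no threading code made it into the post, here is a rough sketch of one way to overlap the network waits (which is where most of the time goes) with a thread pool. This is an illustration, not the original code: names like fetch_links and MAX_WORKERS are invented, and it reuses the checkValid filter from above.

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

baseURL = "https://en.wikipedia.org"
MAX_WORKERS = 8  # illustrative; tune to your connection

def fetch_links(path):
    # One HTTP fetch per task; returns every href found on the page.
    r = requests.get(baseURL + path)
    soup = BeautifulSoup(r.content, "html.parser")
    return [a["href"] for a in soup.find_all("a", href=True)]

def crawl(start, end):
    visited = {start}
    frontier = [start]
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        while frontier:
            # Fetch the whole frontier concurrently, one breadth-first level at a time.
            futures = [pool.submit(fetch_links, p) for p in frontier]
            frontier = []
            for fut in as_completed(futures):
                for link in fut.result():
                    if link == end:
                        return True
                    if checkValid(link) and link not in visited:
                        visited.add(link)
                        frontier.append(link)
    return False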
What I want to do is make a sort of monitor for a website that picks up a random number. Before it does, it needs to request the website to see whether it is valid or not. When the site is live, it generates random numbers from 1 to 100, and I want to check it every random 3-6 seconds, print the number, and repeat until the website is down.
What I have tried to do is the following:
import random
import time

import requests
from bs4 import BeautifulSoup as soup

def get_product_feed(url_site):
    thread = url_site
    password = False  # We start by giving a False/True value
    while not password:  # If it is not True then we do the loop
        available = False
        while not available:
            try:
                checkpass = requests.get(thread, timeout=12)  # We request the site to see if it's alive or not
                if 'deadsite' in checkpass.url:  # If it redirects to a dead page, keep retrying
                    while True:
                        contcheck = requests.get(thread, timeout=12)  # We make a new request to see if it's dead
                        if 'deadsite' in contcheck.url:  # If it is, then we sleep 3-7 sec and try again
                            randomtime = random.randint(3, 7)
                            time.sleep(randomtime)
                        else:  # If it no longer redirects, we return the bs4 value
                            available = True
                            bs4 = soup(contcheck.text, 'lxml')
                            return bs4
                else:  # The site was alive on the first try, so return immediately
                    bs4 = soup(checkpass.text, 'lxml')
                    return bs4
            except Exception as err:
                randomtime = random.randint(1, 2)
                time.sleep(randomtime)
                continue

def get_info(thread):
    while True:
        try:
            url = thread
            resp = requests.get(url, timeout=12)  # We request the website here
            resp.raise_for_status()
            json_resp = resp.json()  # We grab the JSON value
        except Exception as err:
            randomtime = random.randint(1, 3)
            time.sleep(randomtime)
            continue
        metadata = json_resp['number']  # We return the metadata value back to get_identifier
        return metadata

def get_identifier(thread):
    new = get_info(thread)  # We call get_info(thread)
    try:
        thread_number = new
    except KeyError:
        thread_number = None
    identifier = ('{}').format(thread_number)  # We return back to script
    return identifier

def script():
    url_site = 'https://www.randomsitenumbergenerator.com/'  # The URL we are going to use
    old_list = []
    while True:
        for thread in get_product_feed(url_site):  # We loop through get_product_feed to see if it's alive or not
            if get_identifier(thread) not in old_list:  # We ask get_identifier(thread) for the value and see if it's in old_list or not
                print(get_identifier(thread))
                old_list.append(get_identifier(thread))
I added comments to make it easier to understand what is going on.
The issue I am having is that I can't get get_identifier(thread) to keep running until the website is down: I want it to keep printing numbers from the moment the site goes live until it dies, and that is my question. What do I need to do to make that happen?
My thought was to eventually add threads, so that maybe 10 threads check at the same time whether the website is dead or not and print the value back, but I am not sure that is the solution to my question.
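For what it's worth, here is a rough sketch of the loop shape being described, reusing get_identifier from the code above. The is_alive helper is a hypothetical stand-in for the 'deadsite' redirect check, not part of the original post.

import random
import time

import requests

def is_alive(url_site):
    # Hypothetical helper: wraps the 'deadsite' redirect check from above.
    try:
        r = requests.get(url_site, timeout=12)
        return 'deadsite' not in r.url
    except requests.RequestException:
        return False

def monitor(url_site):
    seen = set()
    while not is_alive(url_site):
        time.sleep(random.randint(3, 7))   # wait for the site to come up
    while is_alive(url_site):              # keep polling until it goes down
        number = get_identifier(url_site)  # get_identifier from the question
        if number not in seen:
            print(number)
            seen.add(number)
        time.sleep(random.randint(3, 6))   # check every random 3-6 seconds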
I've recently started learning Python APIs, and I've run into a problem while trying to access the HaveIBeenPwned API. I can get it to print the JSON data, so I think it's a formatting problem? All the other solutions I've found seem to force me to rewrite my entire code, only to find it doesn't work anyway or is incompatible.
# This program aims to provide 4 search functions by which users can test if their data is at risk.
import urllib.request as url
import json

def UsernameSearch():
    print("Username search selected!")

def PasswordSearch():
    print("Password search selected!")

def EmailSearch():
    Username = input("Please enter the Email that's going to be searched \n: ")

    def DataGetCurrent(Username):
        SearchURL = "https://haveibeenpwned.com/api/v2/breachedaccount/{}".format(Username)
        request = url.urlopen(url.Request(SearchURL, headers={'User-Agent': "Mozilla/5.0"}))
        data = request.read()
        data = data.decode("utf-8")
        json_data = json.loads(data)
        return json_data[0]

    Data = DataGetCurrent(Username)
    a = ("Your Email address has been involved in [number] breaches: \nBreach \nTitle: {}\nWebsite: {}\nDate: {}\nInformation: {}\nLeaked Data: {}".format(Data['Title'], Data['Domain'], Data['BreachDate'], Data['Description'], Data['DataClasses']))
    print(a)

def SiteSearch():
    print("Website search selected!")

def loop():
    try:
        answer = input("There are currently 5 options: \n(1)Username search \n(2)Password search \n(3)Email search \n(4)Website search \n(5)Exit \n \n:")
        if answer.upper() == "1":
            UsernameSearch()
        elif answer.upper() == "2":
            PasswordSearch()
        elif answer.upper() == "3":
            EmailSearch()
        elif answer.upper() == "4":
            SiteSearch()
        else:
            print("\nThis is invalid, sorry. Please try again!\n")
            loop()
    except KeyboardInterrupt:
        print("\nYou don't need to exit the program this way, there's an exit option; just type \"exit\"!\n")
        loop()

loop()
The error it throws is:
TypeError: string indices must be integers
Edit:
It's updated now, and it does retrieve some information; however, it only retrieves the first dictionary entry, whereas I need it to retrieve as many entries as there are (and preferably keep a count of them).
I'm also having trouble selecting the "DataClasses" entry and printing the individual entities within it.
All help is appreciated, thanks.
To convert a JSON string to a dictionary, use the json module (standard library):
import json
data_str = '{"index" : 5}'
json_dict = json.loads(data_str)
In your example:
import json

# ...

def DataGetCurrent(Username):
    SearchURL = "https://haveibeenpwned.com/api/v2/breachedaccount/{}".format(Username)
    request = url.urlopen(url.Request(SearchURL, headers={'User-Agent': "Mozilla/5.0"}))
    data = request.read()
    data = data.decode("utf-8")
    return json.loads(data)
EDIT
Apparently HaveIBeenPwned returns a list of dictionaries. Therefore, to get a result, you need to get the dictionary at the 0th index of the list:
def DataGetCurrent(Username):
    SearchURL = "https://haveibeenpwned.com/api/v2/breachedaccount/{}".format(Username)
    request = url.urlopen(url.Request(SearchURL, headers={'User-Agent': "Mozilla/5.0"}))
    data = request.read()
    data = data.decode("utf-8")
    json_list = json.loads(data)
    return json_list[0]
EDIT 2
The 0th element of the list is only one of the results. To process all the results, the list itself should be returned and iterated over, as sketched below.
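A minimal sketch of what that could look like, assuming DataGetCurrent is changed to return the full list (the output format here is illustrative):

breaches = DataGetCurrent(Username)
print("Your Email address has been involved in {} breaches:".format(len(breaches)))
for Data in breaches:
    print("Title: {}\nWebsite: {}\nDate: {}\nInformation: {}".format(
        Data['Title'], Data['Domain'], Data['BreachDate'], Data['Description']))
    # "DataClasses" is itself a list, so print each leaked data category on its own line
    for leaked in Data['DataClasses']:
        print(" -", leaked)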
So there's this website that posts something I want to buy at a random time of day, for a limited amount of time, and I want to write something that sends a message to my phone when a new URL is posted to that webpage.
I planned on doing this by counting the number of links on the page (since it's rarely updated) and checking that count every 5 minutes against what it originally was; if it's ever greater, I send a message to my phone. Here's what I have so far:
import time
import urllib.request
from bs4 import BeautifulSoup as bs
from twilio.rest import Client

client = Client("", "")  # account SID and auth token elided in the original post

class url_alert:
    url = ''

    def link_count(self):
        notifyy = True
        while notifyy:
            try:
                page = urllib.request.urlopen(self.url)
                soup = bs(page, "lxml")
                links = []
                for link in soup.findAll('a'):
                    links.append(link.get('href'))
                notifyy = False
                print('found', len(links), 'links')
            except:
                print('Stop making so many requests')
                time.sleep(60 * 5)
        return len(links)

    def phone(self):
        self.message = client.messages.create(to="", from_="", body="")
        print('notified')

    def looper(self):
        first_count = self.link_count()
        print('outside while')
        noty = True
        while noty:
            try:
                second_count = self.link_count()
                print('before compare')
                if second_count == first_count:
                    self.phone()
                    noty = False
            except:
                print('not quite...')
                time.sleep(60)

alert = url_alert()
alert.looper()
As a test, I set the if statement that decides whether or not to send a message to compare for equality, but the loop kept on running. Am I calling the functions within the looper function the right way?
It looks like you need to eliminate the try block: as it is now, if self.phone() raises an exception, you will never leave the loop.
def looper(self):
    first_count = self.link_count()
    while True:
        if first_count != self.link_count():
            self.phone()
            break
        time.sleep(60)
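Note that the comparison is also inverted relative to the equality test in the question: the notification should fire when the link count changes, i.e. when the two counts are no longer equal.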