How to stop API feed from disconnecting without showing errors - python

I have built a news feed from a news agency's API that reads headlines as they're published (I'd rather not say which agency for privacy reasons). One drawback of this API is that, unlike the filtered stream from the Twitter API, its documentation explicitly says you need to continuously fetch individual headlines in order to "simulate a wire feed". I have therefore written a while loop that continuously fetches headlines and prints them to the terminal.
import requests   # used for all the API calls below
import xmltodict  # the API returns XML, which I convert to a dict

import config # The .py file that contains the username and password for my account with this service.

username = config.username
password = config.password

values = {
    'username': username,
    'password': password,
    'format': 'json'
}

# Functions

# The API requires fetching an authentication token using your username and password, which is regenerated every 24 hours.
def get_token():
    parameters = values
    url = '<URL>'
    response = requests.get(url, params=parameters)
    token = response.json()['authToken']['authToken'].replace('=', '')
    return token
# The API has multiple possible channels (though my subscription only has access to one),
# so I fetch the available channels using the authentication token obtained by the previous function.
def get_channel():
    token = get_token()
    parameters = {'token': token}
    url = '<URL>'
    response = requests.get(url, params=parameters)
    dict_data = xmltodict.parse(response.content)
    channel = dict_data['availableChannels']['channelInformation']['alias']
    return channel
def get_headline():
    url = '<URL>'
    channel = get_channel()
    token = get_token()
    values = {'channel': channel, 'token': token, 'limit': 1, 'maxAge': '10s'}
    response = requests.get(url, params=values)
    if response.status_code != 200:
        raise Exception(
            f"Cannot get stream (Error {response.status_code}): {response.text}"
        )
    dict_data = xmltodict.parse(response.content)
    return dict_data
# According to the docs, the only way to simulate an actual live feed is to make constant requests for headlines,
# so I constructed a while loop that fetches headlines and compares each newly fetched headline
# with a set of already-seen headlines to check whether it has been fetched before.
# If it has not, the headline is printed and added to the set.
def api_stream():
    print('Connected to Stream!')
    new_headline = ''
    data = set()
    while True:
        dict_data = get_headline()
        try:
            # News headlines are classified on a scale of priority from 1-4 (1 being highest priority).
            # Since I am only interested in headlines for breaking news, I only want headlines of priority 1 or 2.
            if int(dict_data['results']['result']['priority']) < 3:
                new_headline = dict_data['results']['result']['headline']
            else:
                pass
            if new_headline not in data and new_headline != '':
                print(new_headline)
                data.add(new_headline)
        except KeyError:
            # get_headline() only fetches headlines that are at most 10 seconds old, so if the most
            # recent headline is older than that, the response has no 'result' key and raises a
            # KeyError, which I handle by skipping to the next iteration.
            continue
This code usually works throughout the day, but overnight, from around midnight until around 7:30 am, it stops printing headlines without displaying any error message. I've tried a number of things, such as wrapping this in another while loop and adding a second except block that calls api_stream() again in the event of an error, but nothing has worked; it just stops fetching headlines without warning.
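For reference, the outer wrapper I tried looks roughly like this (a simplified sketch; the run_forever name and the 5-second pause are just illustrative, not part of the code above):

import time

def run_forever():
    # Restart api_stream() if it ever raises; the problem is that it usually
    # stops without raising anything at all.
    while True:
        try:
            api_stream()
        except Exception as error:
            print(f'Stream stopped with {error!r}; reconnecting in 5 seconds...')
            time.sleep(5)

run_forever()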

Related

How to pass argument values into argparse with Python, to connect to an API?

I'm in the process of learning scripting with Python and have been trying to understand how to connect to an API, specifically this one: https://leagueapps.com/api-documentation/. I was given a sample Python script from the company to connect to and use their API, as follows:
#!/usr/bin/env python

# Example of exporting registrations, members, and transactions with batched
# results. A limited number of results are returned in each response. It can
# vary based on the type, but is generally around 1000 records.

# ubuntu 16.04: sudo apt install python-jwt python-crypto python-requests
# untested: pip install pyjwt crypto requests2

import argparse
import time
import random

import jwt
import requests

parser = argparse.ArgumentParser()
parser.add_argument('--site-id', type=int, required=True)
parser.add_argument('--client-id', required=True, help='client id for site. Probably the same as the certificate filename basename')
parser.add_argument('--pem-file', required=True, help='filename for certificate key in PEM format')
parser.add_argument('--type', required=True, choices=['registrations-2', 'members-2', 'transactions-2', 'accountingCodes'], help='type of records to export')
parser.add_argument('--domain', default='leagueapps.io')
parser.add_argument('--auth', default='https://auth.leagueapps.io')
args = parser.parse_args()

if args.auth:
    print("using auth server {}".format(args.auth))
    auth_host = args.auth

if args.domain == 'lapps-local.io':
    # for local testing the Google ESP isn't HTTPS
    admin_host = 'http://admin.{}:8082'.format(args.domain)
else:
    admin_host = 'https://admin.{}'.format(args.domain)

site_id = args.site_id
record_type = args.type
# Make a request to the OAuth 2 token endpoint with a JWT assertion to get an
# access_token
def request_access_token(auth_host, client_id, pem_file):
    with open(pem_file, 'r') as f:
        key = f.read()
    now = int(time.time())
    claims = {
        'aud': 'https://auth.leagueapps.io/v2/auth/token',
        'iss': client_id,
        'sub': client_id,
        'iat': now,
        'exp': now + 300
    }
    assertion = jwt.encode(claims, key, algorithm='RS256')
    auth_url = '{}/v2/auth/token'.format(auth_host)
    response = requests.post(auth_url,
                             data={'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
                                   'assertion': assertion})
    if response.status_code == 200:
        return response.json()['access_token']
    else:
        print('failed to get access_token: ({}) {}'.format(response.status_code, response.text))
        return None
# Calculate seconds to sleep between retries.
#
# slot_time is the amount of time per slot; it is multiplied by the randomly
# chosen slot number to get the total sleep time.
#
# max_slots can be used to put an upper bound on the sleep time
def exponential_backoff(attempts, slot_time=1, max_slots=0):
    if max_slots > 0:
        attempts = min(attempts, max_slots)
    return random.randint(0, 2 ** attempts - 1) * slot_time
# Initialize the last-updated and last-id query parameters to be used between
# requests. These should be updated after processing each batch of responses
# to get more results.
last_updated = 0
last_id = 0
access_token = None
batch_count = 0

# Maximum number of retries for a request
max_attempts = 5
attempts = 0

while attempts < max_attempts:
    attempts += 1
    # Get an access_token if necessary
    if access_token is None:
        print('requesting access token: {} {}'.format(args.client_id, args.pem_file))
        access_token = request_access_token(auth_host, args.client_id, args.pem_file)
        if access_token is None:
            break
        print('access token: {}'.format(access_token))
    params = {'last-updated': last_updated, 'last-id': last_id}
    # set the access token in the request header
    headers = {'authorization': 'Bearer {}'.format(access_token)}
    response = requests.get('{}/v2/sites/{}/export/{}'.format(admin_host, site_id, record_type), params=params, headers=headers)
    # access_token is invalid, clear so next pass through the loop will get a new one
    if response.status_code == 401:
        print('error({}): {}'.format(response.status_code, response.text))
        access_token = None
        # immediately retry since it should get a new access token
        continue
    # Request can be retried, sleep before retrying
    if response.status_code == 429 or response.status_code >= 500:
        # sleep an exponential back-off amount of time
        wait_seconds = exponential_backoff(attempts, 1.42, 5)
        print('retry in {} on error status ({}): {}'.format(wait_seconds, response.status_code, response.reason))
        time.sleep(wait_seconds)
        continue
    # error on request that can't be retried
    if response.status_code != 200:
        print('unexpected error ({}): {}'.format(response.status_code, response.reason))
        # reasonably some sort of coding error and retry is likely to fail
        break
    # get the actual response JSON data
    records = response.json()
    # No more records, exit.
    if len(records) == 0:
        print('done.')
        break
    batch_count += 1
    # successful request, reset retry attempts
    attempts = 0
    # process the result records and do useful things with them
    print('processing batch {}, {} records'.format(batch_count, len(records)))
    printFile = open("records.json", "w+")
    def remove_uni(s):
        s2 = s.replace("u'", "'")
        s2 = s2.replace('u"', '"')
        return s2
    printFile.write("[")
    for record in records:
        #print(remove_uni(str(record)))
        #print('record id: {}, {}'.format(record['id'], record['lastUpdated']))
        # track last_updated and last_id so next request will fetch more records
        last_updated = record['lastUpdated']
        last_id = record['id']
        printFile.write(remove_uni(str(record)) + ",")
    printFile.write("]")
    printFile.close()
I can't seem to get this code to work and the error I get is:
usage: Main [-h] --site-id SITE_ID --client-id CLIENT_ID --pem-file PEM_FILE
--type {registrations-2,members-2,transactions-2,accountingCodes}
[--domain DOMAIN] [--auth AUTH]
Main: error: the following arguments are required: --site-id, --client-id, --pem-file, --type
I have tried to figure out how to pass values for the arguments, but it's not clear to me where they get placed in this script, and I can't find an answer after many searches and reading tutorials.
Can someone show me how to solve this, or point to articles that will help me understand enough to do so? I wondered whether I should first learn what all of this sample code means in detail, but for the sake of getting results I was going to focus on just getting it working... if you think I should take the former approach versus the latter, or vice versa, I'd love to know that too, given that I'm a beginner.
Thanks!
Gabe
So with the help of @ndc85430 I found the solution:
Because argparse expects argument values to be supplied on the command line, when you run your script inside an editor you need to make sure the editor is feeding those values in somehow. In PyCharm, go to Run -> Edit Configurations, then enter the values you want in the Parameters field of your run configuration. For the case I posted above, that would be --site-id (type SITE_ID here) --client-id (type CLIENT_ID here) --pem-file (type the PEM_FILE name if it's in the same directory, or the path to PEM_FILE otherwise) --type (type the option you selected out of the choices you defined in argparse).
Once you enter your parameters and give the configuration an appropriate name, save it. Then in PyCharm you'll have the option to run that configuration when you run and test your script.
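If you run the script from a terminal instead, the same values go directly on the command line. For quick tests, argparse can also be handed an explicit list of strings. Everything below is a placeholder, not a real site ID or credential:

# Equivalent to running, for example:
#   python export_script.py --site-id 12345 --client-id my-client-id --pem-file my-key.pem --type registrations-2
# (export_script.py and all values here are placeholders)
args = parser.parse_args([
    '--site-id', '12345',
    '--client-id', 'my-client-id',
    '--pem-file', 'my-key.pem',
    '--type', 'registrations-2',
])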

JSONDecodeError when using for loop with firestore data iterating API request

I'm lost as to why this error keeps happening and how to solve it.
I'm trying to take data out of one of my firestore collections, using the ID (which is a stock ticker), and iterating that ID through a for loop to an API that returns a JSON array.
Every time I run this, about a third of the way through I'll get the following error, first showing up as Error: 404 and then displaying:
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The script works for the first third of the data, but deleting items in the collection around where the error occurs doesn't resolve the issue, so I don't think it has to do with the particular document it lands on.
Am I missing something?
I tried putting an exception in for the 404 error, but either I implemented it badly, or it didn't solve the problem.
import requests
import json
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime

cred = credentials.Certificate("./serviceAccountKey.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

doc_ref1 = db.collection(u'Quiver').stream()

for doc in doc_ref1:
    symbol = doc.id
    api_url = "https://api.iextrading.com/1.0/stock/{}/company".format(symbol)
    query_url = api_url
    r = requests.get(query_url)
    if r.status_code != 200:
        print("Error:", r.status_code)
    if r.status_code == 404:
        print("Error:", r.status_code)
    json_stock = r.json()
    symbol = json_stock['symbol']
    companyName = json_stock['companyName']
    exchange = json_stock['exchange']
    industry = json_stock['industry']
    website = json_stock['website']
    description = json_stock['description']
    CEO = json_stock['CEO']
    issueType = json_stock['issueType']
    sector = json_stock['sector']
    tags = json_stock['tags']
    updateTime = datetime.datetime.now()
    doc_ref = db.collection(u'Quiver').document(u'{}'.format(symbol))
    doc_ref.set({
        u'symbol': u'{}'.format(symbol),
        u'Company Name': u'{}'.format(companyName),
        u'Exchange': u'{}'.format(exchange),
        u'Industry': u'{}'.format(industry),
        u'Website': u'{}'.format(website),
        u'Description': u'{}'.format(description),
        u'Issue Type': u'{}'.format(issueType),
        u'Sector': u'{}'.format(sector),
        u'Tags': u'{}'.format(tags),
        u'Last Update Time': u'{}'.format(updateTime)
    })
    #docs = doc_ref.get({u'summary'})
    print(symbol)
A request for stocks for a company that doesn't exist in the service records returns a 404.
print-ing to stdout when this happens isn't enough to handle it, since the response body for a non-200 status code isn't valid JSON text.
Depending on your business needs, you have to either skip non-200 responses, fetch the stock information from another service, or log this as a critical issue so that you can apply a policy for companies whose stock information is no longer offered by the service.
The first option, skipping non-200 responses, can be done with the following clause.
if r.status_code != 200:
    print("Error:", r.status_code)
    continue
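In context, that check has to run before r.json() is called. A minimal sketch of the question's loop with just that guard added (everything else unchanged):

for doc in doc_ref1:
    symbol = doc.id
    api_url = "https://api.iextrading.com/1.0/stock/{}/company".format(symbol)
    r = requests.get(api_url)
    # Skip tickers the service doesn't recognise; their response bodies aren't valid JSON.
    if r.status_code != 200:
        print("Error:", r.status_code, "for symbol", symbol)
        continue
    json_stock = r.json()
    # ... build the fields and write the document back to Firestore as before ...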

Obtain all Woocommerce Orders via Python API

I'm looking to export all orders from the WooCommerce API via a python script.
I've followed the authentication process and have been using the method to obtain orders described here. My code looks like the following:
wcapi = API(
    url="url",
    consumer_key=consumerkey,
    consumer_secret=consumersecret
)

r = wcapi.get('orders')
r = r.json()
r = r['orders']
print(len(r))  # output: 8
This outputs the most recent 8 orders, but I would like to access all of them. There are over 200 orders placed via woocommerce right now. How do I access all of the orders?
Please tell me there is something simple I am missing.
My ultimate goal is to pull these orders automatically, transform them, and then upload to a visualization tool. All input is appreciated.
First: Initialize your API (as you did).
wcapi = API(
    url=eshop.url,
    consumer_key=eshop.consumer_key,
    consumer_secret=eshop.consumer_secret,
    wp_api=True,
    version="wc/v2",
    query_string_auth=True,
    verify_ssl=True,
    timeout=10
)
Second: Fetch the orders from your request (as you did).
r=wcapi.get("orders")
Third: Fetch the total pages.
total_pages = int(r.headers['X-WP-TotalPages'])
Fourth: For every page, catch the JSON and access the data through the API.
for i in range(1, total_pages + 1):
    r = wcapi.get("orders?page=" + str(i)).json()
    ...
The relevant parameters found in the corresponding documentation are page and per_page. The per_page parameter defines how many orders should be retrieved at every request. The page parameter defines the current page of the order collection.
For example, the request sent by wcapi.get('orders?per_page=5&page=2') will return the second page of five orders (orders 6 to 10).
However, since the default per_page is 10, it is not clear why you get only 8 orders.
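As a side note, the same parameters can be passed as a dict rather than spliced into the URL by hand; the numbers below are just example values:

# Fetch the second page of five orders; equivalent to "orders?per_page=5&page=2".
r = wcapi.get("orders", params={"per_page": 5, "page": 2})
orders = r.json()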
I encountered the same problem with paginated response for products.
I built on the same approach described by @gtopal, whereby the X-WP-TotalPages header returned by WooCommerce is used to iterate through each page of results.
I knew that I would probably encounter the same issue for other WooCommerce API requests (such as orders), and I didn't want to clutter my code by repeating the pagination loop every time I requested a paginated set of results.
To avoid this I used a decorator to abstract the pagination logic, so that get_all_wc_orders can focus just on the request.
I hope the decorator below might be useful to someone else (gist)
import logging

from woocommerce import API

log = logging.getLogger(__name__)  # log.debug is used below, so a logger is needed

WC_MAX_API_RESULT_COUNT = 100

wcapi = API(
    url=url,
    consumer_key=key,
    consumer_secret=secret,
    version="wc/v3",
    timeout=300,
)

def wcapi_aggregate_paginated_response(func):
    """
    Decorator that repeatedly calls a decorated function to get
    all pages of a WooCommerce API response.
    Combines the response data into a single list.
    Decorated function must accept parameters:
    - wcapi object
    - page number
    """
    def wrapper(wcapi, page=0, *args, **kwargs):
        items = []
        page = 0
        num_pages = WC_MAX_API_RESULT_COUNT
        while page < num_pages:
            page += 1
            log.debug(f"{page=}")
            response = func(wcapi, page=page, *args, **kwargs)
            items.extend(response.json())
            num_pages = int(response.headers["X-WP-TotalPages"])
            num_products = int(response.headers["X-WP-Total"])
            log.debug(f"{num_products=}, {len(items)=}")
        return items
    return wrapper

@wcapi_aggregate_paginated_response
def get_all_wc_orders(wcapi, page=1):
    """
    Query the WooCommerce REST API for all orders.
    """
    response = wcapi.get(
        "orders",
        params={
            "per_page": WC_MAX_API_RESULT_COUNT,
            "page": page,
        },
    )
    response.raise_for_status()
    return response

orders = get_all_wc_orders(wcapi)

Querying JIRA via REST API and possible bad values in query

I have a portal built on CherryPy that users can access, with forms that can be submitted and are sent to JIRA via the REST API for tracking purposes. Once a form has been submitted, I take the user-supplied information from the form, along with the JIRA issue ID, and send them to an Oracle DB.
I then extended the portal so users can view their submissions via a list page and select a record to see what is stored in the DB for that submission. I had the idea of using the JIRA REST API to get the status and assignee of the issue within JIRA. Converting my code from submitting to the API to instead querying it with the necessary JQL statement was fairly simple and can be seen below.
# Imports needed by this function (Python 2)
import base64
import json
import urllib2

def jira_status_check(jira_id):
    if jira_id != "No JIRA Issue":
        try:
            search_url = "https://myjirainstance.atlassian.net/rest/api/2/search/?jql=issue=" + jira_id + "&fields=status,assignee,resolution"
            print search_url
            username = 'some_user'
            password = 'some_password'
            request = urllib2.Request(search_url)
            base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
            request.add_header("Authorization", "Basic %s" % base64string)
            request.add_header("Content-Type", "application/json")
            result = urllib2.urlopen(request).read()
            json_results = json.loads(result)
            print json_results
            jira_status = json_results["issues"][0]["fields"]["status"]["name"]
            if json_results["issues"][0]["fields"]["resolution"] is None:
                tmp = "tmp"
            if json_results["issues"][0]["fields"]["resolution"] is not None:
                jira_status = jira_status + " - " + json_results["issues"][0]["fields"]["resolution"]["name"]
            # assignee_name = "TEST"
            # assignee_NT = "TEST"
            if json_results["issues"][0]["fields"]["assignee"] is None:
                assignee_name = "Unassigned"
                assignee_NT = "Unassigned"
            if json_results["issues"][0]["fields"]["assignee"] is not None:
                assignee_name = json_results["issues"][0]["fields"]["assignee"]["displayName"]
                assignee_NT = json_results["issues"][0]["fields"]["assignee"]["name"]
            # if json_results["issues"][0]["fields"]["assignee"]["displayName"] is not None:
            #     assignee_name = json_results["issues"][0]["fields"]["assignee"]["displayName"]
            # if json_results["issues"][0]["fields"]["assignee"] is None:
            #     assignee_NT = "Unassigned"
            # if json_results["issues"][0]["fields"]["assignee"]["name"] is not None:
            #     assignee_NT = json_results["issues"][0]["fields"]["assignee"]["name"]
            print jira_status
            print assignee_name
            print assignee_NT
            output = [jira_status, assignee_name, assignee_NT]
        except:
            jira_status = "No JIRA Issue by that number or JIRA inaccessible"
            assignee_name = "No JIRA Issue by that number or JIRA inaccessible"
            assignee_NT = "No JIRA Issue by that number or JIRA inaccessible"
            output = [jira_status, assignee_name, assignee_NT]
    else:
        jira_status = "No JIRA Issue"
        assignee_name = "No JIRA Issue"
        assignee_NT = "No JIRA Issue"
        output = [jira_status, assignee_name, assignee_NT]
    return output
However, this is limited to searching a single record at a time, which works when you are only viewing a single record, but I was hoping to extend it to my list page and search for many issues at once with one API query, rather than tons of single-issue queries. I am able to use JQL and the REST API to search with multiple issue numbers via a link like this: https://myjirainstance.atlassian.net/rest/api/2/search/?jql=Issue%3DSPL-3284%20OR%20Issue%3DSPL-3285&fields=status,assignee,resolution
But then I wondered what happens if a bad issue ID is somehow saved and queried as part of that larger query. Previously this was handled by the except statement in my jira_status_check function when it was a single-record query. When I try to query the REST API with a link like the one above that includes a bad key, I instead get
{"errorMessages":["An issue with key 'SPL-6666' does not exist for field 'Issue'."],"warningMessages":[]}
I tried to build a query from an advanced search of issues but when I do something like Issue=SPL-3284 OR Issue=SPL-3285 OR Issue=SPL-6666 I get a response of An issue with key 'SPL-6666' does not exist for field 'Issue'.
Is there a correct way to search via JQL with multiple Issue numbers and give back no values for the fields for ones without matching issue numbers?
Or am I stuck with doing a ton of single issue queries to the api to cover my bases? This would be less than ideal, and might cause me to just limit the api queries to when a single record is viewed rather than the list page for usability.
Would I be better off moving my function to query JIRA to javascript/jquery that can populate the list of submissions after the page is rendered?
I ended up reaching out to Atlassian with my question about JQL and was pointed to the following REST API documentation, and told about the validateQuery parameter to add to my JQL to achieve my search: https://docs.atlassian.com/jira/REST/6.1.7/
When I now use a query similar to this on my rest api link with my additional parameter
jql=Issue%3DSPL-3284 OR Issue%3DSPL-3285&fields=status,assignee,resolution&validateQuery=true
I get back a JSON with actual content for the issues which are valid and then a separate warningMessages object with any that are bad. An example JSON is below, but obviously $CONTENT would be actual results from the query
{
    "expand": "schema,names",
    "startAt": 0,
    "maxResults": 50,
    "total": 2,
    "issues": [
        {
            $CONTENT
        },
        {
            $CONTENT
        }
    ],
    "warningMessages": [
        "An issue with key 'SPL-6666' does not exist for field 'Issue'."
    ]
}
Hopefully someone else will find this helpful in the future
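For anyone doing this from Python, here is a minimal sketch of the same search with validateQuery using the requests library (the instance URL, credentials, and issue keys are the placeholders from the question, not real values):

import requests

jql = "Issue=SPL-3284 OR Issue=SPL-3285 OR Issue=SPL-6666"
response = requests.get(
    "https://myjirainstance.atlassian.net/rest/api/2/search",
    params={
        "jql": jql,
        "fields": "status,assignee,resolution",
        "validateQuery": "true",
    },
    auth=("some_user", "some_password"),
)
data = response.json()
# Valid issues come back under "issues"; bad keys end up in "warningMessages"
# instead of failing the whole query.
for issue in data.get("issues", []):
    print(issue["key"], issue["fields"]["status"]["name"])
for warning in data.get("warningMessages", []):
    print("warning:", warning)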

How to pass oauth_callback value to oauth/request_token with Twython

Twitter just recently made the following mandatory:
1) You must pass an oauth_callback value to oauth/request_token. It's not optional. Even if you have one already set on dev.twitter.com. If you're doing out of band OAuth, pass oauth_callback=oob.
2) You must pass along the oauth_verifier you either received from your executed callback or that you received hand-typed by your end user to oauth/access_token.
Here is the twitter thread (https://dev.twitter.com/discussions/16443)
This has caused Twython get_authorized_tokens to throw this error:
Request: oauth/access_token
Error: Required oauth_verifier parameter not provided
I have two questions:
1. How do you pass the oauth_callback value to oauth/request_token with Twython?
2. How do you pass along the oauth_verifier?
I can get the oauth_verifier with request.GET['oauth_verifier'] from the callback URL, but I have no idea what to do from there using Twython. I've searched everywhere but haven't found any answers, so I decided to post this. This is my first post so please be kind ;)
Here is my code:
def register_twitter(request):
    # Instantiate Twython with the first leg of our trip.
    twitter = Twython(
        twitter_token = settings.TWITTER_KEY,
        twitter_secret = settings.TWITTER_SECRET,
        callback_url = request.build_absolute_uri(reverse('account.views.twitter_thanks'))
    )
    # Request an authorization url to send the user to
    auth_props = twitter.get_authentication_tokens()
    # Then send them over there
    request.session['request_token'] = auth_props
    return HttpResponseRedirect(auth_props['auth_url'])

def twitter_thanks(request, redirect_url=settings.LOGIN_REDIRECT_URL):
    # Now that we've got the magic tokens back from Twitter, we need to exchange
    # for permanent ones and store them...
    twitter = Twython(
        twitter_token = settings.TWITTER_KEY,
        twitter_secret = settings.TWITTER_SECRET,
        oauth_token = request.session['request_token']['oauth_token'],
        oauth_token_secret = request.session['request_token']['oauth_token_secret'],
    )
    # Retrieve the tokens
    authorized_tokens = twitter.get_authorized_tokens()
    # Check if twitter user has a UserProfile
    try:
        profile = UserProfile.objects.get(twitter_username=authorized_tokens['screen_name'])
    except ObjectDoesNotExist:
        profile = None
I solved this myself; here is the solution in case it helps anyone else:
In the file Twython.py, I added a new parameter, oauth_verifier, to the Twython class constructor. I get the oauth_verifier value from the callback_url in my twitter_thanks view.
In get_authorized_tokens I removed this line of code:
response = self.client.get(self.access_token_url)
and added the following code:
callback_url = self.callback_url or 'oob'
request_args = urllib.urlencode({'oauth_callback': callback_url, 'oauth_verifier':self.oauth_verifier })
response = self.client.post(self.access_token_url, params=request_args)
It now works like a charm and is OAuth 1.0A compliant.
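Putting the snippets together, the modified method ends up looking roughly like this (a sketch only; it assumes oauth_verifier is accepted by the Twython constructor and stored as self.oauth_verifier, and the response parsing that follows stays exactly as in the original get_authorized_tokens):

def get_authorized_tokens(self):
    callback_url = self.callback_url or 'oob'
    request_args = urllib.urlencode({
        'oauth_callback': callback_url,
        'oauth_verifier': self.oauth_verifier,
    })
    response = self.client.post(self.access_token_url, params=request_args)
    # ... parse response.content into the token dict as in the original method ...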
