Background: I have seen lots of examples of integrating a progress bar into a for loop, but nothing for my use case, so I am looking for some advice.
For my use case, I am calling an API and testing whether meta is in the response (meta = the data I need). If meta is not in the API response, the API instead returns a key-value pair named percent_complete, which indicates that the data I am trying to return is still aggregating and gives a value for the progress of that aggregation.
Current code:
def api_call():
    key, secret, url = ini_reader()
    endpoint_url = endpoint_initializer()
    while True:
        response = requests.get(url = endpoint_url, auth = HTTPBasicAuth(key, secret), headers = {"vendor-firm": "111"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            percent_value = "percent_complete"
            res2 = api_response["data"]["attributes"].get("percent_complete", '')*100
            print(f' Your data request for: {id_value} is {res2}% complete!')
            time.sleep(60)
        elif "meta" in api_response:
            return api_response
What I am trying to achieve: res2 (the percent_complete value multiplied by 100) gives the percentage, which I would like to use as the measure of progress in a progress bar.
Can anyone suggest an appropriate dependency to use?
You can use the Enlighten library. You can keep your print statements and have multiple progress bars at the same time without making any other changes. Below is an example of how you might implement it.
Based on your example it looks like id_value changes, so I wrote the example that way. If it doesn't change, you can just use it in the description. And if you have multiple requests in flight, you'd probably want to create a progress bar for each. If you want to remove your progress bars after they complete, just add leave=False to manager.counter().
The library is very customizable and the documentation has a lot of examples.
import enlighten

BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'

manager = enlighten.get_manager()

def api_call():
    pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
    ...
    while True:
        ...
        if "meta" not in api_response:
            ...
            pbar.count = res2
            pbar.update(incr=0, id_value=id_value)
        else:
            ...
            pbar.count = 100
            pbar.update(incr=0, id_value=id_value)
            pbar.close()
            return api_response
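For comparison, a similar pattern is possible with the more common tqdm library by setting the bar position directly. This is only a rough sketch mirroring the structure above (the request and "meta" check are elided), not tested against the API in the question:
from tqdm import tqdm

def api_call():
    pbar = tqdm(total=100, unit="%")
    while True:
        # ... same request and "meta" check as in the question ...
        if "meta" not in api_response:
            pbar.set_description(f"Request {id_value}")
            pbar.n = res2      # jump straight to the reported percentage
            pbar.refresh()
            time.sleep(60)
        else:
            pbar.n = 100
            pbar.refresh()
            pbar.close()
            return api_response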
Thanks to Aviso, and for everyone's benefit, here is the completed function -
def api_call():
    endpoint_url = endpoint_initializer()
    key, secret, url = ini_reader()
    BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'
    manager = enlighten.get_manager()
    date = dt.datetime.today().strftime("%Y-%m-%d")
    print("------------------------------------\n", "API URL constructed for:", date, "\n------------------------------------")
    print("-------------------------------------------------------------\n", "Endpoint:", endpoint_url, "\n-------------------------------------------------------------")
    pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
    while True:
        response = requests.get(url = endpoint_url, auth = HTTPBasicAuth(key, secret), headers = {"vendor-firm": "381"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            percent_value = "percent_complete"
            res2 = api_response["data"]["attributes"].get("percent_complete", '')*100
            pbar.count = res2
            pbar.update(incr=0, id_value=id_value)
            time.sleep(60)
        elif "meta" in api_response:
            pbar.count = 100
            pbar.update(incr=0, id_value=id_value)
            pbar.close()
            return api_response
I was wondering if I could get some input from some seasoned Python experts; I have a couple of questions.
I am extracting data from an API request and calculating the total vulnerabilities.
What is the best way to return this data so that I can call it in another function?
How can I add up all the vulnerabilities? Right now it only adds them 500 at a time; I would like the sum of every vulnerability.
def _request():
    third_party_patching_filer = {
        "asset": "asset.agentKey IS NOT NULL",
        "vulnerability": "vulnerability.categories NOT IN ['microsoft patch']"}
    headers = _headers()
    print(headers)
    url1 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets"
    resp = requests.post(url=url1, headers=headers, json=third_party_patching_filer, verify=False).json()
    jsonData = resp
    #print(jsonData)
    has_next_cursor = False
    nextKey = ""
    if "cursor" in jsonData["metadata"]:
        has_next_cursor = True
        nextKey = jsonData["metadata"]["cursor"]
    while has_next_cursor:
        url2 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets?&size=500&cursor={nextKey}"
        resp2 = requests.post(url=url2, headers=headers, json=third_party_patching_filer, verify=False).json()
        cursor = resp2["metadata"]
        print(cursor)
        if "cursor" in cursor:
            nextKey = cursor["cursor"]
            print(f"next key {nextKey}")
            #print(desktop_support)
            for data in resp2["data"]:
                for tags in data['tags']:
                    total_critical_vul_osswin = []
                    total_severe_vul_osswin = []
                    total_modoer_vuln_osswin = []
                    if tags["name"] == 'OSSWIN':
                        print("OSSWIN")
                        critical_vuln_osswin = data['critical_vulnerabilities']
                        severe_vuln_osswin = data['severe_vulnerabilities']
                        modoer_vuln_osswin = data['moderate_vulnerabilities']
                        total_critical_vul_osswin.append(critical_vuln_osswin)
                        total_severe_vul_osswin.append(severe_vuln_osswin)
                        total_modoer_vuln_osswin.append(modoer_vuln_osswin)
                        print(sum(total_critical_vul_osswin))
                        print(sum(total_severe_vul_osswin))
                        print(sum(total_modoer_vuln_osswin))
                    if tags["name"] == 'DESKTOP_SUPPORT':
                        print("Desktop")
                        total_critical_vul_desktop = []
                        total_severe_vul_desktop = []
                        total_modorate_vuln_desktop = []
                        critical_vuln_desktop = data['critical_vulnerabilities']
                        severe_vuln_desktop = data['severe_vulnerabilities']
                        moderate_vuln_desktop = data['moderate_vulnerabilities']
                        total_critical_vul_desktop.append(critical_vuln_desktop)
                        total_severe_vul_desktop.append(severe_vuln_desktop)
                        total_modorate_vuln_desktop.append(moderate_vuln_desktop)
                        print(sum(total_critical_vul_desktop))
                        print(sum(total_severe_vul_desktop))
                        print(sum(total_modorate_vuln_desktop))
                    else:
                        pass
        else:
            has_next_cursor = False
If you have a lot of parameters to pass, consider using a dict to combine them. Then you can just return the dict and pass it along to the next function that needs that data. Another approach would be to create a class and either access the variables directly or have helper functions that do so. The latter is a cleaner solution than a dict, since with a dict you have to quote every variable name, and with a class you can easily add additional functionality beyond just being a container for a bunch of instance variables. A sketch of both approaches follows.
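As a minimal sketch of both approaches (the names here are hypothetical, not the exact variables from your function):
from dataclasses import dataclass

# Dict approach: bundle the totals and return them as one object.
def summarize_with_dict(critical, severe, moderate):
    return {"critical": critical, "severe": severe, "moderate": moderate}

# Class approach: a small dataclass gives attribute access and room for helpers.
@dataclass
class VulnTotals:
    critical: int = 0
    severe: int = 0
    moderate: int = 0

    def grand_total(self):
        return self.critical + self.severe + self.moderate

totals = VulnTotals(critical=5, severe=12, moderate=30)
print(totals.grand_total())  # 47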
If you want the total across all the data, you should put these initializations:
total_critical_vul_osswin = []
total_severe_vul_osswin = []
total_modoer_vuln_osswin = []
before the while has_next_cursor loop (and similarly for the desktop totals). The way your code is currently written, they are re-initialized inside the loop on every batch of 500 records fetched by the URL, so the sums never cover more than the latest batch. A sketch of that restructuring follows.
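A rough sketch of that restructuring, collapsed to the OSSWIN totals only and with the request details elided (the pagination logic is assumed to stay as in your code):
def _request():
    # ... build headers/filter and fetch the first page as before ...
    total_critical_vul_osswin = []
    total_severe_vul_osswin = []
    total_modoer_vuln_osswin = []
    while has_next_cursor:
        # ... fetch the next page into resp2 as before ...
        for data in resp2["data"]:
            for tags in data["tags"]:
                if tags["name"] == "OSSWIN":
                    total_critical_vul_osswin.append(data["critical_vulnerabilities"])
                    total_severe_vul_osswin.append(data["severe_vulnerabilities"])
                    total_modoer_vuln_osswin.append(data["moderate_vulnerabilities"])
        # ... update nextKey / has_next_cursor as before ...
    # The sums now cover every page, not just the last 500-record batch,
    # and returning them as a dict makes them easy to use elsewhere.
    return {
        "critical": sum(total_critical_vul_osswin),
        "severe": sum(total_severe_vul_osswin),
        "moderate": sum(total_modoer_vuln_osswin),
    }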
I'm calling a LinkedIn API with the code below and it does what I want.
However, when I use almost identical code inside a loop, it returns a TypeError:
File "C:\Users\pchmurzynski\OneDrive - Centiq Ltd\Documents\Python\mergedreqs.py", line 54, in <module>
auth_headers = headers(access_token)
TypeError: 'dict' object is not callable
It has a problem with this line (which again, works fine outside of the loop):
headers = headers(access_token)
I tried changing it to
headers = headers.get(access_token)
or
headers = headers[access_token]
EDIT:
I have also tried this, with the same error:
auth_headers = headers(access_token)
But it didn't help. What am I doing wrong? Why does the dictionary work fine outside of the loop but not inside it, and what should I do to make it work?
What I am hoping to achieve is a list of share statistics, called for each ID from the "shids" list, which I can save as JSON. That can be done with individual requests (one link per ID):
(f'https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3A77487&ugcPosts=List(urn%3Ali%3AugcPost%3A{shid})
or with a single request for a list of IDs:
(f'https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3A77487&ugcPosts=List(urn%3Ali%3AugcPost%3A{shid},urn%3Ali%3AugcPost%3A{shid2},...,urn%3Ali%3AugcPost%3A{shidx})
Updated Code thanks to your comments.
shlink = ("https://api.linkedin.com/v2/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3A77487&shares=List(urn%3Ali%3Ashare%3A{})")

# loop through the list of share ids and make an api request for each of them
shares = []
token = auth(credentials)    # Authenticate the API
headers = fheaders(token)    # Make the headers to attach to the API call.
for shid in shids:
    # create a request link for each share id
    r = (shlink.format(shid))
    # call the api
    res = requests.get(r, headers = auth_headers)
    share_stats = res.json()
    # append the response to the shares list
    shares.append(share_stats["elements"])
"works fine outside the loop"
Because in the loop, you re-define the variable. I've added print statements to show it:
from liapiauth import auth, headers  # one type

for ...:
    ...
    print(type(headers))
    headers = headers(access_token)  # now set to another type
    print(type(headers))
Lesson learned - don't overwrite your imports.
Some refactors: your auth token isn't changing, so don't compute the headers inside the loop, and you can use one method for all LinkedIn API queries.
from liapiauth import auth, headers
import requests

API_PREFIX = 'https://api.linkedin.com/v2'
SHARES_ENDPOINT_FMT = '/organizationalEntityShareStatistics?q=organizationalEntity&organizationalEntity=urn%3Ali%3Aorganization%3A77487&shares=List(urn%3Ali%3Ashare%3A{}'

def get_linkedin_response(endpoint, headers):
    return requests.get(API_PREFIX + endpoint, headers=headers)

def main(access_token=None):
    if access_token is None:
        raise ValueError('Access-Token not defined')
    auth_headers = headers(access_token)
    shares = []
    for shid in shids:
        endpoint = SHARES_ENDPOINT_FMT.format(shid)
        resp = get_linkedin_response(endpoint, auth_headers)
        if resp.status_code // 100 == 2:
            share_stats = resp.json()
            shares.append(share_stats[1])
            # TODO: extract your data here
            idlist = [el["id"] for el in shares_list["elements"]]

if __name__ == '__main__':
    credentials = 'credentials.json'
    main(auth(credentials))
I have this loop in my app.py. For some reason it extends the load time by over 3 seconds. Are there any solutions?
import dateutil.parser as dp

# Converts date from ISO-8601 string to formatted string and returns it
def dateConvert(date):
    return dp.parse(date).strftime("%H:%M # %e/%b/%y")

def nameFromID(userID):
    if userID is None:
        return 'Unknown'
    else:
        response = requests.get("https://example2.org/" + str(userID), headers=headers)
        return response.json()['firstName'] + ' ' + response.json()['lastName']

logs = []
response = requests.get("https://example.org", headers=headers)
for response in response.json():
    logs.append([nameFromID(response['member']), dateConvert(response['createdAt'])])
It extends the load time by over 3 seconds because it does a lot of unnecessary work:
You're not using requests Sessions. Each request will require creating and tearing down an HTTPS connection. That's slow.
You're doing another HTTPS request for each name conversion. (See above.)
You're parsing the JSON you get in that function twice.
Whatever dp.parse() is (dateutil?), it's probably doing a lot of extra work parsing from a free-form string. If you know the input format, use strptime.
Here's a rework that should be significantly faster. Please see the TODO points first, of course.
Also, if you know the member id -> name mapping doesn't change, you can make name_cache a suitably named global variable too (but remember it may persist between requests).
import datetime

import requests

INPUT_DATE_FORMAT = "TODO_FILL_ME_IN"  # TODO: FILL ME IN.

def dateConvert(date: str):
    return datetime.datetime.strptime(date, INPUT_DATE_FORMAT).strftime(
        "%H:%M # %e/%b/%y"
    )

def nameFromID(sess: requests.Session, userID):
    if userID is None:
        return "Unknown"
    response = sess.get(f"https://example2.org/{userID}")
    response.raise_for_status()
    data = response.json()
    return "{firstName} {lastName}".format_map(data)

def do_thing():
    headers = {}  # TODO: fill me in
    name_cache = {}
    with requests.Session() as sess:
        sess.headers.update(headers)
        logs = []
        response = sess.get("https://example.org")
        for response in response.json():
            member_id = response["member"]
            name = name_cache.get(member_id)
            if not name:
                name = name_cache[member_id] = nameFromID(sess, member_id)
            logs.append([name, dateConvert(response["createdAt"])])
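Since the question's comment says the input dates are ISO-8601 strings, INPUT_DATE_FORMAT might look like the following; the exact timestamp shape is an assumption, so check a real createdAt value first:
# Assumed ISO-8601 shape, e.g. "2021-05-01T12:34:56Z"; adjust if the real data differs.
INPUT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"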
I am having issues with my API request to Flickr below. My function takes as input a list of 10 photo IDs. However, when I print the data from my function I am only getting information for one photo ID. Looking at my function below, any ideas on what may be causing the contents of only one photo ID to print? Any help would be great.
for item in get_flickr_data(word)["photos"]["photo"]:
    photo_ids = item["id"].encode('utf-8')
    lst_photo_ids.append(photo_ids)
print lst_photo_ids
lst_photo_ids = ['34117701526', '33347528313', '34158745075', '33315997274', '33315996984', '34028007021', '33315995844', '33347512113', '33315784134', '34024299271']

def get_photo_data(lst_photo_ids):
    baseurl = "https://api.flickr.com/services/rest/"
    params_d = {}
    params_d["method"] = "flickr.photos.getInfo"
    params_d["format"] = "json"
    params_d["photo_id"] = photo_ids
    params_d["api_key"] = FLICKR_KEY
    unique_identifier = params_unique_combination(baseurl, params_d)
    if unique_identifier in CACHE_DICTION:
        flickr_data_diction = CACHE_DICTION[unique_identifier]
    else:
        resp = requests.get(baseurl, params_d)
        json_result_text = resp.text[14:-1]
        flickr_data_diction = json.loads(json_result_text)
        CACHE_DICTION[unique_identifier] = flickr_data_diction
        fileref = open(CACHE_FNAME, "w")
        fileref.write(json.dumps(CACHE_DICTION))
        fileref.close()
    return flickr_data_diction

print get_photo_data(photo_ids)
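One thing that stands out: inside get_photo_data, params_d["photo_id"] is set to photo_ids, the single ID left over from the earlier loop, rather than anything derived from the lst_photo_ids parameter, and flickr.photos.getInfo takes one photo_id per call. A sketch of looping over the list (assuming the caching and request logic stay as in your version) might look like this:
def get_photo_data(photo_id):
    baseurl = "https://api.flickr.com/services/rest/"
    params_d = {}
    params_d["method"] = "flickr.photos.getInfo"
    params_d["format"] = "json"
    params_d["photo_id"] = photo_id   # the parameter, not the leftover loop variable
    params_d["api_key"] = FLICKR_KEY
    # ... caching and request logic unchanged from your version ...
    return flickr_data_diction

# One getInfo call per ID, collected into a list.
all_photo_data = [get_photo_data(pid) for pid in lst_photo_ids]
print all_photo_data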
I'm trying to convert a dictionary output into a DataFrame.
For my specific project, I'm using the Bloomberg service API to request a handful of historical datapoints for a stock ticker. They give me the output in dictionary form, but I need to convert it into a more manageable DataFrame. So far all the solutions appear pretty complex. Is there a straightforward pythonic way of accomplishing this?
Thanks!
The snippet of code producing the output:
def main():
    output = {}
    options = parseCmdLine()

    # Fill SessionOptions
    sessionOptions = blpapi.SessionOptions()
    sessionOptions.setServerHost(options.host)
    sessionOptions.setServerPort(options.port)
    print("Connecting to %s:%s" % (options.host, options.port))

    # Create a Session
    session = blpapi.Session(sessionOptions)

    # Start a Session
    if not session.start():
        print("Failed to start session.")
        return

    try:
        # Open service to get historical data from
        if not session.openService("//blp/refdata"):
            print("Failed to open //blp/refdata")
            return

        # Obtain previously opened service
        refDataService = session.getService("//blp/refdata")

        # Create and fill the request for the historical data
        request = refDataService.createRequest("HistoricalDataRequest")
        request.getElement("fields").appendValue("BEST_SALES")
        request.getElement("fields").appendValue("BEST_EBITDA")
        request.getElement("fields").appendValue("BEST_EPS")
        request.getElement("fields").appendValue("CURR_ENTP_VAL")
        request.getElement("fields").appendValue("CUR_MKT_CAP")
        request.getElement("fields").appendValue("LAST_PRICE")

        # Elements passed to it
        request.getElement("securities").appendValue("MSFT US Equity")

        # Add overrides
        overrides = request.getElement("overrides")
        override1 = overrides.appendElement()
        override1.setElement("fieldId", "BEST_FPERIOD_OVERRIDE")
        override1.setElement("value", "1FY")
        override2 = overrides.appendElement()
        override2.setElement("fieldId", "BEST_CONSOLIDATED_OVERRIDE")
        override2.setElement("value", "C")
        override3 = overrides.appendElement()
        override3.setElement("fieldId", "EQY_FUND_CRNCY")
        override3.setElement("value", "USD")

        # Add historical adjustments
        request.set("periodicityAdjustment", "ACTUAL")
        request.set("periodicitySelection", "DAILY")
        request.set("maxDataPoints", 100)
        ## NEED TO PASS THIS
        request.set("startDate", "20160106")
        request.set("endDate", "20160107")

        print("Sending Request:", request)
        # Send the request
        session.sendRequest(request)

        # Process received events
        while(True):
            # We provide timeout to give the chance for Ctrl+C handling:
            ev = session.nextEvent(500)
            for msg in ev:
                print(msg)
            if ev.eventType() == blpapi.Event.RESPONSE:
                # Response completely received, so we could exit
                break
    finally:
        # Stop the session
        session.stop()
Output in dictionary form:
HistoricalDataResponse = {
    securityData = {
        security = "MSFT US Equity"
        eidData[] = {
        }
        sequenceNumber = 0
        fieldExceptions[] = {
        }
        fieldData[] = {
            fieldData = {
                date = 2016-01-06
                BEST_SALES = 98338.750000
                BEST_EPS = 3.108000
                CURR_ENTP_VAL = 373535.702300
                CUR_MKT_CAP = 431746.702300
                LAST_PRICE = 54.050000
            }
            fieldData = {
                date = 2016-01-07
                BEST_SALES = 98351.040000
                BEST_EBITDA = 37885.200000
                BEST_EPS = 3.110000
                CURR_ENTP_VAL = 358518.425700
                CUR_MKT_CAP = 416729.425700
                LAST_PRICE = 52.170000
            }
        }
    }
}
You could take a look at the pdblp package (Disclaimer: I'm the author)
For your example, something like the following should work (I don't currently have a bbg connection, though, so I haven't actually tested this):
import pdblp

con = pdblp.BCon()
con.start()

fields = ['BEST_SALES', 'BEST_EBITDA', 'BEST_EPS', 'CURR_ENTP_VAL',
          'CUR_MKT_CAP', 'LAST_PRICE']
ovrds = [('BEST_FPERIOD_OVERRIDE', '1FY'), ('BEST_CONSOLIDATED_OVERRIDE', 'C'),
         ('EQY_FUND_CRNCY', 'USD')]
elms = [('periodicityAdjustment', 'ACTUAL'),
        ('periodicitySelection', 'ACTUAL'), ('maxDataPoints', 100)]

df = con.bdh('MSFT US Equity', fields, '20150629', '20150630', elms=elms,
             ovrds=ovrds)
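If the call succeeds, df should come back as a pandas DataFrame indexed by date (with the ticker and field names in the columns), so from there you can slice or reshape it like any other DataFrame instead of hand-parsing the HistoricalDataResponse message.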