How to convert a dictionary to a DataFrame - Python

I'm trying to convert dictionary output into a DataFrame.
For my specific project, I'm using the Bloomberg API service to request a handful of historical datapoints for a stock ticker. It gives me the output in dictionary form, but I need to convert it into a more manageable DataFrame. So far all the solutions I've found appear pretty complex. Is there a straightforward, Pythonic way of accomplishing this?
Thanks!
The snippet of code producing the output:
def main():
    output = {}
    options = parseCmdLine()

    # Fill SessionOptions
    sessionOptions = blpapi.SessionOptions()
    sessionOptions.setServerHost(options.host)
    sessionOptions.setServerPort(options.port)

    print("Connecting to %s:%s" % (options.host, options.port))

    # Create a Session
    session = blpapi.Session(sessionOptions)

    # Start a Session
    if not session.start():
        print("Failed to start session.")
        return

    try:
        # Open service to get historical data from
        if not session.openService("//blp/refdata"):
            print("Failed to open //blp/refdata")
            return

        # Obtain previously opened service
        refDataService = session.getService("//blp/refdata")

        # Create and fill the request for the historical data
        request = refDataService.createRequest("HistoricalDataRequest")
        request.getElement("fields").appendValue("BEST_SALES")
        request.getElement("fields").appendValue("BEST_EBITDA")
        request.getElement("fields").appendValue("BEST_EPS")
        request.getElement("fields").appendValue("CURR_ENTP_VAL")
        request.getElement("fields").appendValue("CUR_MKT_CAP")
        request.getElement("fields").appendValue("LAST_PRICE")

        # Elements passed to it
        request.getElement("securities").appendValue("MSFT US Equity")

        # Add overrides
        overrides = request.getElement("overrides")
        override1 = overrides.appendElement()
        override1.setElement("fieldId", "BEST_FPERIOD_OVERRIDE")
        override1.setElement("value", "1FY")
        override2 = overrides.appendElement()
        override2.setElement("fieldId", "BEST_CONSOLIDATED_OVERRIDE")
        override2.setElement("value", "C")
        override3 = overrides.appendElement()
        override3.setElement("fieldId", "EQY_FUND_CRNCY")
        override3.setElement("value", "USD")

        # Add historical adjustments
        request.set("periodicityAdjustment", "ACTUAL")
        request.set("periodicitySelection", "DAILY")
        request.set("maxDataPoints", 100)
        ## NEED TO PASS THIS
        request.set("startDate", "20160106")
        request.set("endDate", "20160107")

        print("Sending Request:", request)
        # Send the request
        session.sendRequest(request)

        # Process received events
        while True:
            # We provide a timeout to give a chance for Ctrl+C handling:
            ev = session.nextEvent(500)
            for msg in ev:
                print(msg)
            if ev.eventType() == blpapi.Event.RESPONSE:
                # Response completely received, so we can exit
                break
    finally:
        # Stop the session
        session.stop()
Output in dictionary form (this is the printed blpapi message, which only looks like a dictionary):
HistoricalDataResponse = {
    securityData = {
        security = "MSFT US Equity"
        eidData[] = {
        }
        sequenceNumber = 0
        fieldExceptions[] = {
        }
        fieldData[] = {
            fieldData = {
                date = 2016-01-06
                BEST_SALES = 98338.750000
                BEST_EPS = 3.108000
                CURR_ENTP_VAL = 373535.702300
                CUR_MKT_CAP = 431746.702300
                LAST_PRICE = 54.050000
            }
            fieldData = {
                date = 2016-01-07
                BEST_SALES = 98351.040000
                BEST_EBITDA = 37885.200000
                BEST_EPS = 3.110000
                CURR_ENTP_VAL = 358518.425700
                CUR_MKT_CAP = 416729.425700
                LAST_PRICE = 52.170000
            }
        }
    }
}

You could take a look at the pdblp package (disclaimer: I'm the author).
For your example, something like this should work (I don't currently have a Bloomberg connection, though, so I haven't actually tested it):
import pdblp

con = pdblp.BCon()
con.start()
fields = ['BEST_SALES', 'BEST_EBITDA', 'BEST_EPS', 'CURR_ENTP_VAL',
          'CUR_MKT_CAP', 'LAST_PRICE']
ovrds = [('BEST_FPERIOD_OVERRIDE', '1FY'), ('BEST_CONSOLIDATED_OVERRIDE', 'C'),
         ('EQY_FUND_CRNCY', 'USD')]
elms = [('periodicityAdjustment', 'ACTUAL'),
        ('periodicitySelection', 'DAILY'), ('maxDataPoints', 100)]
df = con.bdh('MSFT US Equity', fields, '20150629', '20150630', elms=elms,
             ovrds=ovrds)
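If you would rather stay with the raw blpapi session from your snippet, you can also flatten the HistoricalDataResponse into a pandas DataFrame yourself. A minimal sketch (untested without a Bloomberg connection; it assumes the msg objects from your event loop and a FIELDS list matching your request):
import pandas as pd

FIELDS = ["BEST_SALES", "BEST_EBITDA", "BEST_EPS",
          "CURR_ENTP_VAL", "CUR_MKT_CAP", "LAST_PRICE"]

def message_to_frame(msg):
    # Walk securityData -> fieldData[] and build one row per date
    security_data = msg.getElement("securityData")
    rows = []
    for field_data in security_data.getElement("fieldData").values():
        row = {"date": field_data.getElementAsDatetime("date")}
        for field in FIELDS:
            # A field can be absent on a given date (e.g. BEST_EBITDA on 2016-01-06)
            if field_data.hasElement(field):
                row[field] = field_data.getElementAsFloat(field)
        rows.append(row)
    return pd.DataFrame(rows).set_index("date")
Calling this on each RESPONSE message in your while loop, instead of print(msg), gives you one DataFrame per security.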

Related

Integrating Progress Bar for API Call

Background: I have seen lots of examples of integrating a progress bar into a for loop, but nothing for my use case, so I am looking for some advice.
For my use case, I am calling an API and testing whether meta is in the response (meta = the data I need). If meta is not in the API response, the API instead returns a key-value pair named percent_complete, which indicates that the data I am trying to return is still aggregating, and gives a value for the progress of that aggregation.
Current code:
def api_call():
    key, secret, url = ini_reader()
    endpoint_url = endpoint_initializer()
    while True:
        response = requests.get(url=endpoint_url, auth=HTTPBasicAuth(key, secret), headers={"vendor-firm": "111"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            # Default to 0 so the multiplication below is safe if the key is missing
            res2 = api_response["data"]["attributes"].get("percent_complete", 0) * 100
            print(f' Your data request for: {id_value} is {res2}% complete!')
            time.sleep(60)
        elif "meta" in api_response:
            return api_response
What I am trying to achieve: res2 gives the percentage complete, which I would like to use as the measure of progress in a progress bar.
Can anyone suggest an appropriate dependency to use?
You can use the Enlighten library. You can keep your print statements and have multiple progress bars at the same time without making any other changes. Below is an example of how you might implement it.
Based on your example it looks like id_value changes, so I wrote the example that way. If it doesn't change, you can just put it in the description. And if you have multiple values, you'd probably want to create a progress bar for each. If you want your progress bars to be removed after they complete, just add leave=False to manager.counter().
The library is very customizable and the documentation has a lot of examples.
import enlighten

BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'

manager = enlighten.get_manager()

def api_call():
    pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
    ...
    while True:
        ...
        if "meta" not in api_response:
            ...
            pbar.count = res2
            pbar.update(incr=0, id_value=id_value)
        else:
            ...
            pbar.count = 100
            pbar.update(incr=0, id_value=id_value)
            pbar.close()
            return api_response
Thanks to Aviso, and for everyone's benefit, here is the completed function:
def api_call():
    endpoint_url = endpoint_initializer()
    key, secret, url = ini_reader()
    BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'
    manager = enlighten.get_manager()
    date = dt.datetime.today().strftime("%Y-%m-%d")
    print("------------------------------------\n", "API URL constructed for:", date, "\n------------------------------------")
    print("-------------------------------------------------------------\n", "Endpoint:", endpoint_url, "\n-------------------------------------------------------------")
    pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
    while True:
        response = requests.get(url=endpoint_url, auth=HTTPBasicAuth(key, secret), headers={"vendor-firm": "381"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            # Default to 0 so the multiplication below is safe if the key is missing
            res2 = api_response["data"]["attributes"].get("percent_complete", 0) * 100
            pbar.count = res2
            pbar.update(incr=0, id_value=id_value)
            time.sleep(60)
        elif "meta" in api_response:
            pbar.count = 100
            pbar.update(incr=0, id_value=id_value)
            pbar.close()
            return api_response

How to get FX Forward Rates instead of Points in BLPAPI in Python

I am trying to get "CHF1M Curncy" as forward rates, not as points, from the Bloomberg API (blpapi) in Python. The code itself works fine for the forward points, but as soon as I use the override to switch from points to rates, I get the error "No value for []" on the line with the first override. This is my code:
import blpapi
import json

HISTORICAL_DATA_RESPONSE = blpapi.Name("HistoricalDataResponse")

def historical_bloomberg_data(securities, fields, periodicity="DAILY", start_date="20190101", end_date="20190105"):
    # Create and start a session
    print("Creating session ...")
    sessionOptions = blpapi.SessionOptions()
    sessionOptions.setServerHost('localhost')
    sessionOptions.setServerPort(8194)
    session = blpapi.Session(sessionOptions)
    if not session.start():
        print("Failed to start session")
        return
    try:
        # Create and open a service
        print("Creating historical data service")
        if not session.openService("//blp/refdata"):
            print("Failed to create service")
            return
        refDataService = session.getService("//blp/refdata")
        request = refDataService.createRequest("HistoricalDataRequest")
        # Add all securities
        for security in securities:
            request.getElement("securities").appendValue(security)
        # Add all fields
        for field in fields:
            request.getElement("fields").appendValue(field)
        # Further settings
        request.set("periodicitySelection", periodicity)
        request.set("startDate", start_date)
        request.set("endDate", end_date)
        request.set("maxDataPoints", 2000)
        # Override code
        overrides = request.getElement('overrides')
        override1 = overrides.appendElement()
        override1.setElement('fieldID', 'FWD_CURVE_FORMAT')
        override1.setElement('value', 'RATES')
        #request.setOverride("FWD_CURVE_FORMAT", "RATES")
        # Send and process request
        print("Sending request...")
        session.sendRequest(request)
        results = {}
        while True:
            ev = session.nextEvent(500)
            for msg in ev:
                if msg.messageType() == HISTORICAL_DATA_RESPONSE:
                    #response = msg.getElement()
                    response = msg.getElement("securityData")
                    _sec = response.getElementAsString("security")
                    _data = [[fd.getElementAsString("date")] + [fd.getElementAsString(_) for _ in fields]
                             for fd in response.getElement("fieldData").values()]
                    results[_sec] = results.get(_sec, []) + _data
            if ev.eventType() == blpapi.Event.RESPONSE:
                break
        print("Results retrieved")
        return results
    finally:
        session.stop()
The correct override is FWD_CURVE_QUOTE_FORMAT. The values can be:
POINTS
RATES
OUTRIGHT
(I suspect OUTRIGHT and RATES do the same thing)
EDIT
And the correct way to override a field is by using fieldId (ending with a lowercase d), not fieldID.
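Putting the two corrections together, the override block from the question would become (a sketch, since I have no terminal to test against):
overrides = request.getElement('overrides')
override1 = overrides.appendElement()
# fieldId (lowercase d), and the QUOTE format override
override1.setElement('fieldId', 'FWD_CURVE_QUOTE_FORMAT')
override1.setElement('value', 'RATES')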

How can I get the Google Calendar API status_code in Python when listing events?

I am trying to use the Google Calendar API:
events_result = service.events().list(calendarId=calendarId,
                                      timeMax=now,
                                      alwaysIncludeEmail=True,
                                      maxResults=100, singleEvents=True,
                                      orderBy='startTime').execute()
Everything is OK when I have permission to access the calendarId, but errors come up when I don't have permission for a calendarId.
I built an autoload.py function, scheduled with the schedule package, to load events every 10 minutes; this function stops whenever an error comes up, and I have to use an SSH terminal to restart autoload.py manually.
So I want to know:
How can I get the status_code so that, for example, if it is 404, Python will pass and keep going?
Answer:
You can use a try/except block within a loop to go through all your calendars, and skip over accesses which throw an error.
Code Example:
To get the error code, make sure to import json:
import json
and then you can get the error code out of the Exception:
calendarIds = ["calendar ID 1", "calendar ID 2", "calendar Id 3", "etc"]

for i in calendarIds:
    try:
        events_result = service.events().list(calendarId=i,
                                              timeMax=now,
                                              alwaysIncludeEmail=True,
                                              maxResults=100, singleEvents=True,
                                              orderBy='startTime').execute()
    except Exception as e:
        print(json.loads(e.content)['error']['code'])
        continue
Further Reading:
Python Try Except - w3schools
Python For Loops - w3schools
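A slightly tighter variant is to catch the client library's own exception class rather than a bare Exception, since it exposes the HTTP status directly. A sketch, assuming the standard googleapiclient package:
from googleapiclient.errors import HttpError

try:
    events_result = service.events().list(calendarId=calendarId,
                                          maxResults=100, singleEvents=True,
                                          orderBy='startTime').execute()
except HttpError as e:
    # e.resp.status holds the HTTP status code (e.g. 404)
    if e.resp.status == 404:
        pass  # no access to this calendar, skip it
    else:
        raise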
Thanks to @Rafa Guillermo; I uploaded the full code to the autoload.py program, but I also wanted to know how to get the response JSON or status_code for a Google API request.
The solution:
try:
    # code here
except Exception as e:
    continue
import schedule
import time
from datetime import datetime
import dir
import sqlite3
from project.function import cmsCalendar as cal

db_file = str(dir.dir) + '/admin.sqlite'

def get_list_shop_from_db(db_file):
    cur = sqlite3.connect(db_file).cursor()
    query = cur.execute('SELECT * FROM Shop')
    colname = [d[0] for d in query.description]
    result_list = [dict(zip(colname, r)) for r in query.fetchall()]
    cur.close()
    cur.connection.close()
    return result_list

def auto_load_google_database(list_shop, calendarError=False):
    # Iterate over the shops directly; enumerate avoids the off-by-one
    # of incrementing an index by hand before using it
    for shopId, shop in enumerate(list_shop, start=1):
        try:
            print("Writing to shop", shopId)
            service = cal.service_build()
            shop_step_time_db = shop['shop_step_time']
            shop_duration_db = shop['shop_duration']
            slot_available = int(shop['shop_slots'])
            workers = int(shop['shop_workers'])
            calendarId = shop['shop_calendarId']
            if slot_available > workers:
                a = workers
            else:
                a = slot_available
            if shop_duration_db is None:
                shop_duration_db = '30'
            if shop_step_time_db is None:
                shop_step_time_db = '15'
            shop_duration = int(shop_duration_db)
            shop_step_time = int(shop_step_time_db)
            shop_start_time = datetime.strptime(shop['shop_start_time'], "%H:%M:%S.%f").time()
            shop_end_time = datetime.strptime(shop['shop_end_time'], "%H:%M:%S.%f").time()
            # Capacity of each time slot, taken from the JSON file WorkShop.js
            booking_status = cal.auto_load_listtimes(service, shopId, calendarId, shop_step_time,
                                                     shop_duration, a, shop_start_time, shop_end_time)
        except Exception as e:
            continue

def main():
    list_shop = get_list_shop_from_db(db_file)
    auto_load_google_database(list_shop)

if __name__ == '__main__':
    main()
    schedule.every(5).minutes.do(main)
    while True:
        # Checks whether a scheduled task is pending to run or not
        schedule.run_pending()
        time.sleep(1)

How can I get my Python code to restart when the network disconnects

I have a piece of Python code running as a service that pulls weather data via an API.
The code itself runs perfectly fine when everything is hunky dory, i.e. the network is up, but I have noticed that sometimes the WiFi on the Pi that is pulling the API data drops, and the Python code then seems to stop.
I have a small line of code providing the most basic of logs, but I would like to improve upon it greatly. The log code just records datetime.now so I can see when the last time the code ran was.
#!/usr/bin/python3
# Import modules
import cymysql
from time import sleep
from urllib.request import urlopen
import json
import datetime

# Set MySQL variables
host = "localhost"
user = "xxx"
password = "xxx"
schema = "xxx"

# Connect to MySQL DB
db = cymysql.connect(host, user, password, schema)
curs = db.cursor()

# Set API key for DarkSky API
apikey = "xxx"

# Latitude & longitude
lati = "-26.20227"
longi = "28.04363"

# Add units=si to get it in sensible ISO units.
url = "https://api.forecast.io/forecast/" + apikey + "/" + lati + "," + longi + "?units=si"

# Begin infinite loop
while True:
    # Convert API reading to JSON and a readable array 'weather'
    meteo = urlopen(url).read()
    meteo = meteo.decode('utf-8')
    weather = json.loads(meteo)

    # Set variables for current weather
    cTemp = weather['currently']['temperature']
    cCond = weather['currently']['summary']
    cRain1 = weather['currently']['precipProbability']
    cRain2 = cRain1 * 100
    cIcon = weather['currently']['icon']
    oaSum = weather['daily']['summary']

    # Print variables - for testing purposes
    #print(cTemp)
    #print(cCond)
    #print(cRain2)
    #print(cIcon)
    #print(oaSum)

    # Extract daily data from 'weather' array
    daily = weather['daily']['data']

    # Create new lists for daily variables
    listHigh = []
    listLow = []
    listCond = []
    listRain = []
    listIcon = []

    # Set daily variables
    for i in daily:
        listHigh.append(i['temperatureHigh'])
    for i in range(0, len(listHigh)):
        high1 = listHigh[0]
        high2 = listHigh[1]
        high3 = listHigh[2]
        high4 = listHigh[3]
        high5 = listHigh[4]
        high6 = listHigh[5]
        high7 = listHigh[6]
        high8 = listHigh[7]
    for o in daily:
        listLow.append(o['temperatureLow'])
    for o in range(0, len(listLow)):
        low1 = listLow[0]
        low2 = listLow[1]
        low3 = listLow[2]
        low4 = listLow[3]
        low5 = listLow[4]
        low6 = listLow[5]
        low7 = listLow[6]
        low8 = listLow[7]
    for p in daily:
        listCond.append(p['summary'])
    for p in range(0, len(listCond)):
        cond1 = listCond[0]
        cond2 = listCond[1]
        cond3 = listCond[2]
        cond4 = listCond[3]
        cond5 = listCond[4]
        cond6 = listCond[5]
        cond7 = listCond[6]
        cond8 = listCond[7]
    for m in daily:
        listRain.append(m['precipProbability'])
    for m in range(0, len(listRain)):
        rain1 = listRain[0]
        rain2 = listRain[1]
        rain3 = listRain[2]
        rain4 = listRain[3]
        rain5 = listRain[4]
        rain6 = listRain[5]
        rain7 = listRain[6]
        rain8 = listRain[7]

    # Convert rain chance to a readable percentage
    prain1 = rain1 * 100
    prain2 = rain2 * 100
    prain3 = rain3 * 100
    prain4 = rain4 * 100
    prain5 = rain5 * 100
    prain6 = rain6 * 100
    prain7 = rain7 * 100
    prain8 = rain8 * 100

    for l in daily:
        listIcon.append(l['icon'])
    for l in range(0, len(listIcon)):
        icon1 = listIcon[0]
        icon2 = listIcon[1]
        icon3 = listIcon[2]
        icon4 = listIcon[3]
        icon5 = listIcon[4]
        icon6 = listIcon[5]
        icon7 = listIcon[6]
        icon8 = listIcon[7]

    # Print daily variables - for testing purposes
    #print(high1)
    #print(low1)
    #print(cond1)
    #print(prain1)
    #print(icon1)
    #print(high2)
    #print(low2)
    #print(cond2)
    #print(prain2)
    #print(icon2)

    # Update data in database
    try:
        sql_update_query = """UPDATE weather SET current_temp = %s, cur$
        varis = (cTemp, cCond, cRain2, cIcon, high1, low1, cond1, prain$
        curs.execute(sql_update_query, varis)
        db.commit()
    except db.Error as error:
        print("Error: {}".format(error))
        db.rollback()

    # Write date to log file
    with open("/home/pi/CoRo/Projects/WeatherMan/weatherlog.txt", mode="w") as file:
        file.write('Last Data was pulled at: %s' % (datetime.datetime.now()))

    # Set loop to sleep for 10 minutes and go again
    sleep(600)
I understand that the database code is snipped, but it is just the variables being put into the database, which I can see works.
However, if the network disconnects, the code stops and the database is left with the last polled API data.
How would I restart the Python code if the API call fails?
Thanks in advance.
You could rewrite the portion of your code that pulls the weather data as a function or separate module. This would allow you to call it only when the network connection is working. Some pseudo code below:
if network_connection:
    pull_weather_data()
else:
    do_something()
do_something() could be an effort to reconnect to the network, such as resetting your network adapter.
You could determine the state of the network connection by trying to ping your router or an external IP like one of Google's DNS servers (8.8.8.8 or 8.8.4.4).
To avoid nested loops you could use the continue clause. For example:
while True:
    if network_connection:
        pull_weather_data()
    else:
        reset_network_connection()
        time.sleep(180)  # Sleep for 3 minutes.
        continue
The continue will send the interpreter back to the start of the while loop. From there it will check the network connection and either pull data or reset the network connection and sleep for another 3 minutes.
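If you'd rather not shell out to ping, one lightweight way to write the network_connection check is to open a TCP connection to a public DNS server. A sketch using only the standard library (the 8.8.8.8:53 target is just the usual convention, not a requirement):
import socket

def network_connection(host="8.8.8.8", port=53, timeout=3):
    # A successful TCP connection to a public DNS server is a
    # reasonable proxy for "the network is up"
    try:
        socket.create_connection((host, port), timeout=timeout).close()
        return True
    except OSError:
        return False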
Using Quernon's answer above, the code has been edited as follows:
#!/usr/bin/python3
# Import modules
import os
import cymysql
from time import sleep
from urllib.request import urlopen
import json
import datetime

# Set MySQL variables
host = "localhost"
user = "xxx"
password = "xxx"
schema = "xxx"

# Connect to MySQL DB
db = cymysql.connect(host, user, password, schema)
curs = db.cursor()

# Set API key for DarkSky API
apikey = "xxx"

# Latitude & longitude
lati = "-26.20227"
longi = "28.04363"

# Add units=si to get it in sensible ISO units, not Fahrenheit.
url = "https://api.forecast.io/forecast/" + apikey + "/" + lati + "," + longi + "?units=si"

# Function to check if there is an internet connection
def check_ping():
    hostname = "8.8.8.8"
    response = os.system("ping -c 1 " + hostname)
    # ...and then check the response
    if response == 0:
        pingstatus = 0
    else:
        pingstatus = 1
    return pingstatus

# Function to pull weather data from the API
def get_weather():
    # insert the weather data code from above here, with no changes
    ...

# Begin infinite loop
while True:
    networkstatus = check_ping()

    #print(networkstatus)  # for testing purposes

    if networkstatus == 0:
        get_weather()
    else:
        print("Resetting Network Adapters")
        dwnnw = 'ifconfig wlan0 down'
        upnw = 'ifconfig wlan0 up'
        os.system(dwnnw)
        os.system(upnw)
        sleep(180)
        continue
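A complementary safeguard is to wrap the API call itself in a try/except, since the WiFi can still drop between the ping check and the request. A sketch of a retry wrapper around the urlopen call (URLError covers DNS and connection failures; the 180-second wait mirrors the reset branch above):
from urllib.error import URLError

def get_weather_safely(url, wait=180):
    # Keep retrying the API call until the network comes back
    while True:
        try:
            return json.loads(urlopen(url).read().decode('utf-8'))
        except (URLError, OSError):
            print("API call failed; retrying in", wait, "seconds")
            sleep(wait)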

Adding A Sentiment Analysis Loop When Collecting Twitter data

I am currently trying to add a sentiment analysis loop to a Python script that collects tweets. When I run the script without the loop, it generates the tweets just fine; however, whenever I add the for loop (starting at "for tweet in tweets_returned"), the tweets no longer generate and the CSV I created does not appear either. I was wondering whether this has to do with where I have placed the for loop within the script, or whether there is some error with the loop itself. Any help would be greatly appreciated, thanks!
sentiments = []
sentiment_means = []

# Create URL Structure
class RequestWithMethod(urllib.request.Request):
    def __init__(self, base_url, method, headers={}):
        self._method = method
        urllib.request.Request.__init__(self, base_url, headers)

    def get_method(self):
        if self._method:
            return self._method
        else:
            return urllib.request.Request.get_method(self)

# Create Endpoint & Add Credentials
def create_rules_endpoint(query):
    new_url = base_url + query
    base64string = ('%s:%s' % (UN, PWD)).replace('\n', '')
    base = base64.b64encode(base64string.encode('ascii'))
    final_final_url = urllib.request.Request(new_url)
    final_final_url.add_header('Authorization', 'Basic %s' % base.decode('ascii'))
    return final_final_url

# Take in the Endpoint and Make the Request
def make_request(search_endpoint):
    try:
        response = urllib.request.urlopen(search_endpoint)
        response_data = response.read()
        handle_response(response_data)
    except urllib.request.HTTPError as error:
        print("ERROR: %s" % error)

# Handle the Returned Data
def handle_response(data):
    tweets_returned = json.loads(data.decode('utf-8'))
    print(tweets_returned)
    for tweet in tweets_returned['results']:
        counter = 1
        compound_list = []
        positive_list = []
        negative_list = []
        neutral_list = []
        geo_list = []
        compound = analyzer.polarity_scores(tweet["text"])["compound"]
        pos = analyzer.polarity_scores(tweet["text"])["pos"]
        neu = analyzer.polarity_scores(tweet["text"])["neu"]
        neg = analyzer.polarity_scores(tweet["text"])["neg"]
        compound_list.append(compound)
        positive_list.append(pos)
        negative_list.append(neg)
        neutral_list.append(neu)
        sentiments.append({"Location": tweet["geo"],
                           "Date": tweet["created_at"],
                           "Tweet": tweet["text"],
                           "Compound": compound,
                           "Positive": pos,
                           "Neutral": neu,
                           "Negative": neg,
                           "Tweets_Ago": counter
                           })
        counter += 1
        sentiment_means.append({
            "Compound_Mean": np.mean(compound_list),
            "Positive": np.mean(positive_list),
            "Neutral": np.mean(negative_list),
            "Negative": np.mean(neutral_list),
            "Count": len(compound_list)
        })

# Create the Endpoint Variable w/ Sample Query Keyword
search_endpoint = create_rules_endpoint('Wilson%20Rackets%20has%3Ageo%20lang%3Aen')

# Make the Request by Passing in Search Endpoint
make_request(search_endpoint)

# Convert all_sentiments to DataFrame
all_sentiments_pd = pd.DataFrame.from_dict(sentiments)
all_sentiments_pd.to_csv("sentiments_array_pd.csv")
display(all_sentiments_pd)
#print(all_sentiments_pd.dtypes)

# Convert sentiment_means to DataFrame
sentiment_means_pd = pd.DataFrame.from_dict(sentiment_means)
display(sentiment_means_pd)
