I'm having trouble adding JSON from a websocket stream to a pandas DataFrame. I've tried a few different ways to append the data to the DataFrame, but the result always ends up mangled.
Looking at the data, I see 321 at the start of each of the lines I want data from, but I don't know how to access it: I thought something like mv = check['321'] would work, but it didn't. The result variable is what each stream message is assigned to, so I'm just trying to figure out how to get that into the DataFrame.
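For reference, parsing one of those 321 lines myself shows the shape I'm dealing with (so, if I understand correctly, 321 is a list element, not a dict key):

import json

# one raw trade message, copied from the output further down
raw = '[321,[["37491.40000","0.00420457","1612471467.490327","b","l",""]],"trade","XBT/USD"]'
msg = json.loads(raw)
print(type(msg))  # <class 'list'> -- so msg['321'] cannot work
print(msg[0])     # 321, the channel ID
print(msg[1])     # the nested list of trades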
Code:
import json, time
from websocket import create_connection
import pandas as pd

# start with empty dataframe
df = pd.DataFrame()

for i in range(3):
    try:
        ws = create_connection("wss://ws.kraken.com/")
    except Exception as error:
        print('Caught this error: ' + repr(error))
        time.sleep(3)
    else:
        break

ws.send(json.dumps({
    "event": "subscribe",
    #"event": "ping",
    "pair": ["BTC/USD"],
    #"subscription": {"name": "ticker"}
    #"subscription": {"name": "spread"}
    "subscription": {"name": "trade"}
    #"subscription": {"name": "book", "depth": 10}
    #"subscription": {"name": "ohlc", "interval": 5}
}))
csv_file = "kraken-test.csv"
timeout = time.time() + 60*1

# collect rows in a list, build the dataframe at the end
data = []
#while True:
while time.time() < timeout:
    try:
        result = ws.recv()
        converted = json.loads(result)
        check = json.dumps(result)
        #mv = converted['321']
        #data.append(pd.DataFrame.from_dict(pd.json_normalize(check)))
        #data.append(pd.DataFrame.from_dict(converted, orient='columns'))
        #data.append(pd.json_normalize(converted), orient='columns')
        data.append(check)
        print(check)
        #print("Received '%s'" % converted, time.time())
        #print(df)
    except Exception as error:
        print('Caught this error: ' + repr(error))
        time.sleep(3)

ws.close()
df = pd.DataFrame(data)
df.to_csv(csv_file, index=False, encoding='utf-8')
Output from print(check):
"[321,[[\"37491.40000\",\"0.00420457\",\"1612471467.490327\",\"b\",\"l\",\"\"]],\"trade\",\"XBT/USD\"]"
"{\"event\":\"heartbeat\"}"
"[321,[[\"37491.40000\",\"0.00154223\",\"1612471468.547627\",\"b\",\"l\",\"\"]],\"trade\",\"XBT/USD\"]"
"{\"event\":\"heartbeat\"}"
"{\"event\":\"heartbeat\"}"
"[321,[[\"37491.40000\",\"0.00743339\",\"1612471470.533849\",\"b\",\"m\",\"\"],[\"37491.40000\",\"0.00001187\",\"1612471470.537466\",\"b\",\"m\",\"\"],[\"37491.40000\",\"0.00000002\",\"1612471470.539063\",\"b\",\"m\",\"\"]],\"trade\",\"XBT/USD\"]"
"{\"event\":\"heartbeat\"}"
"{\"event\":\"heartbeat\"}"
"{\"event\":\"heartbeat\"}"
"{\"event\":\"heartbeat\"}"
"{\"event\":\"heartbeat\"}"
csv output:
0
"""{\""connectionID\"":18300780323084664829,\""event\"":\""systemStatus\"",\""status\"":\""online\"",\""version\"":\""1.7.0\""}"""
"""{\""channelID\"":321,\""channelName\"":\""trade\"",\""event\"":\""subscriptionStatus\"",\""pair\"":\""XBT/USD\"",\""status\"":\""subscribed\"",\""subscription\"":{\""name\"":\""trade\""}}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""{\""event\"":\""heartbeat\""}"""
"""[321,[[\""37500.20000\"",\""0.07021874\"",\""1612471427.916155\"",\""b\"",\""l\"",\""\""],[\""37500.20000\"",\""0.30978126\"",\""1612471427.918316\"",\""b\"",\""l\"",\""\""]],\""trade\"",\""XBT/USD\""]"""
"""[321,[[\""37500.10000\"",\""0.01275000\"",\""1612471428.366246\"",\""s\"",\""l\"",\""\""]],\""trade\"",\""XBT/USD\""]"""
print output of result variable:
{"connectionID":13755154340899011582,"event":"systemStatus","status":"online","version":"1.7.0"}
{"channelID":321,"channelName":"trade","event":"subscriptionStatus","pair":"XBT/USD","status":"subscribed","subscription":{"name":"trade"}}
{"event":"heartbeat"}
[321,[["37679.30000","0.00462919","1612473049.044471","s","l",""]],"trade","XBT/USD"]
{"event":"heartbeat"}
{"event":"heartbeat"}
{"event":"heartbeat"}
[321,[["37684.00000","0.00300000","1612473051.657296","s","m",""]],"trade","XBT/USD"]
Cleaning your code up
remove the exception handling that masks what is going on
it then becomes clear that ws.recv() sometimes returns a dict and sometimes a list
construct a dict from the list
not sure what is contained in the 2D list at position 1, so I called it measure
pd.concat() is used to build up the dataframe
import json, time
from websocket import create_connection
import pandas as pd

# start with empty dataframe
df = pd.DataFrame()
ws = create_connection("wss://ws.kraken.com/")
ws.send(json.dumps({
    "event": "subscribe",
    "pair": ["BTC/USD"],
    "subscription": {"name": "trade"}
}))

timeout = time.time() + 60*1
while time.time() < timeout:
    js = json.loads(ws.recv())
    if isinstance(js, dict):
        df = pd.concat([df, pd.json_normalize(js)])
    elif isinstance(js, list):
        df = pd.concat([df, pd.json_normalize({"event": "data",
                                               "data": {
                                                   "channelID": js[0],
                                                   "measure": js[1],
                                                   "channelName": js[2],
                                                   "pair": js[3]}
                                               })
                        ])
    else:
        assert False, f"unknown socket data {js}"
    time.sleep(1)
Pick the individual values out of "measure". This does not consider the lengths of either dimension, so think about what's being thrown away:
df = pd.concat([df, pd.json_normalize({"event": "data",
                                       "data": {
                                           "channelID": js[0],
                                           "measure": js[1],
                                           "m0": js[1][0][0],
                                           "m1": js[1][0][1],
                                           "m2": js[1][0][2],
                                           "m3": js[1][0][3],
                                           "m4": js[1][0][4],
                                           "channelName": js[2],
                                           "pair": js[3]}
                                       })
                ])
Save the JSON data to a file, ws.txt:
import json, time
from websocket import create_connection
import pandas as pd

ws = create_connection("wss://ws.kraken.com/")
ws.send(json.dumps({
    "event": "subscribe",
    "pair": ["BTC/USD"],
    "subscription": {"name": "trade"}
}))

timeout = time.time() + 5
with open('ws.txt', 'a') as fw:
    while time.time() < timeout:
        data = ws.recv() + '\n'
        fw.write(data)
        print(data, end='')
Parse ws.txt:
df_ws = pd.read_csv('ws.txt', header=None, sep='\n')
obj = df_ws[0].map(json.loads)
df = pd.DataFrame(obj[obj.map(lambda x: isinstance(x, list))].tolist(),
                  columns=['channelID', 'trade', 'event', 'pair']).explode('trade')
df[['price', 'volume', 'time', 'side', 'orderType', 'misc']] = pd.DataFrame(df['trade'].tolist()).values
cols = ['event', 'price', 'volume', 'time', 'side', 'orderType', 'misc', 'pair']
dfn = df[cols].copy()
print(dfn.head())
# event price volume time side orderType misc pair
# 0 trade 46743.2 0.00667696 1612850630.079810 s m XBT/USD
# 1 trade 46761.1 0.00320743 1612850633.402091 b l XBT/USD
# 2 trade 46766.3 0.04576695 1612850634.419905 s m XBT/USD
# 3 trade 46794.8 0.12000000 1612850637.033033 s l XBT/USD
# 3 trade 46787.2 0.08639234 1612850637.036229 s l XBT/USD
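Note: newer pandas releases deprecate sep='\n' in read_csv, so if that line warns or errors for you, a plain line-by-line parse is a drop-in replacement (a sketch, assuming the same ws.txt):

import json
import pandas as pd

# keep only the list-shaped trade messages, skipping heartbeats etc.
with open('ws.txt') as fr:
    objs = [json.loads(line) for line in fr if line.strip()]
trades = [o for o in objs if isinstance(o, list)]
df = pd.DataFrame(trades, columns=['channelID', 'trade', 'event', 'pair']).explode('trade')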
Related
I have Python code that does data analytics on a CSV file. I want it to run periodically in a Docker container: every 15 seconds it should look at folder A, and if there is a CSV file in it, process it and put an HTML report with the same name into folder B.
Here is my Python code.
#This program pulls data from a csv file and writes it out as an html file.
#The csv file contains device names, card names and card temperatures.
#The html file reports: how many devices and cards are in the system, and which
#device has the hottest card. The table below that shows, per device, the total
#number of cards, how many are at 70 degrees or above, and the highest and
#average card temperatures.
#NOTE: The print calls in the program are for debugging purposes.
import pandas as pd
from prettytable import PrettyTable, PLAIN_COLUMNS

table = PrettyTable()   # create a table for devices
table2 = PrettyTable()  # create a table for the summary
table.field_names = ["Device -", "Total # of Cards - ", "High Temp. Cards # - ", "Max Temperature - ", "Avg. Temperature "]
table2.field_names = [" ", " "]

df = pd.read_csv("cards.csv", sep=';', usecols=['Device', 'Card', 'Temperature'])  # , index_col=["Device","Card"]
print(type(df))
print(df["Device"].nunique(), "\n\n")  # number of unique devices
total_devices = df["Device"].nunique() # NUMBER OF DEVICES IN DIFFERENT TYPES
print(total_devices)
print(df["Device"].loc[1], "\n\n")
print(df['Temperature'].max(), "\n\n")
maxTemp = df['Temperature'].max()      # finding the max temperature
print("total card ", )
i = 0
j = 1
# Finding the card with the max temperature and the device where the card is located
while j > 0:
    if df["Temperature"].loc[i] == df["Temperature"].max():
        print(df["Device"].loc[i])
        print(df["Card"].loc[i])
        deviceName = df["Device"].loc[i]
        cardName = df["Card"].loc[i]
        j = 0
    else:
        i = i + 1

dev_types = df["Device"].unique()  # device names
print("\n\n")
newstr = cardName + "/" + deviceName

# Building the summary table
table2.add_row(["Total Devices ", total_devices])
table2.add_row(["Total Cards ", len(df["Card"])])
table2.add_row(["Max Card Temperature ", df["Temperature"].max()])
table2.add_row(["Hottest Card / Device ", newstr])
print(table2)
row_num = len(df)
print(row_num)
# I re-read the file indexed by device so the cards and temperatures are grouped per device; the per-device max temp comes from here
dn = pd.read_csv("cards.csv", sep=';', index_col=["Device"], usecols=['Device', 'Card', 'Temperature'])
sum = []
high = []
#print("max temp: ", dn["Temperature"].loc[dev_types[1]].max())
for x in range(total_devices):  # total devices (according to the file = 3)
    print("\n")
    cardCount = 0   # counts the number of cards belonging to the device
    count2 = 0      # counts the number of cards with a temperature of 70 or more
    tempcount = 0
    print(dev_types[x])
    for y in range(row_num):
        if dev_types[x] == df["Device"].loc[y]:
            print(df["Temperature"].loc[y])
            tempcount = tempcount + df["Temperature"].loc[y]  # sum of the card temperatures (used for the average)
            cardCount = cardCount + 1
            if df["Temperature"].loc[y] >= 70:
                count2 = count2 + 1
    maxT = dn["Temperature"].loc[dev_types[x]].max()  # max temperature among the device's cards
    avg = str(tempcount / cardCount)
    print("avg", avg)
    table.add_row([dev_types[x], cardCount, count2, maxT, avg])  # add the information to the devices table
    print("num of cards", cardCount)
    print("high temp cards", count2)
    print("\n\n")

print("\n\n")
print(table)

htmlCode = table.get_html_string()
htmlCode2 = table2.get_html_string()
f = open('devices.html', 'w')
f.write("SUMMARY")
f.write(htmlCode2)
f.write("DEVICES")
f.write(htmlCode)
f.close()
Whether or not the code is run in Docker doesn't matter.
Wrap all of that current logic (well, not the imports and so on) in a function, say, def process_cards().
Call that function forever, in a loop:
import logging
import time

def process_cards():
    table = PrettyTable()
    ...

def main():
    logging.basicConfig()
    while True:
        try:
            process_cards()
        except Exception:
            logging.exception("Failed processing")
        time.sleep(15)

if __name__ == "__main__":
    main()
As an aside, your data processing code can be vastly simplified:
import pandas as pd
from prettytable import PrettyTable

def get_summary_table(df):
    summary_table = PrettyTable()  # create a table for the summary
    total_devices = df["Device"].nunique()
    hottest_card = df.loc[df["Temperature"].idxmax()]
    hottest_device_desc = f"{hottest_card.Card}/{hottest_card.Device}"
    summary_table.add_row(["Total Devices", total_devices])
    summary_table.add_row(["Total Cards", len(df["Card"])])
    summary_table.add_row(["Max Card Temperature", df["Temperature"].max()])
    summary_table.add_row(["Hottest Card / Device ", hottest_device_desc])
    return summary_table

def get_devices_table(df):
    devices_table = PrettyTable(
        [
            "Device",
            "Total # of Cards",
            "High Temp. Cards #",
            "Max Temperature",
            "Avg. Temperature",
        ]
    )
    for device_name, group in df.groupby("Device"):
        count = len(group)
        avg_temp = group["Temperature"].mean()
        max_temp = group["Temperature"].max()
        high_count = group[group.Temperature >= 70]["Temperature"].count()
        print(f"{device_name=} {avg_temp=} {max_temp=} {high_count=}")
        devices_table.add_row([device_name, count, high_count, max_temp, avg_temp])
    return devices_table

def do_processing(csv_file="cards.csv", html_file="devices.html"):
    # df = pd.read_csv(csv_file, sep=';', usecols=['Device', 'Card', 'Temperature'])
    # (Just some random example data)
    df = pd.DataFrame({
        "Device": [f"Device {1 + x // 3}" for x in range(10)],
        "Card": [f"Card {x + 1}" for x in range(10)],
        "Temperature": [59.3, 77.2, 48.5, 60.1, 77.2, 61.1, 77.4, 65.8, 71.2, 60.3],
    })
    summary_table = get_summary_table(df)
    devices_table = get_devices_table(df)
    with open(html_file, "w") as f:
        f.write(
            "<style>table, th, td {border: 1px solid black; border-collapse: collapse;}</style>"
        )
        f.write("SUMMARY")
        f.write(summary_table.get_html_string(header=False))
        f.write("DEVICES")
        f.write(devices_table.get_html_string())

do_processing()
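To put this on the 15-second schedule from the first part of the answer, a minimal wiring sketch (assuming both snippets live in the same module):

import logging
import time

def main():
    logging.basicConfig()
    while True:
        try:
            do_processing()
        except Exception:
            logging.exception("Failed processing")
        time.sleep(15)

if __name__ == "__main__":
    main()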
I have an example of a repeat decorator that runs your function every few seconds or minutes.
I hope this sample helps you.
from typing import Optional, Callable, Awaitable
import asyncio
from functools import wraps

def repeat_every(*, seconds: float, wait_first: bool = False) -> Callable:
    def decorator(function: Callable[[], Optional[Awaitable[None]]]):
        is_coroutine = asyncio.iscoroutinefunction(function)

        @wraps(function)
        async def wrapped():
            async def loop():
                if wait_first:
                    await asyncio.sleep(seconds)
                while True:
                    try:
                        if is_coroutine:
                            await function()
                        else:
                            # run a plain blocking function in the default executor
                            await asyncio.get_running_loop().run_in_executor(None, function)
                    except Exception as e:
                        raise e
                    await asyncio.sleep(seconds)
            asyncio.create_task(loop())
        return wrapped
    print("Repeat every working well.")
    return decorator
@repeat_every(seconds=2)
async def main():
    print(2*2)

try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = None

if loop and loop.is_running():
    print('Async event loop already running.')
    tsk = loop.create_task(main())
    tsk.add_done_callback(
        lambda t: print(f'Task done with result= {t.result()}'))
else:
    print('Starting new event loop')
    asyncio.run(main())
Alternatively, you can build an entrypoint that runs the script from a cron job.
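Keep in mind that cron's minimum granularity is one minute, so a true 15-second cadence still needs an in-process loop; but for a cron-driven setup, a one-shot entrypoint could look roughly like this (a sketch; the script name, folder names and process_csv are assumptions standing in for your existing logic):

# report_once.py -- hypothetical one-shot entrypoint that cron invokes periodically.
# Example crontab entry (every minute):
#   * * * * * cd /app && python report_once.py
from pathlib import Path

def run_once(folder_a="A", folder_b="B"):
    for csv_path in Path(folder_a).glob("*.csv"):
        html_path = Path(folder_b) / (csv_path.stem + ".html")
        if not html_path.exists():            # skip files already reported
            process_csv(csv_path, html_path)  # process_csv = your existing csv->html logic

if __name__ == "__main__":
    run_once()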
I've recently jumped from VBA to Python trying to improve the reporting at my company, so I am a bit new to Python.
After consolidating various CSV files into one dataframe, my program applies data validation in the form of a dropdown list with XlsxWriter.
I was getting a corrupted file every time the process finished, and so the data validation was gone.
I've been playing with the limits and found that it can only apply data validation to 71215 rows of data; beyond that the file becomes corrupt.
Because this only happens with large data, I can't provide an example dataframe, but it would be like:
df = pd.read_csv(fichero, sep=";", dtype={
"Dirección": "string",
"Territorio": "string",
"DNI": "string",
"Agente": "string",
"CentroACC": "string",
"ModoACC": "string",
"Fecha": "string",
"Presentes": "string",
"Contratados": "string",
"Modo Planificación": "string",
}
)
Here is the code that implements the data validation. It works fine and won't raise any error, but the result is a corrupted xlsx file.
import pandas as pd
import xlsxwriter as xl
import numpy as np
import time
from diccionarios import diccionarioHabilidadesAgente as dh

def cargaListaHabilidades(df, Fecha):
    df = df.copy()
    df['Habilidades Vigentes'] = np.where((df['INICIO'] <= Fecha) & (Fecha <= df['FIN']), 1, 0)
    df = df.loc[df['Habilidades Vigentes'] == 1]
    return df['Campaña'].tolist()

def main(df, rutaficheros):
    inicio = time.perf_counter()
    dfHabilidades = pd.read_csv(rutaficheros + 'Habilidades.csv', sep=";")
    diccionarioHabilidades = dh(dfHabilidades)
    fin1 = time.perf_counter()
    print('diccionario habilidades generado en: ', fin1 - inicio, ' segundos.')
    del(dfHabilidades)
    fichero = rutaficheros + 'Consolidado_ACC.xlsx'
    writer = pd.ExcelWriter(fichero, engine='xlsxwriter')
    df.to_excel(writer, sheet_name='Descarga ACC')
    workbook = writer.book
    worksheet = writer.sheets['Descarga ACC']
    fin2 = time.perf_counter()
    print('Excel generado en: ', fin2 - fin1, ' segundos.')
    for fila in range(0, 71214):  # len(df)):
        Agente = df.iloc[fila]['DNI']
        Fecha = df.iloc[fila]['Fecha']
        if Agente in diccionarioHabilidades:
            if Fecha in diccionarioHabilidades[Agente]:
                lista = diccionarioHabilidades[Agente][Fecha]
                worksheet.data_validation('K' + str(fila + 2), {'validate': 'list',
                                                                'source': lista})
    fin3 = time.perf_counter()
    print('validación aplicada en: ', fin3 - fin2)
    workbook.close()
    print('guardado el fichero en: ', time.perf_counter() - fin3, ' segundos.')
    return
Like this the code works, because I manually input 71214 in the loop, but the total number of rows is around 100k.
I wanted to ask if anyone knows the reason for this before I fall back to the uglier approach of splitting the df in two and generating two separate files.
Edit:
Excel itself can handle far more data validations than this (I used to do it from VBA).
Each data validation needs to be applied on its own, because people have a different set of skills for each day of the month.
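For anyone who wants to reproduce this without my real data, a minimal synthetic sketch (row count as a parameter, validation list hardcoded) shows the same behaviour for me:

import xlsxwriter

def make_test_file(n_rows, path="validation-test.xlsx"):
    # one list data validation per row, like the real loop above
    workbook = xlsxwriter.Workbook(path)
    worksheet = workbook.add_worksheet()
    for fila in range(n_rows):
        worksheet.data_validation('K' + str(fila + 2),
                                  {'validate': 'list', 'source': ['a', 'b', 'c']})
    workbook.close()

make_test_file(71214)   # opens fine
make_test_file(100000)  # Excel reports the file as corrupt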
I have this data entry:
[{'id': 2269396, 'from': 1647086100, 'at': 1647086160000000000, 'to': 1647086160, 'open': 1.072652, 'close': 1.072691, 'min': 1.072641, 'max': 1.072701, 'volume': 0},..]
Applying this indexing in pandas:
current = self.getAllCandles(self.active_id, start_candle)
main = pd.DataFrame()
useful_frame = pd.DataFrame()
for candle in current:
    useful_frame = pd.DataFrame(list(candle.values()), index=list(candle.keys())).T.drop(columns=['at'])
    useful_frame = useful_frame.set_index(useful_frame['from']).drop(columns=['id'])
    main = main.append(useful_frame)
main.drop_duplicates()
final_data = main.drop(columns={'to'})
final_data = final_data.loc[~final_data.index.duplicated(keep='first')]
return final_data
After that I have the following result:
from open close min max volume
from
1.647086e+09 1.647086e+09 1.072652 1.072691 1.072641 1.072701 0.0
... ... ... ... ... ... ...
Since df.append() is deprecated, I'm struggling to do the same thing with pd.concat(), but I'm not getting it. How could I change that?
Thank you all, I made a small modification to the code suggested by @stuart-berg, and it was perfect:
current = self.getAllCandles(self.active_id, start_candle)
frames = []
useful_frame = pd.DataFrame.from_dict(current, orient='columns')
useful_frame = useful_frame.set_index('from')
useful_frame = useful_frame.drop(columns=['at', 'id'])
frames.append(useful_frame)
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
return final_data
I think this is what you're looking for:
current = self.getAllCandles(self.active_id, start_candle)
frames = []
for candle in current:
    useful_frame = pd.DataFrame.from_dict(candle, orient='columns')
    #useful_frame['from'] = datetime.datetime.fromtimestamp(int(useful_frame['from'])).strftime('%Y-%m-%d %H:%M:%S')
    useful_frame = useful_frame.set_index('from')
    useful_frame = useful_frame.drop(columns=['at', 'id'])
    frames.append(useful_frame)
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
Create an empty Python list, append all the series to it, and finally call pandas' concat on that list; this will give you the dataframe.
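In general the pattern looks like this (a generic sketch, not tied to the candle data):

import pandas as pd

frames = []                                  # collect pieces in a plain list
for i in range(3):
    frames.append(pd.DataFrame({"x": [i], "y": [i * 10]}))
df = pd.concat(frames, ignore_index=True)    # one concat at the end
print(df)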
I know this question has been asked before, but I've tried to implement all the solutions I've found and still haven't solved the problem.
Here is my code
import csv
import pandas as pd
import helperFunctions
import pandas

tickers = []
f = open("watchlist.txt", "r")
for i in f:
    tickers.append((helperFunctions.getTicker(i)))

head = ["Name", "Growth", "Recommendation", "Earnings Growth", "Current Ratio",
        "Total Cash", "Debt", " Revenue", "Percentage Shares Out", "Percentage Institutions", "Percentage Insiders",
        "Price to Book", "Short Ratio", "Regular Market Price"]
df = pd.DataFrame()
df = pd.DataFrame(columns=head)
try:
    for i in tickers:
        currentTicker = []
        currentTicker.append(i.info['longName'])
        currentTicker.append(i.info['revenueGrowth'])
        currentTicker.append(i.info['recommendationKey'])
        currentTicker.append(i.info['earningsGrowth'])
        currentTicker.append(i.info['currentRatio'])
        currentTicker.append(i.info['totalCash'])
        currentTicker.append(i.info['totalDebt'])
        currentTicker.append(i.info['totalRevenue'])
        currentTicker.append(i.info['sharesPercentSharesOut'])
        currentTicker.append(i.info['heldPercentInstitutions'])
        currentTicker.append(i.info['heldPercentInsiders'])
        currentTicker.append(i.info['priceToBook'])
        currentTicker.append(i.info['shortRatio'])
        currentTicker.append(i.info['beta'])
        currentTicker.append(i.info['regularMarketPrice'])
        print(str(currentTicker) + "\n")
        '''
        # Why don't these work??
        1.
        df.append(currentTicker)
        2.
        df_length = len(df)
        df.loc[df_length] = currentTicker
        3.
        a_series = pd.Series(currentTicker, index=df.columns)
        df = df.append(a_series, ignore_index=True)
        '''
except Exception as e:
    print(str(i) + repr(e))
print(df)
In the commented section you can see all the things I've attempted to add each iteration to the dataframe. Basically the watchlist is a text file with some tickers; I would like to get that data into a dataframe, and I'm running into some trouble. Thanks for your time.
This was how I ended up solving it
import csv
import pandas as pd
import helperFunctions
import pandas

tickers = []
f = open("watchlist.txt", "r")
for i in f:
    tickers.append((helperFunctions.getTicker(i)))

head = ["Name", "Growth", "Recommendation", "Earnings Growth", "Current Ratio",
        "Total Cash", "Debt", "Revenue", "Percentage Shares Out", "Percentage Institutions", "Percentage Insiders",
        "Price to Book", "Short Ratio", "Regular Market Price"]
df = pd.DataFrame(columns=head)
try:
    for i in tickers:
        currentTicker = []
        currentTicker.append(i.info['longName'])
        currentTicker.append(i.info['revenueGrowth'])
        currentTicker.append(i.info['recommendationKey'])
        currentTicker.append(i.info['earningsGrowth'])
        currentTicker.append(i.info['currentRatio'])
        currentTicker.append(i.info['totalCash'])
        currentTicker.append(i.info['totalDebt'])
        currentTicker.append(i.info['totalRevenue'])
        currentTicker.append(i.info['sharesPercentSharesOut'])
        currentTicker.append(i.info['heldPercentInstitutions'])
        currentTicker.append(i.info['heldPercentInsiders'])
        currentTicker.append(i.info['priceToBook'])
        currentTicker.append(i.info['shortRatio'])
        currentTicker.append(i.info['beta'])
        currentTicker.append(i.info['regularMarketPrice'])
        df = df.append({'Name': currentTicker[0],
                        'Growth': currentTicker[1],
                        "Recommendation": currentTicker[2],
                        "Earnings Growth": currentTicker[3],
                        "Current Ratio": currentTicker[4],
                        "Total Cash": currentTicker[5],
                        "Debt": currentTicker[6],
                        "Revenue": currentTicker[7],
                        "Percentage Shares Out": currentTicker[8],
                        "Percentage Institutions": currentTicker[9],
                        "Percentage Insiders": currentTicker[10],
                        "Price to Book": currentTicker[11],
                        "Short Ratio": currentTicker[12],
                        # currentTicker[13] is beta, which has no column in head
                        "Regular Market Price": currentTicker[14],
                        },
                       ignore_index=True)
        #print(currentTicker)
except Exception as e:
    print(str(i) + repr(e))
print(df)
The issue in your code seems to be related to the fact that the currentTicker list has an extra column, namely beta, which is not present in the columns of the df.
Once you either add that column to head or remove it from currentTicker, method 2 and 3 will work.
import csv
import pandas as pd
import helperFunctions

with open("watchlist.txt", "r") as f:
    tickers = [helperFunctions.getTicker(i) for i in f]

head = [
    "Name",
    "Growth",
    "Recommendation",
    "Earnings Growth",
    "Current Ratio",
    "Total Cash",
    "Debt",
    "Revenue",
    "Percentage Shares Out",
    "Percentage Institutions",
    "Percentage Insiders",
    "Price to Book",
    "Short Ratio",
    "Regular Market Price"
]
df = pd.DataFrame(columns=head)

columns_to_extract = [
    'longName',
    'revenueGrowth',
    'recommendationKey',
    'earningsGrowth',
    'currentRatio',
    'totalCash',
    'totalDebt',
    'totalRevenue',
    'sharesPercentSharesOut',
    'heldPercentInstitutions',
    'heldPercentInsiders',
    'priceToBook',
    'shortRatio',
    # 'beta',  # <- The column for this value is missing from `head`
    'regularMarketPrice'
]

for i in tickers:
    currentTicker = [i.info[key] for key in columns_to_extract]

    # 1. Method - THIS DOES NOT WORK
    # df.append(currentTicker)

    # 2. Method
    df_length = len(df)
    df.loc[df_length] = currentTicker

    # 3. Method
    a_series = pd.Series(currentTicker, index=df.columns)
    df = df.append(a_series, ignore_index=True)

print(df)
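One caveat with method 3: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so on current pandas the equivalent would be something like this sketch:

# method 3 without DataFrame.append (works on pandas 2.x)
a_series = pd.Series(currentTicker, index=df.columns)
df = pd.concat([df, a_series.to_frame().T], ignore_index=True)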
I am trying to replicate the following code, which works smoothly, while adding a date parameter to the function and running the function with different dates in a loop:
FUNCTION V1:
def getOHLCV(currencies):
    c_price = []
    data = {}
    try:
        url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/ohlcv/historical'
        parameters = {
            'symbol': ",".join(currencies),
            #'time_start': ",".join(start_dates),
            'count': '91',
            'interval': 'daily',
            'convert': 'JPY',
        }
        headers = {
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
        }
        session = Session()
        session.headers.update(headers)
        response = session.get(url, params=parameters)
        data = json.loads(response.text)
        for currency in data['data']:
            used_list = [
                item['quote']['JPY']
                for item in data['data'][currency]['quotes']
            ]
            price = pd.DataFrame.from_records(used_list)
            price['timestamp'] = pd.to_datetime(price['timestamp'])
            price['timestamp'] = price['timestamp'].astype(str).str[:-15]
            price_c = price.set_index('timestamp').close
            c_price.append(price_c.rename(currency))
    except Exception as e:
        print(data)
    return c_price

c_price = []
c_price.extend(getOHLCV(available[:61]))
c_price.extend(getOHLCV(available[61:]))
c_price = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
c_price = c_price.transpose()
c_price.index.name = 'currency'
c_price.sort_index(axis=0, ascending=True, inplace=True)
OUTPUT:
2019-07-25 2019-07-26 2019-07-27 2019-07-28 2019-07-29 \
currency
1WO 2.604104 2.502526 2.392313 2.418967 2.517868
ABX 1.015568 0.957774 0.913224 0.922612 1.037273
ADH 0.244782 0.282976 0.309931 0.287933 0.309613
... ... ... ... ... ...
XTX 0.156103 0.156009 0.156009 0.165103 0.156498
ZCO 0.685255 0.661324 0.703521 0.654763 0.616204
ZPR 0.214395 0.204968 0.181529 0.178460 0.177596
FUNCTION V2:
The V2 function adds a start_dates parameter and loops the function with this new parameter. The issue is that I get an empty dataframe from it. I assume there is an issue with the date, but I don't know where. Any help is appreciated.
def getOHLCV(currencies, start_dates):
    ...
    'symbol': ",".join(currencies),
    'time_start': ",".join(start_dates),
    ...

date_list = [(date.today() - timedelta(days=x * 91)) for x in range(3)][1:]
one = []
for i in date_list:
    c_price = []
    c_price.extend(getOHLCV(available[:61], i))
    c_price.extend(getOHLCV(available[61:], i))
    c_price = pd.concat(c_price, axis=1, sort=True)
    one = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
The list you are extending is being cleared at each iteration of the for loop; it can be fixed like so:
date_list = [(date.today() - timedelta(days=x * 91)) for x in range(3)][1:]
one = []
c_price = []
for i in date_list:
    c_price.extend(getOHLCV(available[:61], i))
    c_price.extend(getOHLCV(available[61:], i))
c_price = pd.concat(c_price, axis=1, sort=True)
one = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
Hope that works for you
Edit 1
So we need to fix the error : "time_start" must be a valid ISO 8601 timestamp or unix time value'
This is because the result of
date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
is
[datetime.date(2019, 7, 24), datetime.date(2019, 4, 24)]
So we need to convert the list of datetime objects into something that the API will understand; we can do that the following way:
date_list = list(map(date.isoformat, date_list))
And we get the following output
['2019-07-24', '2019-04-24']
Edit 2
The error happens when we try to call join on something that isn't a list, so we can fix it by doing
'time_start': start_dates
Instead of doing
'time_start': ",".join(start_dates),