How to convert JSON to a table in Python - python

try:
    # For Python 3.0 and later
    from urllib.request import urlopen
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen

import certifi
import json

def get_jsonparsed_data(url):
    response = urlopen(url, cafile=certifi.where())
    data = response.read().decode("utf-8")
    return json.loads(data)

url = ("https://financialmodelingprep.com/api/v3/ratios/AAPL?apikey=92a1dad5aef4eb31276c19417c31dfeb")
print(get_jsonparsed_data(url))

import requests
import pandas as pd
url = (
"https://financialmodelingprep.com/api/v3/ratios/AAPL?"
"apikey=92a1dad5aef4eb31276c19417c31dfeb"
)
response = requests.get(url)
data = response.json()
df = pd.DataFrame(data)
df
prints:
symbol date period currentRatio quickRatio cashRatio daysOfSalesOutstanding daysOfInventoryOutstanding operatingCycle daysOfPayablesOutstanding ... priceToSalesRatio priceEarningsRatio priceToFreeCashFlowsRatio priceToOperatingCashFlowsRatio priceCashFlowRatio priceEarningsToGrowthRatio priceSalesRatio dividendYield enterpriseValueMultiple priceFairValue
0 AAPL 2021-09-25 FY 1.074553 0.909660 0.278449 51.390969 11.276593 62.667561 93.851071 ... 6.786117 26.219656 26.706799 23.861253 23.861253 0.367742 6.786117 0.005828 20.889553 39.348186
1 AAPL 2020-09-26 FY 1.363604 1.218195 0.360710 49.787534 8.741883 58.529418 91.048190 ... 7.272322 34.773150 27.211359 24.746031 24.746031 3.277438 7.272322 0.007053 25.558891 30.553901
2 AAPL 2019-09-28 FY 1.540126 1.384447 0.462022 64.258765 9.263639 73.522404 104.314077 ... 4.420394 20.813515 19.527159 16.573786 16.573786 -62.492578 4.420394 0.012277 14.772472 12.709658
3 AAPL 2018-09-29 FY 1.123843 0.986566 0.221733 67.332499 8.817631 76.150130 124.570214 ... 3.959898 17.666917 16.402259 13.582267 13.582267 0.597709 3.959898 0.013038 13.099961 9.815760
4 AAPL 2017-09-30 FY 1.276063 1.089670 0.201252 56.800671 12.563631 69.364302 126.927606 ... 3.794457 17.989671 17.121402 13.676823 13.676823 1.632758 3.794457 0.014680 12.605749 6.488908
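
If the JSON contains nested objects rather than a flat list of records, pandas.json_normalize can flatten it before building the table. A minimal sketch — the nested structure below is a made-up example, not the shape of the financialmodelingprep response:

import pandas as pd

# Hypothetical nested records, for illustration only
records = [
    {"symbol": "AAPL", "ratios": {"currentRatio": 1.07, "quickRatio": 0.91}},
    {"symbol": "MSFT", "ratios": {"currentRatio": 1.78, "quickRatio": 1.57}},
]

# json_normalize flattens nested dicts into dotted column names
df = pd.json_normalize(records)
print(df)  # columns: symbol, ratios.currentRatio, ratios.quickRatio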

I guess this is maybe what you are trying to do:

import pandas
print(pandas.DataFrame(data))

Related

Get historical data from Binance

I am trying to extract historical data between [curr_time - 2 years, curr_time] with a time step of one day, so I expect about 700 items, but I received only 3.
How can I fix this problem?
My code:
import datetime
import time

from binance.client import Client

# Binance test_key https://testnet.binance.vision/key/generate
API_KEY = "---"
API_SECRET = "---"
DAYS_IN_YEAR = 365
DB_NAME = "charts"

def GetHistoricalData(
        timedelta_days=DAYS_IN_YEAR * 2,
        ticker="BTCUSDT",
        kline_interval=Client.KLINE_INTERVAL_1HOUR
):
    start_time = time.time()
    untilThisDate = datetime.datetime.now()
    sinceThisDate = untilThisDate - datetime.timedelta(days=timedelta_days)
    print("ZZZZZZZZZ_ ", str(sinceThisDate), str(untilThisDate))  # 2019-11-06 00:23:43.620016 2021-11-05 00:23:43.620016

    client = Client(API_KEY, API_SECRET)
    client.API_URL = 'https://testnet.binance.vision/api'

    candle = client.get_historical_klines(ticker, kline_interval, str(sinceThisDate), str(untilThisDate))
    print("CANDLE_", len(candle))  # 3
I also tried this request:
candle = client.get_historical_klines(ticker, kline_interval, "01 January, 2019", "04 November 2021")
but again received only 3 items:
dateTime ...
2021-11-02 00:00:00 61722.80000000 150535.61000000 ... 448.99018200 1635897599999
2021-11-03 00:00:00 63208.69000000 100000.00000000 ... 451.03367500 1635983999999
2021-11-04 00:00:00 62894.04000000 70000.00000000 ... 401.86212800 1636070399999
Well....
If you request this data with a direct API call against the testnet, it gives you:
In [1]: import requests
...: len(requests.get('https://testnet.binance.vision/api/v3/klines?symbol=BTCUSDT&interval=1h&limit=1000').json())
Out[1]: 65
but if you run the same request against Binance's production environment (by the way, klines/candles are public data and don't require an API key):
In [2]: import requests
...: len(requests.get('https://api.binance.com/api/v3/klines?symbol=BTCUSDT&interval=1h&limit=1000').json())
Out[2]: 1000
So, to fix your example, you need to replace the base URL:
client.API_URL = 'https://api.binance.com/api'
It gives me:
ZZZZZZZZZ_ 2019-11-06 01:15:15.122873 2021-11-05 01:15:15.122873
CANDLE_ 17483
Try the code below. I get a bunch of data, but it's not formatted:
import datetime
import time

from binance.client import Client

# Binance test_key https://testnet.binance.vision/key/generate
API_KEY = "---"
API_SECRET = "---"
DAYS_IN_YEAR = 365
DB_NAME = "charts"

def GetHistoricalData(
        timedelta_days=DAYS_IN_YEAR * 2,
        ticker="BTCUSDT",
        kline_interval=Client.KLINE_INTERVAL_1HOUR
):
    start_time = time.time()
    untilThisDate = datetime.datetime.now()
    sinceThisDate = untilThisDate - datetime.timedelta(days=timedelta_days)
    print("ZZZZZZZZZ_ ", str(sinceThisDate),
          str(untilThisDate))  # 2019-11-06 00:23:43.620016 2021-11-05 00:23:43.620016

    client = Client(API_KEY, API_SECRET)
    client.API_URL = 'https://testnet.binance.vision/api'

    candle = client.get_historical_klines(ticker, kline_interval, str(sinceThisDate), str(untilThisDate))
    print(candle)

GetHistoricalData()
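
To format the raw klines, you can load them into a pandas DataFrame. A minimal sketch, assuming candle holds the list returned by client.get_historical_klines(...) above and using the standard 12-field column layout that Binance documents for the klines endpoint:

import pandas as pd

# Column layout per Binance's klines API documentation
columns = [
    "open_time", "open", "high", "low", "close", "volume",
    "close_time", "quote_asset_volume", "number_of_trades",
    "taker_buy_base_volume", "taker_buy_quote_volume", "ignore",
]

df = pd.DataFrame(candle, columns=columns)
# Timestamps come back as epoch milliseconds
df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
df["close_time"] = pd.to_datetime(df["close_time"], unit="ms")
print(df.head())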

KeyError when trying to select a subset of a DataFrame

I'm getting a KeyError on my 'Ticker' column when trying to select only AAPL stock, and I'm not sure why... I've also tried using aapl = all_data.loc[all_data['Ticker'] == 'AAPL'].
import pandas_datareader as pdr
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def get(tickers, startdate, enddate):
    def data(ticker):
        return pdr.get_data_yahoo(ticker, start=startdate, end=enddate)
    datas = map(data, tickers)
    return pd.concat(datas, keys=tickers, names=['Ticker', 'Date'])

tickers = ['AAPL', 'MSFT', 'IBM', 'GOOG']
all_data = get(tickers, datetime.datetime(2006, 10, 1), datetime.datetime(2012, 1, 1))
print(all_data.head())

aapl = all_data[all_data['Ticker'] == 'AAPL']
This is probably what you are looking for:
all_data[all_data.index.get_loc("AAPL")]
outputs:
High Low Open Close Volume \
Ticker Date
AAPL 2006-10-02 2.709643 2.653571 2.682143 2.673571 712639200.0
2006-10-03 2.676786 2.613929 2.658929 2.645714 790708800.0
2006-10-04 2.695000 2.612857 2.646429 2.692143 829082800.0
2006-10-05 2.720000 2.647500 2.661786 2.672500 683883200.0
2006-10-06 2.680000 2.636071 2.657857 2.650714 466958800.0
... ... ... ... ... ...
2011-12-23 14.413929 14.267500 14.274643 14.404643 269399200.0
2011-12-27 14.610357 14.393572 14.396428 14.518929 265076000.0
2011-12-28 14.580358 14.333571 14.531786 14.380000 228662000.0
2011-12-29 14.487500 14.303928 14.407143 14.468572 215978000.0
2011-12-30 14.510000 14.410357 14.411072 14.464286 179662000.0
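
The KeyError happens because 'Ticker' is an index level here, not a column, so all_data['Ticker'] fails. A couple of equivalent selections via the MultiIndex, as a sketch:

# Select all rows whose first index level ('Ticker') is 'AAPL'
aapl = all_data.loc['AAPL']

# Or use a cross-section, naming the level explicitly
aapl = all_data.xs('AAPL', level='Ticker')

# If you really want 'Ticker' as a column, move it out of the index first
flat = all_data.reset_index()
aapl = flat[flat['Ticker'] == 'AAPL']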

Update rows after comparing values in a pandas DataFrame

I connect to an API that provides covid-19 data in Brazil organized by state and city, as follows:
# Libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import urllib
from urllib.request import Request, urlopen
from http.cookiejar import CookieJar
from datetime import datetime, timedelta

cj = CookieJar()
url_Bso = "https://brasil.io/api/dataset/covid19/caso_full/data?state=MG&city=Barroso"
req_Bso = urllib.request.Request(url_Bso, None, {"User-Agent": "python-urllib"})
opener_Bso = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
response_Bso = opener_Bso.open(req_Bso)
raw_response_Bso = response_Bso.read()

json_Bso = pd.read_json(raw_response_Bso)
results_Bso = json_Bso['results']
results_Bso = results_Bso.to_dict().values()
df_Bso = pd.DataFrame(results_Bso)
df_Bso.head(5)
This API compiles the data released by the state health departments. However, there is a difference between the records of the state and city health departments, and the state records are out of date relative to those of the cities. I would like to update Thursdays and Saturdays (the days when the epidemiological week ends). I'm trying the following:
saturday = datetime.today() + timedelta(days=-5)
yesterday = datetime.today() + timedelta(days=-1)
last_available_confirmed_day_Bso_saturday = 51
last_available_confirmed_day_Bso_yesterday = 54
df_Bso = df_Bso.loc[df_Bso['date'] == saturday, ['last_available_confirmed']] = last_available_confirmed_day_Bso_saturday
df_Bso = df_Bso.loc[df_Bso['date'] == yesterday, ['last_available_confirmed']] = last_available_confirmed_day_Bso_yesterday
df_Bso
However, I get the error:
> AttributeError: 'int' object has no attribute 'loc'
I need another DataFrame with the values for these days updated. Can anyone help?
You have to adjust the date. Your DataFrame's date column holds strings, so compare against date strings in the same format (or convert the column to datetime).
today = datetime.now()
# weekday(): Monday=0 ... Sunday=6; these offsets give days since the last Saturday/Thursday
last_sat_num = (today.weekday() + 2) % 7
last_thu_num = (today.weekday() + 4) % 7
last_sat = today - timedelta(last_sat_num)
last_thu = today - timedelta(last_thu_num)
# Match the string format used in the 'date' column
last_sat_str = last_sat.strftime('%Y-%m-%d')
last_thu_str = last_thu.strftime('%Y-%m-%d')

last_available_confirmed_day_Bso_sat = 51
last_available_confirmed_day_Bso_thu = 54

# Assign through .loc on the copy; don't assign the result back to the DataFrame
df_Bso2 = df_Bso.copy()
df_Bso2.loc[df_Bso2['date'] == last_sat_str, ['last_available_confirmed']] = last_available_confirmed_day_Bso_sat
df_Bso2.loc[df_Bso2['date'] == last_thu_str, ['last_available_confirmed']] = last_available_confirmed_day_Bso_thu
df_Bso2[['date', 'last_available_confirmed']].head(10)
Output
date last_available_confirmed
0 2020-07-15 44
1 2020-07-14 43
2 2020-07-13 40
3 2020-07-12 40
4 2020-07-11 51
5 2020-07-10 39
6 2020-07-09 36
7 2020-07-08 36
8 2020-07-07 27
9 2020-07-06 27
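
Alternatively, you could convert the column to real datetimes and compare calendar dates directly. A minimal sketch, assuming the column uses the 'YYYY-MM-DD' format shown above:

df_Bso2 = df_Bso.copy()
# Parse the string column into datetime64, then compare by calendar date
df_Bso2['date'] = pd.to_datetime(df_Bso2['date'])
mask_sat = df_Bso2['date'].dt.date == last_sat.date()
df_Bso2.loc[mask_sat, 'last_available_confirmed'] = last_available_confirmed_day_Bso_sat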

re-iterate over and over rather than once in soup

I keep re-iterating over this code. I'm keen to scrape all past results data from this site, yet I keep looping one result at a time.
For example, race_number prints as 1, then 1, 2, then 1, 2, 3, etc.
The end goal is to fill all the lists with data and turn them into a pandas DataFrame to look at results and trends.
import requests
import csv
import os
import numpy
import pandas
from bs4 import BeautifulSoup as bs

with requests.Session() as s:
    webpage_response = s.get('http://www.harness.org.au/racing/fields/race-fields/?mc=SW010420')
    soup = bs(webpage_response.content, "html.parser")
    #soup1 = soup.select('.content')
    results = soup.find_all('div', {'class': 'forPrint'})

race_number = []
race_name = []
race_title = []
race_distance = []
place = []
horse_name = []
Prizemoney = []
Row = []
horse_number = []
Trainer = []
Driver = []
Margin = []
Starting_odds = []
Stewards_comments = []
Scratching = []
Track_Rating = []
Gross_Time = []
Mile_Rate = []
Lead_Time = []
First_Quarter = []
Second_Quarter = []
Third_Quarter = []
Fourth_Quarter = []

for race in results:
    race_number1 = race.find(class_='raceNumber').get_text()
    race_number.append(race_number1)
    race_name1 = race.find(class_='raceTitle').get_text()
    race_name.append(race_name1)
    race_title1 = race.find(class_='raceInformation').get_text(strip=True)
    race_title.append(race_title1)
    race_distance1 = race.find(class_='distance').get_text()
    race_distance.append(race_distance1)
I need help fixing the repeated iteration, and what is the next best move to get at the table data rather than just the headers above?
Cheers
Is this the output you are expecting?
import requests
import csv
import os
import numpy
import pandas as pd
import html
from bs4 import BeautifulSoup as bs

with requests.Session() as s:
    webpage_response = s.get('http://www.harness.org.au/racing/fields/race-fields/?mc=SW010420')
    soup = bs(webpage_response.content, "html.parser")

#soup1 = soup.select('.content')
data = {}
data["raceNumber"] = [i['rowspan'] for i in soup.find_all("td", {"class": "raceNumber", "rowspan": True})]
data["raceTitle"] = [i.get_text(strip=True) for i in soup.find_all("td", {"class": "raceTitle"})]
data["raceInformation"] = [i.get_text(strip=True) for i in soup.find_all("td", {"class": "raceInformation"})]
data["distance"] = [i.get_text(strip=True) for i in soup.find_all("td", {"class": "distance"})]

print(data)
data_frame = pd.DataFrame(data)
print(data_frame)
## Output
## raceNumber raceTitle raceInformation distance
##0 3 PREMIX KING PACE $4,500\n\t\t\t\t\t4YO and older.\n\t\t\t\t\tNR... 1785M
##1 3 GATEWAY SECURITY PACE $7,000\n\t\t\t\t\t4YO and older.\n\t\t\t\t\tNR... 2180M
##2 3 PERRY'S FOOTWEAR TROT $7,000\n\t\t\t\t\t\n\t\t\t\t\tNR 46 to 55.\n\t... 2180M
##3 3 DELAHUNTY PLUMBING 3YO TROT $7,000\n\t\t\t\t\t3YO.\n\t\t\t\t\tNR 46 to 52.... 2180M
##4 3 RAYNER'S FRUIT & VEGETABLES 3YO PACE $7,000\n\t\t\t\t\t3YO.\n\t\t\t\t\tNR 48 to 56.... 2180M
##5 3 KAYE MATTHEWS TRIBUTE $9,000\n\t\t\t\t\t4YO and older.\n\t\t\t\t\tNR... 2180M
##6 3 TALQUIST TREES PACE $7,000\n\t\t\t\t\t\n\t\t\t\t\tNR 62 to 73.\n\t... 2180M
##7 3 WEEKLY ADVERTISER 3WM PACE $7,000\n\t\t\t\t\t\n\t\t\t\t\tNR 56 to 61.\n\t... 1785M
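
For the row-level table data (horses, margins, times) rather than the headers, pandas can often parse HTML tables directly. A sketch, assuming lxml is installed and the page's result tables are plain <table> elements — you'd need to inspect which of the returned frames hold the data you want:

import io
import pandas as pd
import requests

url = 'http://www.harness.org.au/racing/fields/race-fields/?mc=SW010420'
html_text = requests.get(url).text

# read_html returns one DataFrame per <table> element found in the page
tables = pd.read_html(io.StringIO(html_text))
print(len(tables))
print(tables[0].head())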

Why is Python only printing one data set in the algorithm?

So I am trying to build trading software using code from an online YouTuber. I gather all of the data for the companies in the S&P 500 in the get_data_from_yahoo() function. When I run that code it says "Already have" (then the given ticker), which is fine, but when I go to print the data in the next function, complied_data(), it only prints one ticker, which is ZTS.
Anyone have any ideas?
import bs4 as bs
import datetime as dt
import os
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import fix_yahoo_finance as yf

def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text.replace('.', '-')
        ticker = ticker[:-1]
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers

save_sp500_tickers()

def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2019, 6, 8)
    end = dt.datetime.now()
    for ticker in tickers:
        print(ticker)
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = pdr.get_data_yahoo(ticker, start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

save_sp500_tickers()
get_data_from_yahoo()

def complied_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')

    if count % 10 == 0:
        print(count)

    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')

complied_data()
When I run this code this is what it says:
MMM
Already have MMM
ABT
Already have ABT
ABBV
Already have ABBV
ABMD
Already have ABMD
ACN
Already have ACN
ATVI
Already have ATVI
ADBE
Already have ADBE
AMD
Already have AMD
AAP
Already have AAP
AES
Already have AES
AMG
Already have AMG
AFL
Already have AFL
A
Already have A
APD
Already have APD
AKAM
Already have AKAM
ALK
Already have ALK
ALB
Already have ALB
It then continues to say that it already has all 500 companies (I did not show the whole thing because the list is very long). But when I run the complied_data()
function, it only prints the data for one ticker:
ZTS
Date
2019-01-02 83.945038
2019-01-03 81.043526
2019-01-04 84.223267
2019-01-07 84.730026
2019-01-08 85.991997
The problem is in a for loop, specifically the one in complied_data.
The if-else and if blocks should be included in the for loop:
for count, ticker in enumerate(tickers):
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
    df.set_index('Date', inplace=True)
    df.rename(columns={'Adj Close': ticker}, inplace=True)
    df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')

    if count % 10 == 0:
        print(count)
Otherwise they are evaluated only once, after the loop has finished, and operate only on the last element.
The following is the output when changing to the above indentation:
(... omitted counting from 0)
470
480
490
500
MMM ABT ABBV ABMD ... YUM ZBH ZION ZTS
Date ...
2019-06-10 165.332672 80.643486 74.704918 272.429993 ... 107.794380 121.242027 43.187107 109.920105
2019-06-11 165.941788 80.494644 75.889320 262.029999 ... 106.722885 120.016762 43.758469 109.860268
2019-06-12 166.040024 81.318237 76.277657 254.539993 ... 108.082100 120.225945 43.512192 111.136780
2019-06-13 165.882843 81.655624 76.646561 255.529999 ... 108.121788 119.329407 44.063854 109.730621
2019-06-14 163.760803 81.586166 76.394157 250.960007 ... 108.925407 116.998398 44.211620 110.488556
[5 rows x 505 columns]
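
As a side note, a common alternative to joining inside the loop is to collect the per-ticker frames in a list and concatenate once at the end — a sketch under the same file layout, not the answer's method:

frames = []
for ticker in tickers:
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker), index_col='Date')
    # Keep only the adjusted close, renamed to the ticker symbol
    frames.append(df[['Adj Close']].rename(columns={'Adj Close': ticker}))

# Align all frames on the Date index in a single pass
main_df = pd.concat(frames, axis=1)
main_df.to_csv('sp500_joined_closes.csv')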
