How to write a specific column from an API to a pandas dataframe - Python

I use an API and get a lot of data, but I need to write only part of it to a pandas dataframe and then to a csv file.
How can I do this? I need the currency and rate columns only.
import requests
import pandas as pd
url = 'https://api.apilayer.com/exchangerates_data/latest?base=EUR'
get_response = requests.get(url)
print(get_response.content)
The response is:
b'{\n "success": true,\n "timestamp": 1653291723,\n "base": "EUR",\n "date": "2022-05-23",\n "rates": {\n "AED": 3.891874,\n "AFN": 96.332724,\n "ALL": 120.12076,\n "AMD": 486.326147,\n "ANG": 1.910798,\n "AOA": 440.146399,\n "ARS": 125.559742,\n "AUD": 1.49136,\n "AWG": 1.907774,\n "AZN": 1.796984,\n "BAM": 1.958501,\n "BBD": 2.140687,\n "BDT": 92.747171,\n "BGN": 1.955884,\n "BHD": 0.399459,\n "BIF": 2179.202426,\n "BMD": 1.05958,\n "BND": 1.461113,\n "BOB": 7.300355,\n "BRL": 5.170217,\n "BSD": 1.060231,\n "BTC": 3.4686139e-05,\n "BTN": 82.245346,\n "BWP": 12.83561,\n "BYN": 3.578179,\n "BYR": 20767.765076,\n "BZD": 2.137314,\n "CAD": 1.355568,\n "CDF": 2124.457448,\n "CHF": 1.030337,\n "CLF": 0.032122,\n "CLP": 886.340415,\n "CNY": 7.067924,\n "COP": 4208.651167,\n "CRC": 711.74061,\n "CUC": 1.05958,\n "CUP": 28.078866,\n "CVE": 110.417129,\n "CZK": 24.584356,\n "DJF": 188.746891,\n "DKK": 7.440804,\n "DOP": 58.552672,\n "DZD": 154.438592,\n "EGP": 19.350685,\n "ERN": 15.893699,\n "ETB": 55.044138,\n "EUR": 1,\n "FJD": 2.284134,\n "FKP": 0.867087,\n "GBP": 0.843796,\n "GEL": 3.078048,\n "GGP": 0.867087,\n "GHS": 8.244159,\n "GIP": 0.867087,\n "GMD": 57.376242,\n "GNF": 9372.913662,\n "GTQ": 8.136894,\n "GYD": 221.832492,\n "HKD": 8.316976,\n "HNL": 26.044688,\n "HRK": 7.531387,\n "HTG": 118.758621,\n "HUF": 382.210602,\n "IDR": 15541.387462,\n "ILS": 3.544575,\n "IMP": 0.867087,\n "INR": 82.241939,\n "IQD": 1547.570278,\n "IRR": 44820.228002,\n "ISK": 138.696976,\n "JEP": 0.867087,\n "JMD": 163.866665,\n "JOD": 0.751235,\n "JPY": 135.279211,\n "KES": 123.494294,\n "KGS": 84.628222,\n "KHR": 4306.032696,\n "KMF": 494.770756,\n "KPW": 953.622101,\n "KRW": 1339.844028,\n "KWD": 0.324317,\n "KYD": 0.883605,\n "KZT": 451.435581,\n "LAK": 14057.368731,\n "LBP": 1603.386389,\n "LKR": 376.408573,\n "LRD": 161.585115,\n "LSL": 16.867967,\n "LTL": 3.128664,\n "LVL": 0.640929,\n "LYD": 5.0991,\n "MAD": 10.616176,\n "MDL": 20.305389,\n "MGA": 4285.919145,\n "MKD": 61.538618,\n "MMK": 1962.989296,\n "MNT": 3259.024764,\n "MOP": 8.570086,\n "MRO": 378.269824,\n "MUR": 45.988019,\n "MVR": 16.344051,\n "MWK": 866.149081,\n "MXN": 21.058086,\n "MYR": 4.645208,\n "MZN": 67.632728,\n "NAD": 16.868837,\n "NGN": 439.852629,\n "NIO": 37.997351,\n "NOK": 10.249848,\n "NPR": 131.573277,\n "NZD": 1.638153,\n "OMR": 0.40741,\n "PAB": 1.060346,\n "PEN": 3.9603,\n "PGK": 3.736248,\n "PHP": 55.384769,\n "PKR": 213.235927,\n "PLN": 4.617629,\n "PYG": 7251.300917,\n "QAR": 3.857965,\n "RON": 4.946967,\n "RSD": 117.499439,\n "RUB": 62.329807,\n "RWF": 1088.946708,\n "SAR": 3.974436,\n "SBD": 8.607563,\n "SCR": 14.438411,\n "SDG": 473.497309,\n "SEK": 10.486142,\n "SGD": 1.457722,\n "SHP": 1.459462,\n "SLL": 13581.160767,\n "SOS": 618.272012,\n "SRD": 22.260709,\n "STD": 21931.163629,\n "SVC": 9.277651,\n "SYP": 2662.141945,\n "SZL": 16.788471,\n "THB": 36.311502,\n "TJS": 13.261298,\n "TMT": 3.708529,\n "TND": 3.243904,\n "TOP": 2.459973,\n "TRY": 16.843815,\n "TTD": 7.198816,\n "TWD": 31.379439,\n "TZS": 2464.582977,\n "UAH": 31.322712,\n "UGX": 3864.924954,\n "USD": 1.05958,\n "UYU": 42.945451,\n "UZS": 11764.462107,\n "VEF": 226570195087.7927,\n "VND": 24545.167244,\n "VUV": 121.073594,\n "WST": 2.73302,\n "XAF": 656.781309,\n "XAG": 0.048294,\n "XAU": 0.000571,\n "XCD": 2.863568,\n "XDR": 0.790993,\n "XOF": 656.781309,\n "XPF": 120.315233,\n "YER": 265.159952,\n "ZAR": 16.77781,\n "ZMK": 9537.495082,\n "ZMW": 18.060768,\n "ZWL": 341.18428\n }\n}\n'

First load the data as a DataFrame (json.loads alone gives a dict, so wrap it in pd.DataFrame):
import json
df = pd.DataFrame(json.loads(get_response.content))
Second, choose the rates (the currency code is the index) and save to csv:
df[["base", "rates"]].to_csv("path/to/csv")

import requests
import pandas as pd

url = 'https://api.apilayer...'
get_response = requests.get(url)

# Parse the response to a dict
response_dict = get_response.json()

# Turn the rates node into a dataframe
data_items = response_dict['rates'].items()
data_list = list(data_items)
df = pd.DataFrame(data_list, columns=['currency', 'rate'])

# Export to csv
df.to_csv('export.csv')
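One small note on the export: passing index=False, as in df.to_csv('export.csv', index=False), leaves the automatic row index out of the file, which is usually what you want for a plain currency/rate listing.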

IIUC, you can use:
import json
df = pd.DataFrame(json.loads(get_response.content.decode('utf-8')))[['base', 'rates']]
# for export to csv
# df.to_csv('filename.csv')
output:
base rates
AED EUR 3.893237
AFN EUR 96.366461
ALL EUR 120.162829
AMD EUR 486.496485
ANG EUR 1.911467
.. ... ...
YER EUR 265.252922
ZAR EUR 16.796411
ZMK EUR 9540.830787
ZMW EUR 18.067093
ZWL EUR 341.303769
[168 rows x 2 columns]
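If the currency codes should be a regular column rather than the index, a small follow-up sketch (reusing the df built above):
df = df.rename_axis('currency').reset_index()
df = df.rename(columns={'rates': 'rate'})[['currency', 'rate']]
df.to_csv('filename.csv', index=False)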

get_response = requests.get(url)
# print(get_response.content)
in_json = get_response.json()
# print(in_json)
fd = pd.DataFrame(in_json)
y = fd[['rates']]
print(y)
Solved, but it does not look nice, and maybe there is a simpler solution:
I converted to json, then to a dataframe, and then I will convert to csv.
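For reference, a slightly shorter route is possible; this is only a sketch (same URL as above), relying on pd.Series to consume the rates dict directly:
import requests
import pandas as pd

url = 'https://api.apilayer.com/exchangerates_data/latest?base=EUR'
rates = requests.get(url).json()['rates']

# {currency: rate} dict -> two-column frame -> csv, in one pass
df = pd.Series(rates, name='rate').rename_axis('currency').reset_index()
df.to_csv('rates.csv', index=False)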

Related

Writing data from an API to a CSV

Basically I have this code that is working for me and its purpose is to download an entire series from an API about how many times a stock ticker is mentioned on the wallstreetbets sub.
This is the code:
import requests

tickers = open("ticker_list.txt", "r")
for ticker in tickers:
    ticker = ticker.strip()
    url = "https://XXX SENSIBLE INFO/historical/wallstreetbets/" + ticker
    headers = {'XXX (SENSIBLE INFO'}
    r = requests.get(url, headers=headers)
    print(r.content)
Where the .txt file is a simple list of about 8000 stock symbols.
Here are the first lines of the output, just as an example:
b'[{"Date": "2018-08-10", "Ticker": "AA", "Mentions": 1}, {"Date": "2018-08-28", "Ticker": "AA", "Mentions": 1}, {"Date": "2018-09-07", "Ticker": "AA", "Mentions": 1}, etc...
b'[{"Date": "2020-12-07", "Ticker": "AACQ", "Mentions": 1}, {"Date": "2020-12-08", "Ticker": "AACQ", "Mentions": 1}, {"Date": "2020-12-22", "Ticker": "AACQ", "Mentions": 1},... etc...
b'[{"Date": "2018-08-08", "Ticker": "AAL", "Mentions": 1}, {"Date": "2018-08-20", "Ticker": "AAL", "Mentions": 1}, {"Date": "2018-09-11", "Ticker": "AAL", "Mentions": 1}, .... etc
What I want to do now is to store all the data in a csv file so that the resulting table would be interpreted like this:
             AA   AACQ   AAL   ......
1/1/2018      3      3     7   ...
2/1/2018     45     89     3   ....
3/1/2018     21      4     2   ......
....
(where the numbers in the middle represent the mentions per date per ticker; to simplify I just put random numbers here, but they need to be the same numbers I got in the output as "Mentions")
Alternatively, if it's easier, I need to create a single csv file for every ticker, with the date in the first column and the number of mentions in the second column.
The data that is being returned from the site is in JSON format, so this could be converted into a Python data structure using r.json(). Next, two things will help you here. Firstly a Counter can be used to keep track of all of the Mentions in your json data, and a defaultdict can be used to build a per date entry for each ticker. The set all_tickers can be used to keep track of all the tickers seen in the data and then be used to form the header for your output CSV file.
For example:
from collections import defaultdict, Counter
from datetime import datetime
import requests
import csv

dates = defaultdict(Counter)
all_tickers = set()

tickers = open("ticker_list.txt")

for ticker in tickers:
    ticker = ticker.strip()
    url = f"https://XXX SENSIBLE INFO/historical/wallstreetbets/{ticker}"
    headers = {'XXX (SENSIBLE INFO'}
    r = requests.get(url, headers=headers)

    for row in r.json():
        all_tickers.add(row['Ticker'])
        date = datetime.strptime(row['Date'], '%Y-%m-%d')  # convert to datetime format
        dates[date][row['Ticker']] += row['Mentions']

with open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=['Date', *sorted(all_tickers)])
    csv_output.writeheader()

    for date, values in sorted(dates.items(), key=lambda x: x[0]):
        row = {'Date': date.strftime('%d/%m/%Y')}  # output date format: day/month/year
        row.update(values)
        csv_output.writerow(row)
This should produce the output you need.

Convert downloaded string in pattern like [{"t": "1", "id": "NOW.976818" .... "cv": "1"}] into pd dataframe?

I downloaded a list of news content into a pandas dataframe. Instead of putting the info into a table, pd put everything into a single cell. Upon inspection, the downloaded string is in this pattern:
"['[{"t": "1", "id": "NOW.976818", "dt": "2019/11/15 10:13", "h": "《美股業績》Nvidia季績勝預期 季度收入預測遜預期", "u": "",...
How can I convert this into a pd table?
My code:
import requests
import pandas as pd
from bs4 import BeautifulSoup

urlpull = "http://www.aastocks.com/tc/resources/datafeed/getmorenews.ashx?cat=result-announcement&newstime=942660890&newsid=NOW.976800&period=0&key="
df = pd.DataFrame({'News': ['a'], 'Page': ['1']})

result = requests.get(urlpull)
result.raise_for_status()
result.encoding = "utf-8"
src = result.content
soup = BeautifulSoup(src, 'lxml')

news = []
for a_tag in soup.find_all('p'):
    news.append(a_tag.text)

df = df.append(pd.DataFrame(news, columns=['News']))
print(news)
df['num'] = df['News'].str.extract('(\d{5})')
df["stock_num"] = pd.to_numeric(df["num"], errors="coerce").fillna(0).astype("int64")
print(df)
df.to_excel("News.xlsx")
You can do it directly:
pd.read_table(filename/url)
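If pd.read_table does not split the payload cleanly, an alternative sketch is to parse it as JSON first, under the assumption that the aastocks body is a JSON array of objects, possibly wrapped in one extra layer of string quoting, as the sample above suggests:
import json
import requests
import pandas as pd

urlpull = "http://www.aastocks.com/tc/resources/datafeed/getmorenews.ashx?cat=result-announcement&newstime=942660890&newsid=NOW.976800&period=0&key="
result = requests.get(urlpull)
result.encoding = "utf-8"

# Assumption: the body is (or contains) a JSON array like
# [{"t": "1", "id": "NOW.976818", ...}, ...]. If the array arrives as a
# quoted string, a second json.loads pass unwraps it.
payload = json.loads(result.text)
if isinstance(payload, str):
    payload = json.loads(payload)

df = pd.DataFrame(payload)
print(df.head())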

How to use pandas DF as params in HTTP request

I have a list of places from an excel file which I would like to enrich with geonames IDs. Starting from the excel file I made a pandas DataFrame, and I would then use the values from the DF as params in my request.
Here is the script I made:
import pandas as pd
import requests
import json

require_cols = [1]
required_df = pd.read_excel('grp.xlsx', usecols=require_cols)
print(required_df)

url = 'http://api.geonames.org/searchJSON?'
params = {'username': "XXXXXXXX",
          'name_equals': (required_df),
          'maxRows': "1"}
e = requests.get(url, params=params)
pretty_json = json.loads(e.content)
print(json.dumps(pretty_json, indent=2))
The problem is related to the definition of this parameter:
'name_equals': (required_df)
I would use the places (around 15k) from the DF as the param, iteratively retrieve the related geonames ID for each, and write the output to a separate excel file.
The simple request works:
import requests
import json

url = 'http://api.geonames.org/searchJSON?'
params = {'username': "XXXXXXX",
          'name_equals': "Aire",
          'maxRows': "1"}
e = requests.get(url, params=params)
pretty_json = json.loads(e.content)
print(json.dumps(pretty_json, indent=2))
#print(e.content)
As does the definition of the pandas data frame:
# import pandas lib as pd
import pandas as pd
require_cols = [0,1]
# only read specific columns from an excel file
required_df = pd.read_excel('grp.xlsx', usecols = require_cols)
print(required_df)
I also tried via SPARQL without results so I decided to go via Python.
Thanks for your time.
You can use a for-loop:
import pandas as pd

df = pd.DataFrame({'Places': ['London', 'Paris', 'Berlin']})

for item in df['Places']:
    print('requests for:', item)
    # ... rest of code ...
or df.apply()
import pandas as pd

def run(item):
    print('requests for:', item)
    # ... rest of code ...
    return 'result for ' + item

df = pd.DataFrame({'Places': ['London', 'Paris', 'Berlin']})
df['Results'] = df['Places'].apply(run)
Thanks @furas for your reply.
I solved it like this:
import pandas as pd
import requests
import json

url = 'http://api.geonames.org/searchJSON?'
df = pd.read_excel('Book.xlsx', sheet_name='Sheet1', usecols="B")

for item in df.place_name:
    df.place_name.head()
    params = {'username': "XXXXXX",
              'name_equals': item,
              'maxRows': "1"}
    e = requests.get(url, params=params)
    pretty_json = json.loads(e.content)
    for item in pretty_json["geonames"]:
        print(json.dumps(item["geonameId"], indent=2))
        with open('data.json', 'w', encoding='utf-8') as f:
            json.dump(item["geonameId"], f, ensure_ascii=False, indent=4)
#print(e.content)
The only problem now is related to the json output: printing gives me the complete list of IDs, but when I write the output to a file I get just the last ID from the list.
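A likely cause, judging from the code above: open('data.json', 'w', ...) sits inside the loop, and mode 'w' truncates the file on every iteration, so only the last ID survives. A minimal sketch of one fix (collect first, write once; same url, file, and username placeholder as in the question):
import json
import requests
import pandas as pd

url = 'http://api.geonames.org/searchJSON?'
df = pd.read_excel('Book.xlsx', sheet_name='Sheet1', usecols="B")

geoname_ids = []
for item in df.place_name:
    params = {'username': "XXXXXX",
              'name_equals': item,
              'maxRows': "1"}
    e = requests.get(url, params=params)
    for hit in e.json()["geonames"]:
        geoname_ids.append(hit["geonameId"])

# One single write after the loop, instead of reopening the file per item
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(geoname_ids, f, ensure_ascii=False, indent=4)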

Conversion from nested json to csv with pandas

I am trying to convert a nested json into a csv file, but I am struggling with the logic needed for the structure of my file: it's a json with 2 objects, and I would like to convert only one of them into csv; that one is a list with nesting.
I've found the "flattening" json info in this blog post very helpful. I have basically been adapting it to my problem, but it is still not working for me.
My json file looks like this:
{
    "tickets": [
        {
            "Name": "Liam",
            "Location": {
                "City": "Los Angeles",
                "State": "CA"
            },
            "hobbies": [
                "Piano",
                "Sports"
            ],
            "year": 1985,
            "teamId": "ATL",
            "playerId": "barkele01",
            "salary": 870000
        },
        {
            "Name": "John",
            "Location": {
                "City": "Los Angeles",
                "State": "CA"
            },
            "hobbies": [
                "Music",
                "Running"
            ],
            "year": 1985,
            "teamId": "ATL",
            "playerId": "bedrost01",
            "salary": 550000
        }
    ],
    "count": 2
}
my code, so far, looks like this:
import json
from pandas.io.json import json_normalize
import argparse

def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Converting json files into csv for Tableau processing')
    parser.add_argument(
        "-j", "--json", dest="json_file", help="PATH/TO/json file to convert", metavar="FILE", required=True)
    args = parser.parse_args()

    with open(args.json_file, "r") as inputFile:  # open json file
        json_data = json.loads(inputFile.read())  # load json content
    flat_json = flatten_json(json_data)
    # normalizing flat json
    final_data = json_normalize(flat_json)
    with open(args.json_file.replace(".json", ".csv"), "w") as outputFile:  # open csv file
        # saving DataFrame to csv
        final_data.to_csv(outputFile, encoding='utf8', index=False)
What I would like to obtain is 1 line per ticket in the csv, with headings:
Name,Location_City,Location_State,Hobbies_0,Hobbies_1,Year,TeamId,PlayerId,Salary.
I would really appreciate anything that can do the trick!
Thank you!
I actually wrote a package called cherrypicker recently to deal with this exact sort of thing since I had to do it so often!
I think the following code would give you exactly what you're after:
from cherrypicker import CherryPicker
import json
import pandas as pd

with open('file.json') as file:
    data = json.load(file)

picker = CherryPicker(data)
flat = picker['tickets'].flatten().get()
df = pd.DataFrame(flat)
print(df)
This gave me the output:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
You can install the package with:
pip install cherrypicker
...and there's more docs and guidance at https://cherrypicker.readthedocs.io.
As you already have a function to flatten a JSON object, you just have to flatten the tickets:
...
with open(args.json_file, "r") as inputFile:  # open json file
    json_data = json.loads(inputFile.read())  # load json content
final_data = pd.DataFrame([flatten_json(elt) for elt in json_data['tickets']])
...
With your sample data, final_data is as expected:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
There may be a simpler solution for this. But this should work!
import json
import pandas as pd

with open('file.json') as file:
    data = json.load(file)

df = pd.DataFrame(data['tickets'])

# Expand the nested Location dict into separate columns
df['Location_City'] = df['Location'].apply(lambda loc: loc['City'])
df['Location_State'] = df['Location'].apply(lambda loc: loc['State'])

# Expand the hobbies list into hobbies_0, hobbies_1, ... columns
hobbies = pd.DataFrame(df['hobbies'].tolist()).add_prefix('hobbies_')

df = df.join(hobbies).drop(['Location', 'hobbies'], axis=1)
print(df)

What is the data format returned by the AdWords API TargetingIdeaPage service?

When I query the AdWords API to get search volume data and trends through their TargetingIdeaSelector using the Python client library the returned data looks like this:
(TargetingIdeaPage){
   totalNumEntries = 1
   entries[] =
      (TargetingIdea){
         data[] =
            (Type_AttributeMapEntry){
               key = "KEYWORD_TEXT"
               value =
                  (StringAttribute){
                     Attribute.Type = "StringAttribute"
                     value = "keyword phrase"
                  }
            },
            (Type_AttributeMapEntry){
               key = "TARGETED_MONTHLY_SEARCHES"
               value =
                  (MonthlySearchVolumeAttribute){
                     Attribute.Type = "MonthlySearchVolumeAttribute"
                     value[] =
                        (MonthlySearchVolume){
                           year = 2016
                           month = 2
                           count = 2900
                        },
                        ...
                        (MonthlySearchVolume){
                           year = 2015
                           month = 3
                           count = 2900
                        },
                  }
            },
      }
}
This isn't JSON and appears to just be a messy Python list. What's the easiest way to flatten the monthly data into a Pandas dataframe with a structure like this?
Keyword        | Year | Month | Count
keyword phrase | 2016 | 2     | 10
The output is a sudsobject. I found that this code does the trick:
import suds.sudsobject as sudsobject
import pandas as pd
a = [sudsobject.asdict(x) for x in output]
df = pd.DataFrame(a)
Addendum: This was once correct, but newer versions of the API (I tested 201802) now return zeep objects. However, zeep.helpers.serialize_object should do the same trick.
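For the zeep case, a minimal sketch (assuming output is the list of result objects returned by the newer client, as in the snippet above):
from zeep.helpers import serialize_object
import pandas as pd

# serialize_object turns each zeep object into an OrderedDict
a = [serialize_object(x) for x in output]
df = pd.DataFrame(a)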
Here's the complete code that I used to query the TargetingIdeaSelector with requestType STATS, and the method I used to parse the data into a usable dataframe; note the section starting "Parse results to pandas dataframe", as this takes the output given in the question above and converts it to a dataframe. Probably not the fastest or best, but it works! Tested with Python 2.7.
"""This code pulls trends for a set of keywords, and parses into a dataframe.
The LoadFromStorage method is pulling credentials and properties from a
"googleads.yaml" file. By default, it looks for this file in your home
directory. For more information, see the "Caching authentication information"
section of our README.
"""
from googleads import adwords
import pandas as pd
adwords_client = adwords.AdWordsClient.LoadFromStorage()
PAGE_SIZE = 10
# Initialize appropriate service.
targeting_idea_service = adwords_client.GetService(
'TargetingIdeaService', version='v201601')
# Construct selector object and retrieve related keywords.
offset = 0
stats_selector = {
'searchParameters': [
{
'xsi_type': 'RelatedToQuerySearchParameter',
'queries': ['donald trump', 'bernie sanders']
},
{
# Language setting (optional).
# The ID can be found in the documentation:
# https://developers.google.com/adwords/api/docs/appendix/languagecodes
'xsi_type': 'LanguageSearchParameter',
'languages': [{'id': '1000'}],
},
{
# Location setting
'xsi_type': 'LocationSearchParameter',
'locations': [{'id': '1027363'}] # Burlington,Vermont
}
],
'ideaType': 'KEYWORD',
'requestType': 'STATS',
'requestedAttributeTypes': ['KEYWORD_TEXT', 'TARGETED_MONTHLY_SEARCHES'],
'paging': {
'startIndex': str(offset),
'numberResults': str(PAGE_SIZE)
}
}
stats_page = targeting_idea_service.get(stats_selector)
##########################################################################
# Parse results to pandas dataframe
stats_pd = pd.DataFrame()
if 'entries' in stats_page:
for stats_result in stats_page['entries']:
stats_attributes = {}
for stats_attribute in stats_result['data']:
#print (stats_attribute)
if stats_attribute['key'] == 'KEYWORD_TEXT':
kt = stats_attribute['value']['value']
else:
for i, val in enumerate(stats_attribute['value'][1]):
data = {'keyword': kt,
'year': val['year'],
'month': val['month'],
'count': val['count']}
data = pd.DataFrame(data, index = [i])
stats_pd = stats_pd.append(data, ignore_index=True)
print(stats_pd)
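A small design note on the parsing loop above: appending one-row frames inside a loop is effectively quadratic in pandas; collecting plain dicts and building the frame once is the more idiomatic pattern. A sketch under the same assumptions about the stats_page structure:
rows = []
if 'entries' in stats_page:
    for stats_result in stats_page['entries']:
        for stats_attribute in stats_result['data']:
            # assumes KEYWORD_TEXT arrives before the monthly attribute,
            # exactly as in the original loop
            if stats_attribute['key'] == 'KEYWORD_TEXT':
                kt = stats_attribute['value']['value']
            else:
                for val in stats_attribute['value'][1]:
                    rows.append({'keyword': kt,
                                 'year': val['year'],
                                 'month': val['month'],
                                 'count': val['count']})

stats_pd = pd.DataFrame(rows)
print(stats_pd)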
