By using this code:
import pandas as pd
patients_df = pd.read_json('/content/students.json',lines=True)
patients_df.head()
the data are shown in tabular form and look like this:
I can also read the main JSON file line by line like this:
import json

data = []
for line in open('/content/students.json', 'r'):
    data.append(json.loads(line))
How can I split the scores column of the table into separate, organized columns named Exam, Quiz, and Homework?
A possible solution could be the following:
# pip install pandas
import pandas as pd
import json

def separate_column(row):
    for e in row["scores"]:
        row[e["type"]] = e["score"]
    return row

with open('/content/students.json', 'r') as file:
    data = [json.loads(line.rstrip()) for line in file]

df = pd.json_normalize(data)
df = df.apply(separate_column, axis=1)
df = df.drop(['scores'], axis=1)
print(df)
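Alternatively, the same reshaping can be done without a row-wise apply by exploding the "scores" list and pivoting. This is a minimal sketch, assuming the same layout as above (one JSON document per line, each with a "scores" list of {"type", "score"} dicts); the "_id" field used as the pivot index is an assumption about the file.

import json
import pandas as pd

with open('/content/students.json', 'r') as file:
    data = [json.loads(line) for line in file]

# Explode the "scores" list into one row per (student, score type),
# carrying "_id" along as metadata (assumed to exist in each record).
scores = pd.json_normalize(data, record_path='scores', meta=['_id'])

# Pivot so each score type (exam, quiz, homework) becomes its own column.
wide = scores.pivot(index='_id', columns='type', values='score').reset_index()
print(wide)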
I'm working on an Excel table transformation into another file for a database upload. The tables usually look like this:
The result should be a long list looking like this:
And this is the code I was trying to use... any thoughts?
import pandas as pd
from pandas import DataFrame
import numpy as np

df_excel = pd.read_excel('Excel_Forecast.xlsx', engine='openpyxl')
df_details = df_excel['Details']
df_base = []
for column in df_excel.columns[2:]:
    df_base['Details'].append(df_excel['Details'])
    df_base = DataFrame(df_base.append(df_excel[(column)]), columns=['Amount'])
df_base.to_excel('Temp.xlsx', index=False)
Use df.melt:
df.melt(['Group', 'Item'])
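As a fuller sketch, assuming the sheet's identifier columns are named Group and Item (as in the one-liner above) and the remaining columns hold the forecast amounts, melt produces the long format directly and can be written back to Excel; the var_name 'Period' is my own label for the melted column headers:

import pandas as pd

df = pd.read_excel('Excel_Forecast.xlsx', engine='openpyxl')

# Keep 'Group' and 'Item' as identifiers; every other column becomes
# a (Period, Amount) pair in long format.
long_df = df.melt(id_vars=['Group', 'Item'], var_name='Period', value_name='Amount')

long_df.to_excel('Temp.xlsx', index=False)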
I was trying to write a function that reads a CSV file that looks like this.
flowers.csv
petunia,5.95
alyssum,3.95
begonia,5.95
sunflower,5.95
coelius,4.95
I have tried this code for my function.
def read_csv(csv_pricefile):
    import csv
    f = open(csv_pricefile)
    li = []
    for row in csv.reader(f):
        li.append(row)
    f.close()
    print(li)

read_csv("flowers.csv")
When I call my function, it gives the following output:
[['petunia', '5.95'], ['alyssum', '3.95'], ['begonia', '5.95'], ['sunflower', '5.95'], ['coelius', '4.95']]
But I don't know how to write a function that takes two parameters, for example:
read_csv("flowers.csv","alyssum")
If I call the function, it should give me the following output.
3.95
Use the pandas library to read the CSV; this will create a DataFrame object:
import pandas as pd
df = pd.read_csv('flowers.csv', header=None)
df.columns = ['flower', 'price']
Then, if you want to know the price of any flower:
df = df.set_index(['flower'])
f = 'alyssum'
print("{} costs {}".format(f,df.loc[f].price))
Here is my solution, which I just tried:
def read_csv(csv_pricefile, flower):
    import csv
    f = open(csv_pricefile)
    my_dic = {}
    for row in csv.reader(f):
        myData = {row[0]: row[1]}
        my_dic.update(myData)
    f.close()
    print(my_dic[flower])

read_csv("flowers.csv", "alyssum")
I have one JSON file with Ansible inventory facts, from which I need to select a few columns as a DataFrame and send an email notification.
The following is the code I tried:
import json
import pandas as pd
from pandas.io.json import json_normalize
with open('d:/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.head(1)
It's printing the entire record (each JSON file actually has only one record), but I need to display only two columns from the DataFrame. Can someone please suggest how to view the DataFrame with selected columns?
Update 1:
I am able to generate the required columns now, but only certain columns work; when I mention other columns, it says "not in index".
Also, can I have my own custom column header labels when printing?
Working
import json
import pandas as pd
from pandas.io.json import json_normalize
with open('d:/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.columns = mydata.columns.to_series().apply(lambda x: x.strip())
df1 = mydata[['ansible_architecture', 'ansible_distribution']]
But when I specify the columns as hostname, ansible_distribution, it says they are not in the index.
Not working
import json
import pandas as pd
from pandas.io.json import json_normalize
with open('d:/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.columns = mydata.columns.to_series().apply(lambda x: x.strip())
df1 = mydata[['hostname', 'ansible_distribution']]
Error:
KeyError: "['hostname'] not in index"
Update 2:
Now I am able to fix that issue with the code below, but I need custom labels in the output. How can I do that?
import json
import pandas as pd
from pandas.io.json import json_normalize

with open('d:/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.columns = mydata.columns.to_series().apply(lambda x: x.strip())
df1 = mydata[['ansible_env.HOSTNAME', 'ansible_distribution']]
But I need custom column name labels in the final output, like Host and OSVersion for the above columns. How can I do that?
Update 3: Now I am trying to rename the column names before printing. I tried the following code, but it gives a KeyError saying the columns are not in the index.
import json
import pandas as pd
from tabulate import tabulate
from pandas.io.json import json_normalize
with open('/home/cloud-user/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.columns = mydata.columns.to_series().apply(lambda x: x.strip())
mydata = mydata.rename(columns={"ansible_env.HOSTNAME": "HOSTNAME", "ansible_disrribution": "OSType"})
df1 = mydata[['HOSTNAME', 'OSType']]
print(tabulate(df1, headers='keys', tablefmt='psql'))
Traceback (most recent call last):
File "ab7.py", line 21, in <module>
df1=mydata[['HOSTNAME','OSType']]
File "/usr/lib64/python2.7/site-packages/pandas/core/frame.py", line 2682, in __getitem__
return self._getitem_array(key)
File "/usr/lib64/python2.7/site-packages/pandas/core/frame.py", line 2726, in _getitem_array
indexer = self.loc._convert_to_indexer(key, axis=1)
File "/usr/lib64/python2.7/site-packages/pandas/core/indexing.py", line 1327, in _convert_to_indexer
.format(mask=objarr[mask]))
KeyError: "['HOSTNAME' 'OSType'] not in index"
But if I don't rename, it works perfectly; I just need more readable column labels. Any suggestions, please?
Without the rename, the code works and the console output looks like this:
+----+------------------------+------------------------+
| | ansible_env.HOSTNAME | ansible_distribution |
|----+------------------------+------------------------|
| 0 | ip-xx-xx-xx-xx | SLES |
+----+------------------------+------------------------+
Now, instead of ansible_env.HOSTNAME I need the label HOSTNAME, and instead of ansible_distribution I need OSType. Any suggestions, please?
Update 4:
I fixed the issue with the following:
df.rename(columns={'ansible_hostname': 'HOSTNAME', 'ansible_distribution': 'OS Version', 'ansible_ip_addresses': 'Private IP', 'ansible_windows_domain': 'FQDN'}, inplace=True)
Select multiple columns as a DataFrame by passing a list to it:
df[['col_name1', 'col_name2']]
For more information try this link:
https://medium.com/dunder-data/selecting-subsets-of-data-in-pandas-6fcd0170be9c
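To also get custom column labels, selecting and renaming can be combined in one step. This is a minimal sketch mirroring the question's setup; the original column names ansible_env.HOSTNAME and ansible_distribution, the file path, and the new labels are all taken from the question.

import json
from pandas.io.json import json_normalize

with open('d:/facts.json') as f:
    d = json.load(f)

mydata = json_normalize(d['ansible_facts'])
mydata.columns = mydata.columns.to_series().apply(lambda x: x.strip())

# Select by the original names, then relabel the columns for display.
df1 = mydata[['ansible_env.HOSTNAME', 'ansible_distribution']].rename(
    columns={'ansible_env.HOSTNAME': 'HOSTNAME', 'ansible_distribution': 'OSType'})
print(df1)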
I'm trying to make an API call and save the result as a DataFrame.
The problem is that I need the data from the 'result' column.
I didn't succeed in doing that.
I'm basically just trying to save the API response as a CSV file in order to work with it.
P.S. When I do this with a "JSON to CSV converter" from the web, it does what I want (example: https://konklone.io/json/).
import requests
import pandas as pd
import json
res = requests.get("http://api.etherscan.io/api?module=account&action=txlist&address=0xddbd2b932c763ba5b1b7ae3b362eac3e8d40121a&startblock=0&endblock=99999999&sort=asc&apikey=YourApiKeyToken")
j = res.json()
j
df = pd.DataFrame(j)
df.head()
[output example screenshot]
Try this
import requests
import pandas as pd
import json
res = requests.get("http://api.etherscan.io/api?module=account&action=txlist&address=0xddbd2b932c763ba5b1b7ae3b362eac3e8d40121a&startblock=0&endblock=99999999&sort=asc&apikey=YourApiKeyToken")
j = res.json()
# print(j)
filename ="temp.csv"
df = pd.DataFrame(j['result'])
print(df.head())
df.to_csv(filename)
Looks like you need:
df = pd.DataFrame(j["result"])
How do I write a function in Python that translates each row of a CSV file into another language and adds the translation as another column of the same CSV, using pandas? The input file I have looks like this:
and I would like my output to look like this:
I started with this:
from googletrans import Translator
import pandas as pd
data = pd.read_csv('~/file/my_file.csv')[['A','B']]
df = pd.DataFrame(data, columns=['A','B','A_translation', 'B_translation'])
For translating a single sentence, the following code helps, but could you please help me turn it into a function that handles all rows of a CSV file?
sentence = 'The quick brown fox'
translations = translator.translate(sentence, dest='Fr')
for translation in translations:
    tr = translation.text
    org = translation.origin
Thanks.
Something like this?
from googletrans import Translator
import pandas as pd

headers = ['A', 'B', 'A_translation', 'B_translation']
data = pd.read_csv('./data.csv')
translator = Translator()

# Init an empty dataframe with as many rows as `data`
df = pd.DataFrame(index=range(0, len(data)), columns=headers)

def translate_row(row):
    ''' Translate elements A and B within `row`. '''
    a = translator.translate(row[0], dest='Fr')
    b = translator.translate(row[1], dest='Fr')
    return pd.Series([a.origin, b.origin, a.text, b.text], headers)

for i, row in enumerate(data.values):
    # Fill the empty dataframe with the resulting series.
    df.loc[i] = translate_row(row)

print(df)
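As a variant, the same idea can be expressed with column-wise apply instead of the explicit loop. This is a minimal sketch, assuming the CSV has columns named A and B as in the question; the output file name is my own choice.

from googletrans import Translator
import pandas as pd

translator = Translator()
data = pd.read_csv('./data.csv')  # assumed to have columns 'A' and 'B'

def translate_text(text):
    # Translate one cell to French and return the translated string.
    return translator.translate(text, dest='fr').text

# Add the translations as new columns alongside the originals.
data['A_translation'] = data['A'].apply(translate_text)
data['B_translation'] = data['B'].apply(translate_text)

data.to_csv('./data_translated.csv', index=False)  # assumed output file name
print(data)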