How can I put excel data to the dictionary? - python

I wanna put excel data to the dictionary.
Excel is
views.py is
#coding:utf-8
from django.shortcuts import render
import xlrd
book3 = xlrd.open_workbook('./data/excel.xlsx')
sheet3 = book3.sheet_by_index(0)
large_item = None
data_dict = {}
for row_index in range(1,sheet3.nrows):
rows3 = sheet3.row_values(row_index)
large_item = rows3[1] or large_item
data_dict = rows3
Now when I printed out print(data_dict),['', '4', '10', 'Karen', ''] was shown.Before,I wrote data_dict.extend(rows3) in place of data_dict = rows3,but in that time dict has not extend error happens.My ideal output is
data_dict = {
1: {
user_id: 1,
name_id: 1,
name: Blear,
age: 40,
man: false,
employee: leader,
},
2: {
user_id: 2,
name_id: 5,
・
       ・
       ・
},
・
       ・
       ・
}
How should I write to achieve my goal?

Your problem is :
data_dict = rows3
This doesn't add rows3 to data_dict, this set is value. So data_dict is equal to the last row.
To add element to a dict you need to do this:
data_dict[KEY] = VALUE
Your key will be the row index.
Now, you want another dict like VALUE
{
user_id: 1,
name_id: 1,
name: Blear,
age: 40,
man: false,
employee: leader,
}
So for each row you need to construct this dict, use headers and cell value to do it.
I don't test this code, it's just to give you an idea to how to do it.
#coding:utf-8
from django.shortcuts import render
import xlrd
book3 = xlrd.open_workbook('./data/excel.xlsx')
sheet3 = book3.sheet_by_index(0)
headers = sheet3.row_values(0)
large_item = None
data_dict = {}
for row_index in range(1,sheet3.nrows):
rows3 = sheet3.row_values(row_index)
large_item = rows3[1] or large_item
# Create dict with headers and row values
row_data = {}
for idx_col,value in enumerate(rows3):
header_value = headers[idx_col]
# Avoid to add empty column. A column in your example
if header_value:
row_data[headers[idx_col]] = value
# Add row_data to your data_dict with
data_dict[row_index] = row_data

You can use python's library pandas for an easy solution:
from pandas import *
xls = ExcelFile('your_excel_file.xls')
df = xls.parse(xls.sheet_names[0])
df.to_dict()

Related

JSON or CSV from list in python

So I have a code that converts my category tree to a list and I wanted to convert it to CSV/json. Each item on list can have more ids as shown below.
def paths(tree):
tree_name = next(iter(tree.keys()))
if tree_name == 'children':
for child in tree['children']:
for descendant in paths(child):
yield (tree['id'],) + descendant
else:
yield (tree['id'],)
pprint.pprint(list(paths(tree)))
Output
[(461123, 1010022280, 10222044, 2222871,2222890),
(461123, 129893, 119894, 1110100250),
(461123, 98943, 944894, 9893445),
(461123, 9844495)]
Is there any way I can improve my code or have another code that converts list to json that looks below output?
Output should look like this
{
{
"column1": "462312",
"column2": "1010022280",
"column3": "10222044",
"column4": "2222871",
"column5": "2222890"
},
{
"column1": "461123",
"column2": "129893",
"column3": "119894",
"column4": "1110100250"
}
and so on...
}
if csv should look like this. ** Can be up to column 10
column1
column2
column3
column4
461123
129893
119894
1110100250
461123
129893
119894
Following is the code to convert list of tuple to a list of dict which you can convert to json and the second function turns the data to a csv
data = [(461123, 1010022280, 10222044, 2222871,2222890),
(461123, 129893, 119894, 1110100250),
(461123, 98943, 944894, 9893445),
(461123, 9844495)]
def convert_to_list_of_dicts(data):
output_list = []
for i in data:
data_dict = {}
for count,j in enumerate(i) :
data_dict["column" + str(count+1)] = j
output_list.append(data_dict)
return output_list
# print(convert_to_list_of_dicts(data))
def convert_to_csv(data):
max_column_num = 0
for i in data:
if len(i) > max_column_num:
max_column_num = len(i)
columns = ["column" + str(i+1) for i in range(max_column_num)]
newdata = [tuple(columns)]
for tup in data:
newdata.append(tup)
with open('output.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(newdata)
# convert_to_csv(data)

constructing a message format from the fetchall result in python

*New to Programming
Question: I need to use the below "Data" (two rows as arrays) queried from sql and use it to create the message structure below.
data from sql using fetchall()
Data = [[100,1,4,5],[101,1,4,6]]
##expected message structure
message = {
"name":"Tom",
"Job":"IT",
"info": [
{
"id_1":"100",
"id_2":"1",
"id_3":"4",
"id_4":"5"
},
{
"id_1":"101",
"id_2":"1",
"id_3":"4",
"id_4":"6"
},
]
}
I tried to create below method to iterate over the rows and then input the values, this is was just a starting, but this was also not working
def create_message(data)
for row in data:
{
"id_1":str(data[0][0],
"id_2":str(data[0][1],
"id_3":str(data[0][2],
"id_4":str(data[0][3],
}
Latest Code
def create_info(data):
info = []
for row in data:
temp_dict = {"id_1_tom":"","id_2_hell":"","id_3_trip":"","id_4_clap":""}
for i in range(0,1):
temp_dict["id_1_tom"] = str(row[i])
temp_dict["id_2_hell"] = str(row[i+1])
temp_dict["id_3_trip"] = str(row[i+2])
temp_dict["id_4_clap"] = str(row[i+3])
info.append(temp_dict)
return info
Edit: Updated answer based on updates to the question and comment by original poster.
This function might work for the example you've given to get the desired output, based on the attempt you've provided:
def create_info(data):
info = []
for row in data:
temp_dict = {}
temp_dict['id_1_tom'] = str(row[0])
temp_dict['id_2_hell'] = str(row[1])
temp_dict['id_3_trip'] = str(row[2])
temp_dict['id_4_clap'] = str(row[3])
info.append(temp_dict)
return info
For the input:
[[100, 1, 4, 5],[101,1,4,6]]
This function will return a list of dictionaries:
[{"id_1_tom":"100","id_2_hell":"1","id_3_trip":"4","id_4_clap":"5"},
{"id_1_tom":"101","id_2_hell":"1","id_3_trip":"4","id_4_clap":"6"}]
This can serve as the value for the key info in your dictionary message. Note that you would still have to construct the message dictionary.

json to dictionary in python

This is my json file input.
{"Report":{"id":101,"type":"typeA","Replist":[{"rptid":"r001","subrpt":{"subid":74,"subname":"name1","subval":113},"RelsubList":[{"Relid":8,"Relsubdetails":{"Rel_subname":"name8","Rel_Subval":65}},{"Relid":5,"Relsubdetails":{"Rel_subname":"name5","Rel_Subval":40}}],"fldA":30,"fldB":23}]}}
...
I am writing python program to convert the input into the below format in my dictionary.
I am new to python.
Expected output:
out: {"id": "101", "type": "typeA", "rptid": "r001", "subrpt_subid": "74", "subrpt_subname": "name1", "subrpt_subval":"113","Relid":"8","Rel_subname":"name8","Rel_Subval":"65","Relid":"5","Rel_subname":"name5","Rel_Subval":"40","fldA":"30","fldB":"23"
I used the following logic to convert the output till subrpt.
Current output:
out: {'id': '101', 'type': 'typeA', 'rptid': 'r001', 'subrpt_subid': '74', 'subrpt_subname': 'name1', 'subrpt_subval': '113'}
But I am struggling to get the logic of RelsubList(it looks like it has both list and dictionary[{}] ).
please help me to get the logic for the same.
import json
list1 = []
dict1 = {}
dict2 = {}
data_file = "samp1.json"
file = open(data_file)
for line in file:
json_line = json.loads(line)
json_line = json_line["Report"]
dict1["id"]=str(json_line["id"])
dict1["type"] = str(json_line["type"])
json_line = json_line["Replist"]
dict1["rptid"]= str(json_line[0]["rptid"])
dict1["subrpt_subid"] = str(json_line[0]["subrpt"]["subid"])
dict1["subrpt_subname"] = str(json_line[0]["subrpt"]["subname"])
dict1["subrpt_subval"] = str(json_line[0]["subrpt"]["subval"])
print("out:", dict1)
Some of your logic is confusing to me, i.e. why are you doing json.loads(line) in every loop?
Anyway, the following should get you the logic for RealsubList:
import json
f = open("data.json")
data = json.load(f)
for line in data:
relsublist = data["Report"]["Replist"][0]["RelsubList"]
print(relsublist)
Results in:
[{'Relid': 8, 'Relsubdetails': {'Rel_subname': 'name8', 'Rel_Subval': 65}}, {'Relid': 5, 'Relsubdetails': {'Rel_subname': 'name5', 'Rel_Subval': 40}}]
The reason for the [0] index after ["Replist"] is Replist contains an array of nested dictionaries, so you need to call it out by index. In this case its only a single array, so it would be 0

Python write mutiple array value into csv

with my code, i read the values of JSON data and insert into array
def retrive_json():
with open('t_v1.json') as json_data:
d = json.load(json_data)
array = []
for i in d['ride']:
origin_lat = i['origin']['lat']
origin_lng = i['origin']['lng']
destination_lat = i['destination']['lat']
destination_lng = i['destination']['lng']
array.append([origin_lat,origin_lng,destination_lat,destination_lng])
return array
the result array is this :
[[39.72417, -104.99984, 39.77446, -104.9379], [39.77481, -104.93618, 39.6984, -104.9652]]
how i can write each element of each array into specific field in csv?
i have try in this way:
wrt = csv.writer(open(t_.csv', 'w'), delimiter=',',lineterminator='\n')
for x in jjson:
wrt.writerow([x])
but the value of each array are store all in one field
How can solved it and write each in a field?
this is my json file:
{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}
Let's say we have this:
jsonstring = """{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}"""
Here is a pandas solution:
import pandas as pd
import json
# Load json to dataframe
df = pd.DataFrame(json.loads(jsonstring)["ride"])
# Create the new columns
df["o1"] = df["origin"].apply(lambda x: x["lat"])
df["o2"] = df["origin"].apply(lambda x: x["lng"])
df["d1"] = df["destination"].apply(lambda x: x["lat"])
df["d2"] = df["destination"].apply(lambda x: x["lng"])
#export
print(df.iloc[:,2:].to_csv(index=False, header=True))
#use below for file
#df.iloc[:,2:].to_csv("output.csv", index=False, header=True)
Returns:
o1,o2,d1,d2
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
Condensed answer:
import pandas as pd
import json
with open('data.json') as json_data:
d = json.load(json_data)
df = pd.DataFrame(d["ride"])
df["o1"],df["o2"] = zip(*df["origin"].apply(lambda x: (x["lat"],x["lng"])))
df["d1"],df["d2"] = zip(*df["destination"].apply(lambda x: (x["lat"],x["lng"])))
df.iloc[:,2:].to_csv("t_.csv",index=False,header=False)
Or, maybe the most readable solution:
import json
from pandas.io.json import json_normalize
open('data.json') as json_data:
d = json.load(json_data)
df = json_normalize(d["ride"])
cols = ["origin.lat","origin.lng","destination.lat","destination.lng"]
df[cols].to_csv("output.csv",index=False,header=False)
This might help:
import json
import csv
def retrive_json():
with open('data.json') as json_data:
d = json.load(json_data)
array = []
for i in d['ride']:
origin_lat = i['origin']['lat']
origin_lng = i['origin']['lng']
destination_lat = i['destination']['lat']
destination_lng = i['destination']['lng']
array.append([origin_lat,origin_lng,destination_lat,destination_lng])
return array
res = retrive_json()
csv_cols = ["orgin_lat", "origin_lng", "dest_lat", "dest_lng"]
with open("output_csv.csv", 'w') as out:
writer = csv.DictWriter(out, fieldnames=csv_cols)
writer.writeheader()
for each_list in res:
d = dict(zip(csv_cols,each_list))
writer.writerow(d)
Output csv generated is:
orgin_lat,origin_lng,dest_lat,dest_lng
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
To me it looks like you've got an array of arrays and you want the individual elements. Therefore you'll want to use a nested for loop. Your current for loop is getting each array, to then split up each array into it's elements you'll want to loop through those. I'd suggest something like this:
for x in jjson:
for y in x:
wrt.writerow([y])
Obviously you might want to update your bracketing etc this is just me giving you an idea of how to solve your issue.
Let me know how it goes!
Why the csv-Library?
array = [[1, 2, 3, 4], [5, 6, 7, 8]]
with open('test.csv', 'w') as csv_file :
csv_file.write("# Header Info\n" \
"# Value1, Value2, Value3, Value4\n") # The header might be optional
for row in array :
csv_file.write(",".join(row) + "\n")

JSON Parsing help in Python

I have below data in JSON format, I have started with code below which throws a KEY ERROR.
Not sure how to get all data listed in headers section.
I know I am not doing it right in json_obj['offers'][0]['pkg']['Info']: but not sure how to do it correctly.
how can I get to different nodes like info,PricingInfo,Flt_Info etc?
{
"offerInfo":{
"siteID":"1",
"language":"en_US",
"currency":"USD"
},
"offers":{
"pkg":[
{
"offerDateRange":{
"StartDate":[
2015,
11,
8
],
"EndDate":[
2015,
11,
14
]
},
"Info":{
"Id":"111"
},
"PricingInfo":{
"BaseRate":1932.6
},
"flt_Info":{
"Carrier":"AA"
}
}
]
}
}
import os
import json
import csv
f = open('api.csv','w')
writer = csv.writer(f,delimiter = '~')
headers = ['Id' , 'StartDate', 'EndDate', 'Id', 'BaseRate', 'Carrier']
default = ''
writer.writerow(headers)
string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)
It looks like you're accessing the json incorrectly. After you have accessed json_obj['offers'], you accessed [0], but there is no array there. json_obj['offers'] gives you another dictionary.
For example, to get PricingInfo like you asked, access like this:
json_obj['offers']['pkg'][0]['PricingInfo']
or 11 from the StartDate like this:
json_obj['offers']['pkg'][0]['offerDateRange']['StartDate'][1]
And I believe you get the KEY ERROR because you access [0] in the dictionary, which since that isn't a key, you get the error.
try to substitute this piece of code:
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)
With this:
for pkg in json_obj['offers']['pkg']:
row.append(pkg['Info']['Id'])
year = pkg['offerDateRange']['StartDate'][0]
month = pkg['offerDateRange']['StartDate'][1]
day = pkg['offerDateRange']['StartDate'][2]
StartDate = "%d-%d-%d" % (year,month,day)
print StartDate
writer.writerow(row)
Try this
import os
import json
import csv
string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)
print json_obj["offers"]["pkg"][0]["Info"]["Id"]
print str(json_obj["offers"]["pkg"][0]["offerDateRange"]["StartDate"][0]) +'-'+ str(json_obj["offers"]["pkg"][0]["offerDateRange"]["StartDate"][1])+'-'+str(json_obj["offers"]["pkg"][0]
["offerDateRange"]["StartDate"][2])
print str(json_obj["offers"]["pkg"][0]["offerDateRange"]["EndDate"][0]) +'-'+ str(json_obj["offers"]["pkg"][0]["offerDateRange"]["EndDate"][1])+'-'+str(json_obj["offers"]["pkg"][0]
["offerDateRange"]["EndDate"][2])
print json_obj["offers"]["pkg"][0]["Info"]["Id"]
print json_obj["offers"]["pkg"][0]["PricingInfo"]["BaseRate"]
print json_obj["offers"]["pkg"][0]["flt_Info"]["Carrier"]

Categories

Resources