Turn Python Strings into JS Array - python

I trying to parse names from a CSV and turn them into a JS Array, this my first attempt at using python and I'm having trouble getting the right structure for the JSON file. My code is below with the current and desired output, any pointers would be greatly appreciated.
import csv, json
csvPath = "forbes_pub_top_2000.csv"
jsonPath = "pub.json"
# Read CSV, filter Names, add to data
data = {}
with open(csvPath, 'r') as csv_file:
csv_reader = csv.reader(csv_file)
next(csv_reader)
for line in csv_reader:
company = line[2]
data[company] = line[2]
# Add data to root node
root = {}
root["names"] = data
# Write data to JSON file
with open(jsonPath, 'w') as json_file:
json_file.write(json.dumps(root, indent=4))
Current output:
{
"names": {
"ICBC": "ICBC",
"China Construction Bank": "China Construction Bank",
"Berkshire Hathaway": "Berkshire Hathaway",
"JPMorgan Chase": "JPMorgan Chase",
"Wells Fargo": "Wells Fargo",
"Agricultural Bank of China": "Agricultural Bank of China",
"Bank of America": "Bank of America",
"Bank of China": "Bank of China",
...
}
Desired Output:
{
"names": ["ICBC", "China Construction Bank", "Berkshire Hathaway", "JPMorgan Chase", "Wells Fargo", "Agricultural Bank of China", "Bank of America", "Bank of China", ... ]
}

Instead of this:
for line in csv_reader:
company = line[2]
data[company] = line[2]
do this:
for line in csv_reader:
data.append(line[2])
You will also need to make data a list, not a dict:
data = []

Related

How to I add a header to a tsv in python?

persons = [
{"name":"howard", "adress":"New Jersey", "blood group":"AB"},
{"name":"harry", "adress":"New York", "blood group":"O"},
]
output_file = "outputfile.tsv"
with open(outfilename, "w") as output:
for row in persons:
column_values = row.values()
line = "\t".join(column_values) + '\n'
output.write(line)
I tried using methods for csv but it didnt work furthermore I tried changing the dictionary but was not succesufull
Use csv module. In particular csv.DictWriter(). It can add the header using the dict keys as the field names and writeheader() to create the header. Then you write out the data using writerows().
import csv
persons = [
{"name":"howard", "adress":"New Jersey", "blood group":"AB"},
{"name":"harry", "adress":"New York", "blood group":"O"},
]
output_file = "outputfile.tsv"
with open(output_file, 'w') as csv_file:
hdr = persons[0].keys()
csvDictR = csv.DictWriter(csv_file, hdr, delimiter='\t')
csvDictR.writeheader()
csvDictR.writerows(persons)
cat outputfile.tsv
name adress blood group
howard New Jersey AB
harry New York O

CSV files - How do I replace a row of data through user input?

I have made a CSV file where it stores a book, its author and the year it was published. I then made it where the program will display the file's data as a list to the user. I want to now ask the user to select a row and replace it with a different set of data. I then want this data back to the original CSV file, overwriting the existing data with the amended one. How can I do this?
Here is my code so far:
import csv
with open("Books.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["", "Book", "Author", "Year released"])
writer.writerow([0, "To kill a Mockingbird", "Harper Lee", "1960"])
writer.writerow([1, "A Brief History of Time", "Stephan Hawking", "1988"])
writer.writerow([2, "The Great Gatsby", "F.Scott Fitzgerald", "1922"])
writer.writerow([3, "The Man Who Mistook His Wife For a Hat", "Oliver Sacks", "1985"])
writer.writerow([4, "Pride and Prejudice", "Jane Austen", "1813"])
books = []
with open("books.csv", "r", newline="") as file2:
reader = csv.reader(file2)
for row in reader:
count, book, author, year_released = row
print(row)
Read the file in as a list. Then modify that list. Then write that list back to disk.
import csv
with open("Books.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["", "Book", "Author", "Year released"])
writer.writerow([0, "To kill a Mockingbird", "Harper Lee", "1960"])
writer.writerow([1, "A Brief History of Time", "Stephan Hawking", "1988"])
writer.writerow([2, "The Great Gatsby", "F.Scott Fitzgerald", "1922"])
writer.writerow([3, "The Man Who Mistook His Wife For a Hat", "Oliver Sacks", "1985"])
writer.writerow([4, "Pride and Prejudice", "Jane Austen", "1813"])
books = []
with open("books.csv", "r", newline="") as file2:
reader = csv.reader(file2)
books = list(reader)
print(*books, sep='\n')
line = input("select line: ")
title = input("title: ")
author = input("author: ")
year = input("year: ")
books[int(line) + 1] = [line, title, author, year]
with open("Books.csv", 'w', newline="") as file3:
writer = csv.writer(file3)
writer.writerows(books)
print(*books, sep='\n')

Conversion from nested json to csv with pandas

I am trying to convert a nested json into a csv file, but I am struggling with the logic needed for the structure of my file: it's a json with 2 objects and I would like to convert into csv only one of them, which is a list with nesting.
I've found very helpful "flattening" json info in this blog post. I have been basically adapting it to my problem, but it is still not working for me.
My json file looks like this:
{
"tickets":[
{
"Name": "Liam",
"Location": {
"City": "Los Angeles",
"State": "CA"
},
"hobbies": [
"Piano",
"Sports"
],
"year" : 1985,
"teamId" : "ATL",
"playerId" : "barkele01",
"salary" : 870000
},
{
"Name": "John",
"Location": {
"City": "Los Angeles",
"State": "CA"
},
"hobbies": [
"Music",
"Running"
],
"year" : 1985,
"teamId" : "ATL",
"playerId" : "bedrost01",
"salary" : 550000
}
],
"count": 2
}
my code, so far, looks like this:
import json
from pandas.io.json import json_normalize
import argparse
def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Converting json files into csv for Tableau processing')
parser.add_argument(
"-j", "--json", dest="json_file", help="PATH/TO/json file to convert", metavar="FILE", required=True)
args = parser.parse_args()
with open(args.json_file, "r") as inputFile: # open json file
json_data = json.loads(inputFile.read()) # load json content
flat_json = flatten_json(json_data)
# normalizing flat json
final_data = json_normalize(flat_json)
with open(args.json_file.replace(".json", ".csv"), "w") as outputFile: # open csv file
# saving DataFrame to csv
final_data.to_csv(outputFile, encoding='utf8', index=False)
What I would like to obtain is 1 line per ticket in the csv, with headings:
Name,Location_City,Location_State,Hobbies_0,Hobbies_1,Year,TeamId,PlayerId,Salary.
I would really appreciate anything that can do the click!
Thank you!
I actually wrote a package called cherrypicker recently to deal with this exact sort of thing since I had to do it so often!
I think the following code would give you exactly what you're after:
from cherrypicker import CherryPicker
import json
import pandas as pd
with open('file.json') as file:
data = json.load(file)
picker = CherryPicker(data)
flat = picker['tickets'].flatten().get()
df = pd.DataFrame(flat)
print(df)
This gave me the output:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
You can install the package with:
pip install cherrypicker
...and there's more docs and guidance at https://cherrypicker.readthedocs.io.
An you already have a function to flatten a Json object, you have just to flatten the tickets:
...
with open(args.json_file, "r") as inputFile: # open json file
json_data = json.loads(inputFile.read()) # load json content
final_data = pd.DataFrame([flatten_json(elt) for elt in json_data['tickets']])
...
With your sample data, final_data is as expected:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
There may be a simpler solution for this. But this should work!
import json
import pandas as pd
with open('file.json') as file:
data = json.load(file)
df = pd.DataFrame(data['tickets'])
for i,item in enumerate(df['Location']):
df['location_city'] = dict(df['Location'])[i]['City']
df['location_state'] = dict(df['Location'])[i]['State']
for i,item in enumerate(df['hobbies']):
df['hobbies_{}'.format(i)] = dict(df['hobbies'])[i]
df = df.drop({'Location','hobbies'}, axis=1)
print(df)

Not getting expected output in python when converting a csv to json

I have an excel file in which data is saved in csv format in such a way.This data is present in the excel file as shown below,under column A (The CSV File is generated by LabView Software code which i have written to generate data).I have also attached an image of the csv file for reference at the end of my question.
RPM,Load Current,Battery Output,Power Capacity
1200,30,12,37
1600,88,18,55
I want to create a Json file in such format
{
"power_capacity_data" :
{
"rpm" : ["1200","1600"],
"load_curr" : ["30","88"],
"batt_output" : ["12","18"],
"power_cap" : ["37","55"]
}
}
This is my code
import csv
import json
def main():
#created a dictionary so that i can append data to it afterwards
power_data = {"rpm":[],"load_curr":[],"batt_output":[],"power_cap":[]}
with open('power1.lvm') as f:
reader = csv.reader(f)
#trying to append the data of column "RPM" to dictionary
rowcount = 0
for row in reader:
if rowcount == 0:
#trying to skip the first row
rowcount = rowcount + 1
else:
power_data['rpm'].append(row[0])
print(row)
json_report = {}
json_report['pwr_capacity_data'] = power_data
with open('LVMJSON', "w") as f1:
f1.write(json.dumps(json_report, sort_keys=False, indent=4, separators=(',', ': '),encoding="utf-8",ensure_ascii=False))
f1.close()
if __name__ == "__main__":
main()
The output json file that i am getting is this:(please ignore the print(row) statement in my code)
{
"pwr_capacity_data":
{
"load_curr": [],
"rpm": [
"1200,30,12.62,37.88",
"1600,88,18.62,55.88"
],
"batt_output": [],
"power_cap": []
}
}
The whole row is getting saved in the list,but I just want the values under the column RPM to be saved .Can someone help me out with what I may be doing wrong.Thanks in advance.I have attached an image of csv file to just in case it helps
You could use Python's defaultdict to make it a bit easier. Also a dictionary to map all your header values.
from collections import defaultdict
import csv
import json
power_data = defaultdict(list)
header_mappings = {
'RPM' : 'rpm',
'Load Current' : 'load_curr',
'Battery Output' : 'batt_output',
'Power Capacity' : 'power_cap'}
with open('power1.lvm', newline='') as f_input:
csv_input = csv.DictReader(f_input)
for row in csv_input:
for key, value in row.items():
power_data[header_mappings[key]].append(value)
with open('LVMJSON.json', 'w') as f_output:
json.dump({'power_capacity_data' : power_data}, f_output, indent=2)
Giving you an output JSON file looking like:
{
"power_capacity_data": {
"batt_output": [
"12",
"18"
],
"power_cap": [
"37",
"55"
],
"load_curr": [
"30",
"88"
],
"rpm": [
"1200",
"1600"
]
}
}

adding column 2 from a group of text files to 1 text file

I have a group of text files and I am looking to sequentially add the second column from each text file into a new text file. The files are tab delimited and of the following format:
name dave
age 35
job teacher
income 30000
I have generated a file with the 1st column of one of these files in the place of the second column to hopefully simplify the problem:
0 name
0 age
0 job
0 income
I have a large number of these files and would like to have them all in a tab delimited text file such as:
name dave mike sue
age 35 28 40
job teacher postman solicitor
income 30000 20000 40000
I have a text file containing just the names of all the files called all_libs.txt
so far I have written:
#make a sorted list of the file names
with open('all_libs.txt', 'r') as lib:
people = list([line.rstrip() for line in lib])
people_s = sorted(people)
i=0
while i< len(people_s):
with open(people_s[i]) as inf:
for line in inf:
parts = line.split() #split line into parts
if len(parts) > 1: #if more than 1 discrete unit in parts
with open("all_data.txt", 'a') as out_file: #append column2 to all_data
out_file.write((parts[1])+"\n")
i=i+1 #go to the next file in the list
As each new file is opened I would like to add it as a new column rather than just appending as a new line. Would really appreciate any help? I realize something like SQL would probably make this easy but I have never used it and don't really have time to commit to the learning curve for SQL. Many thanks.
This is a very impractical way to store your data - each record is distributed over all the lines, so it's going to be hard to reconstruct the records when reading the file and (as you've seen) to add records.
You should be using a standard format like csv or (even better in a case like this) json:
For example, you could save them as CSV like this:
name,age,job,income
dave,35,teacher,30000
mike,28,postman,20000
sue,40,solicitor,40000
Reading this file:
>>> import csv
>>> with open("C:/Users/Tim/Desktop/people.csv", newline="") as infile:
... reader = csv.DictReader(infile)
... people = list(reader)
Now you have a list of people:
>>> people
[{'income': '30000', 'age': '35', 'name': 'dave', 'job': 'teacher'},
{'income': '20000', 'age': '28', 'name': 'mike', 'job': 'postman'},
{'income': '40000', 'age': '40', 'name': 'sue', 'job': 'solicitor'}]
which you can access easily:
>>> for item in people:
... print("{0[name]} is a {0[job]}, earning {0[income]} per year".format(item))
...
dave is a teacher, earning 30000 per year
mike is a postman, earning 20000 per year
sue is a solicitor, earning 40000 per year
Adding new records now is only a matter of adding them to the end of your file:
>>> with open("C:/Users/Tim/Desktop/people.csv", "a", newline="") as outfile:
... writer = csv.DictWriter(outfile,
... fieldnames=["name","age","job","income"])
... writer.writerow({"name": "paul", "job": "musician", "income": 123456,
... "age": 70})
Result:
name,age,job,income
dave,35,teacher,30000
mike,28,postman,20000
sue,40,solicitor,40000
paul,70,musician,123456
Or you can save it as JSON:
>>> import json
>>> with open("C:/Users/Tim/Desktop/people.json", "w") as outfile:
... json.dump(people, outfile, indent=1)
Result:
[
{
"income": "30000",
"age": "35",
"name": "dave",
"job": "teacher"
},
{
"income": "20000",
"age": "28",
"name": "mike",
"job": "postman"
},
{
"income": "40000",
"age": "40",
"name": "sue",
"job": "solicitor"
}
]
file_1 = """
name dave1
age 351
job teacher1
income 300001"""
file_2 = """
name dave2
age 352
job teacher2
income 300002"""
file_3 = """
name dave3
age 353
job teacher3
income 300003"""
template = """
0 name
0 age
0 job
0 income"""
Assume that the above is read from the files
_dict = {}
def concat():
for cols in template.splitlines():
if cols:
_, col_name = cols.split()
_dict[col_name] = []
for each_file in [file_1, file_2, file_3]:
data = each_file.splitlines()
for line in data:
if line:
words = line.split()
_dict[words[0]].append(words[1])
_text = ""
for key in _dict:
_text += '\t'.join([key, '\t'.join(_dict[key]), '\n'])
return _text
print concat()
OUTPUT
job teacher1 teacher2 teacher3
age 351 352 353
name dave1 dave2 dave3
income 300001 300002 300003

Categories

Resources