Python pickle module usage - python

import pickle

data_list = list()

def Add(x):
    data_list.append(x)
    with open("data.pkl", "wb") as f:
        pickle.dump(data_list, f, pickle.HIGHEST_PROTOCOL)

while 1:
    abc = input("--->")
    if abc == "data":
        with open("data.pkl", "rb") as f:
            print(pickle.load(f))
    else:
        Add(abc)
        print(data_list)
I saved my list with the pickle module.
After restarting the program, if I query the list contents without adding new data, I can see the old records, but as soon as I add new data, the old records disappear. Why can't I see the old records?

It's because you are starting the program with an empty list. You should load the saved data on startup, if the file exists:
import os
import pickle

# Sync database
if os.path.exists('data.pkl'):
    with open("data.pkl", "rb") as f:
        data_list = pickle.load(f)
else:
    data_list = list()

def Add(x):
    data_list.append(x)
    with open("data.pkl", "wb") as f:
        pickle.dump(data_list, f, pickle.HIGHEST_PROTOCOL)

while 1:
    abc = input("--->")
    if abc == "data":
        with open("data.pkl", "rb") as f:
            print(pickle.load(f))
    else:
        Add(abc)
        print(data_list)

Related

JSON file rewrites data after I re-run the program

I wanted to make a program that tracks the progress of our competition. I made a dictionary containing our names as the keys and our wins as the values, and a JSON file to save the progress. But for some reason, when I re-run the program, it goes back to its initial values and just adds the new one, as if it were the first time I used the program.
Here is my code:
import os, json, sys

Numbers = {
    "Peter": 1,
    "Drew": 1,
}

def q1():
    New_numbers = {}
    q = input("Name? ")
    if q not in Numbers:
        Numbers[q] = 1
        with open("list.json", "w") as f:
            json.dump(Numbers, f)
            f.close()
        with open("list.json", "r") as f:
            New_numbers = json.load(f)
        for key, value in New_numbers.items():
            print(key, ":", value)
    elif q in Numbers:
        Numbers[q] += 1
        with open("list.json", "w") as f:
            json.dump(Numbers, f)
            f.close()
        with open("list.json", "r") as f:
            New_numbers = json.load(f)
        for key, value in New_numbers.items():
            print(key, ":", value)

q1()
On the first use it works perfectly. However, as I mentioned before, when I use it again it loads the initial dictionary, not the JSON file.
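One way to address this, following the same load-on-startup idea as the pickle answer above (a sketch based on the question's own file name and starting values, not taken from the original thread):
import os, json

# Load the saved scores on startup if the file exists;
# otherwise fall back to the initial values.
if os.path.exists("list.json"):
    with open("list.json", "r") as f:
        Numbers = json.load(f)
else:
    Numbers = {
        "Peter": 1,
        "Drew": 1,
    }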

Python nested JSON to CSV: incomplete conversion

This is my JSON file:
{"_index":"core-bvd-locations","_type":"_doc","_id":"75b82cba4a80784f4fa36d14c86f6d85","_score":1,"_source":{"a_id":"FR518077177","a_id_type":"BVD ID","a_name":"Moisan Patrick Roger","a_name_normal":"MOISAN PATRICK ROGER","a_country_code":"FR","a_country":"France","a_in_compliance_db":false,"a_nationality":"FR","a_street_address":"Les Carmes","a_city":"Decize","a_postcode":"58300","a_region":"Bourgogne-Franche-Comte|Nievre","a_phone":"+33 603740000","a_latitude":46.79402777777778,"a_longitude":3.496277777777778,"a_national_ids":{"European VAT number":["FR58 518077177"],"SIREN number":["518077177"],"TIN":["518077177"],"SIRET number":["518077177-00013"]},"relationship":"Location info","file_name":"/media/hedwig/iforce/data/BvD/s3-transfer/SuperTable_v3_json/locations/part-00021-1f62c713-17a0-410d-9b18-32328d9836d6-c000.json","a_geo_point":{"lat":46.79402777777778,"lon":3.496277777777778}}}
This is my code:
import csv
import json
import sys
import codecs

def trans(path):
    jsonData = codecs.open('F:\\1.json', 'r', 'utf-8')
    # csvfile = open(path+'.csv', 'w')
    # csvfile = open(path+'.csv', 'wb')  # python2
    csvfile = open('F:\\1.csv', 'w', encoding='utf-8', newline='')
    writer = csv.writer(csvfile, delimiter=',')
    flag = True
    for line in jsonData:
        dic = json.loads(line)
        if flag:
            keys = list(dic.keys())
            print(keys)
            writer.writerow(keys)
            flag = False
        writer.writerow(list(dic.values()))
    jsonData.close()
    csvfile.close()

if __name__ == '__main__':
    path = str(sys.argv[0])
    print(path)
    trans(path)
C:\Users\jeri\PycharmProjects\pythonProject9\venv\Scripts\python.exe C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
C:\Users\jeri\PycharmProjects\pythonProject9\zwc_count_file.py
['_index', '_type', '_id', '_score', '_source']
Process finished with exit code 0
The information in the nested JSON fields cannot be parsed into separate columns. How can I modify the code?
import json
import pandas as pd

file_data = open("json_filname.json", 'r').read()
data = json.loads(file_data)
df = pd.json_normalize(data)
df
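If the goal is a flat CSV rather than a DataFrame, one possible extension of the json_normalize idea, assuming the input is one JSON document per line as in the question (the output file name here is illustrative):
import pandas as pd

# Read one JSON document per line, as in the question's input file
records = pd.read_json('F:\\1.json', lines=True)

# Flatten nested objects such as "_source" into dot-separated columns
flat = pd.json_normalize(records.to_dict(orient='records'))

# Write the fully flattened table out as CSV
flat.to_csv('F:\\1_flat.csv', index=False, encoding='utf-8')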
json.load(): json.load() accepts a file object, parses the JSON data, populates a Python dictionary with the data, and returns it to you.
import json

# Opening JSON file
f = open('data.json')

# returns JSON object as a dictionary
data = json.load(f)
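The same read with a context manager, so the file is closed automatically (a common idiom, not part of the snippet above):
import json

# The with statement closes the file once the block exits
with open('data.json') as f:
    data = json.load(f)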
writer.writerow writes an entire row; the right syntax is:
writer.writerow(#iterable_object#)
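For example, any iterable of field values works; the values here come from the sample record above, and out.csv is a placeholder:
import csv

with open('out.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    # writerow takes an iterable of field values and writes one CSV row
    writer.writerow(['FR518077177', 'Moisan Patrick Roger', 'Decize'])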

How to convert CSV file to multiline JSON?

Here's my code, really simple stuff...
import csv
import json
csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
reader = csv.DictReader(csvfile, fieldnames)
out = json.dumps([row for row in reader])
jsonfile.write(out)
Declare some field names; the reader uses the csv module to read the file, and the field names to dump the rows to JSON format. Here's the problem...
Each record in the CSV file is on a different row. I want the JSON output to be the same way. The problem is it dumps it all on one giant, long line.
I've tried using something like for line in csvfile: and then running my code below that with reader = csv.DictReader(line, fieldnames), which loops through each line, but it dumps the entire file on one line, then dumps the entire file again on another line... and continues until it runs out of lines.
Any suggestions for correcting this?
Edit: To clarify, currently I have: (every record on line 1)
[{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"},{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}]
What I'm looking for: (2 records on 2 lines)
{"FirstName":"John","LastName":"Doe","IDNumber":"123","Message":"None"}
{"FirstName":"George","LastName":"Washington","IDNumber":"001","Message":"Something"}
Not each individual field indented/on a separate line, but each record on its own line.
Some sample input.
"John","Doe","001","Message1"
"George","Washington","002","Message2"
The problem with your desired output is that it is not a valid JSON document; it's a stream of JSON documents!
That's okay, if it's what you need, but that means that for each document you want in your output, you'll have to call json.dumps.
Since the newline you want separating your documents is not contained in those documents, you're on the hook for supplying it yourself. So we just need to pull the loop out of the call to json.dump and interpose newlines for each document written.
import csv
import json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName", "LastName", "IDNumber", "Message")
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')
You can use a Pandas DataFrame to achieve this, as in the following example:
import pandas as pd
csv_file = pd.DataFrame(pd.read_csv("path/to/file.csv", sep = ",", header = 0, index_col = False))
csv_file.to_json("/path/to/new/file.json", orient = "records", date_format = "epoch", double_precision = 10, force_ascii = True, date_unit = "ms", default_handler = None)
import csv
import json

file = 'csv_file_name.csv'
json_file = 'output_file_name.json'

# Read CSV file
def read_CSV(file, json_file):
    csv_rows = []
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        field = reader.fieldnames
        for row in reader:
            csv_rows.extend([{field[i]: row[field[i]] for i in range(len(field))}])
        convert_write_json(csv_rows, json_file)

# Convert csv data into json
def convert_write_json(data, json_file):
    with open(json_file, "w") as f:
        f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': ')))  # for pretty
        f.write(json.dumps(data))

read_CSV(file, json_file)
Documentation of json.dumps()
I took @SingleNegationElimination's response and simplified it into a three-liner that can be used in a pipeline:
import csv
import json
import sys

for row in csv.DictReader(sys.stdin):
    json.dump(row, sys.stdout)
    sys.stdout.write('\n')
You can try this:
import csvmapper

# how the object should look
mapper = csvmapper.DictMapper([
    [
        {'name': 'FirstName'},
        {'name': 'LastName'},
        {'name': 'IDNumber', 'type': 'int'},
        {'name': 'Messages'}
    ]
])

# parser instance
parser = csvmapper.CSVParser('sample.csv', mapper)

# conversion service
converter = csvmapper.JSONConverter(parser)
print(converter.doConvert(pretty=True))
Edit:
Simpler approach
import csvmapper

fields = ('FirstName', 'LastName', 'IDNumber', 'Messages')
parser = csvmapper.CSVParser('sample.csv', csvmapper.FieldMapper(fields))
converter = csvmapper.JSONConverter(parser)
print(converter.doConvert(pretty=True))
I see this is old, but I needed the code from @SingleNegationElimination; however, I had an issue with the data containing non-UTF-8 characters. These appeared in fields I was not overly concerned with, so I chose to ignore them. However, that took some effort. I am new to Python, so with some trial and error I got it to work. The code is a copy of SingleNegationElimination's with extra handling of UTF-8. I tried to do it with https://docs.python.org/2.7/library/csv.html but in the end gave up. The code below worked.
import csv, json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("Scope", "Comment", "OOS Code", "In RMF", "Code", "Status", "Name", "Sub Code", "CAT", "LOB", "Description", "Owner", "Manager", "Platform Owner")
reader = csv.DictReader(csvfile, fieldnames)
code = ''
for row in reader:
    try:
        print('+' + row['Code'])
        for key in row:
            row[key] = row[key].decode('utf-8', 'ignore').encode('utf-8')
        json.dump(row, jsonfile)
        jsonfile.write('\n')
    except:
        print('-' + row['Code'])
        raise
Add the indent parameter to json.dumps:
import json

data = {'this': ['has', 'some', 'things'],
        'in': {'it': 'with', 'some': 'more'}}
print(json.dumps(data, indent=4))
Also note that you can simply use json.dump with the open jsonfile:
json.dump(data, jsonfile)
Use pandas and the json library:
import pandas as pd
import json

filepath = "inputfile.csv"
output_path = "outputfile.json"

df = pd.read_csv(filepath)

# Create a multiline json
json_list = json.loads(df.to_json(orient="records"))

with open(output_path, 'w') as f:
    for item in json_list:
        f.write("%s\n" % item)
How about using Pandas to read the CSV file into a DataFrame (pd.read_csv), then manipulating the columns if you want (dropping them or updating values), and finally converting the DataFrame back to JSON (pd.DataFrame.to_json)?
Note: I haven't checked how efficient this will be, but this is definitely one of the easiest ways to manipulate and convert a large CSV to JSON.
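A minimal sketch of that round trip, assuming the file names used elsewhere in this thread; the dropped column is only an example:
import pandas as pd

# Read the CSV into a DataFrame
df = pd.read_csv('file.csv')

# Optional column manipulation, e.g. dropping a column if it exists
df = df.drop(columns=['Message'], errors='ignore')

# orient='records' with lines=True writes one JSON object per line,
# which matches the multiline output asked for in the question
df.to_json('file.json', orient='records', lines=True)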
As a slight improvement to @MONTYHS's answer, iterating through a tuple of fieldnames:
import csv
import json

csvfilename = 'filename.csv'
jsonfilename = csvfilename.split('.')[0] + '.json'

csvfile = open(csvfilename, 'r')
jsonfile = open(jsonfilename, 'w')

reader = csv.DictReader(csvfile)
fieldnames = ('FirstName', 'LastName', 'IDNumber', 'Message')

output = []
for each in reader:
    row = {}
    for field in fieldnames:
        row[field] = each[field]
    output.append(row)

json.dump(output, jsonfile, indent=2, sort_keys=True)
import csv
import json

def read():
    noOfElem = 200  # number of rows you want to import
    csv_file_name = "hashtag_donaldtrump.csv"  # csv file name
    json_file_name = "hashtag_donaldtrump.json"  # json file name

    with open(csv_file_name, mode='r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        with open(json_file_name, 'w') as json_file:
            i = 0
            json_file.write("[")
            for row in csv_reader:
                i = i + 1
                if i == noOfElem:
                    json_file.write("]")
                    return
                json_file.write(json.dumps(row))
                if i != noOfElem - 1:
                    json_file.write(",")

read()
Change the above three parameters, and everything will be done.
import csv
import json

csvfile = csv.DictReader(open('filename.csv', 'r'))
output = []
for each in csvfile:
    row = {}
    row['FirstName'] = each['FirstName']
    row['LastName'] = each['LastName']
    row['IDNumber'] = each['IDNumber']
    row['Message'] = each['Message']
    output.append(row)

json.dump(output, open('filename.json', 'w'), indent=4, sort_keys=False)

Using csvreader against a gzipped file in Python

I have a bunch of gzipped CSV files that I'd like to open for inspection using Python's built-in csv reader. I'd like to do this without first having to manually unzip them to disk. I guess I want to somehow get a stream of the uncompressed data and pass this into the csv reader. Is this possible in Python?
Use the gzip module:
import csv
import gzip

with gzip.open(filename, mode='rt') as f:
    reader = csv.reader(f)
    # ...
I've tried the above version for writing and reading, and it didn't work in Python 3.3 due to a "bytes" error. However, after some trial and error I got the following to work. Maybe it also helps others:
import csv
import gzip
import io

with gzip.open("test.gz", "w") as file:
    writer = csv.writer(io.TextIOWrapper(file, newline="", write_through=True))
    writer.writerow([1, 2, 3])
    writer.writerow([4, 5, 6])

with gzip.open("test.gz", "r") as file:
    reader = csv.reader(io.TextIOWrapper(file, newline=""))
    print(list(reader))
As amohr suggests, the following works as well:
import gzip, csv

with gzip.open("test.gz", "wt", newline="") as file:
    writer = csv.writer(file)
    writer.writerow([1, 2, 3])
    writer.writerow([4, 5, 6])

with gzip.open("test.gz", "rt", newline="") as file:
    reader = csv.reader(file)
    print(list(reader))
A more complete solution:
import csv, gzip

class GZipCSVReader:
    def __init__(self, filename):
        self.gzfile = gzip.open(filename)
        self.reader = csv.DictReader(self.gzfile)

    def next(self):
        return self.reader.next()

    def close(self):
        self.gzfile.close()

    def __iter__(self):
        return self.reader.__iter__()
Now you can use it like this:
r = GZipCSVReader('my.csv')
for row in r:
    for k, v in row.items():
        print k, v
r.close()
EDIT: following the below comment, how about a simpler approach:
def gzipped_csv(filename):
    with gzip.open(filename) as f:
        r = csv.DictReader(f)
        for row in r:
            yield row
which then lets you do:
for row in gzipped_csv(filename):
    for k, v in row.items():
        print(k, v)

csv writer not closing file

I'm reading a CSV file and then writing a new one:
import csv

with open('thefile.csv', 'rb') as f:
    data = list(csv.reader(f))

import collections
counter = collections.defaultdict(int)
for row in data:
    counter[row[11]] += 1

writer = csv.writer(open('/pythonwork/thefile_subset1.csv', 'w'))
for row in data:
    if counter[row[11]] >= 500:
        writer.writerow(row)
For some reason I cannot get csv.writer to close the file. When I open the file, it opens as READ ONLY because it says it is still open.
How do I close thefile_subset1.csv after I am done with it?
with open('/pythonwork/thefile_subset1.csv', 'w') as outfile:
    writer = csv.writer(outfile)
    for row in data:
        if counter[row[11]] >= 500:
            writer.writerow(row)
You can break out the open command into its own variable, so that you can close it later.
f = open('/pythonwork/thefile_subset1.csv', 'w')
writer = csv.writer(f)
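# ... write your rows here with writer.writerow(...) before closing ...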
f.close()
csv.writer throws a ValueError if you try to write to a closed file.
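For example, a quick demonstration (the file name is a placeholder):
import csv

f = open('demo.csv', 'w', newline='')
writer = csv.writer(f)
f.close()
writer.writerow(['a', 'b'])  # raises ValueError: I/O operation on closed file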
Close the file, not the csv writer. To do this, you'll need to open the file first before instantiating your writer, rather than keeping it all in one line.
import csv
import collections

with open('thefile.csv', 'rb') as f:
    data = list(csv.reader(f))
f.close()  # good idea to close if you're done with it

counter = collections.defaultdict(int)
for row in data:
    counter[row[11]] += 1

fSubset = open('/pythonwork/thefile_subset1.csv', 'w')
writer = csv.writer(fSubset)
for row in data:
    if counter[row[11]] >= 500:
        writer.writerow(row)
fSubset.close()
Also, I would suggest keeping your imports at the top of the script and closing the first file when you're done with it.
Force the writer to clean up:
del writer
Look at the difference:
with open('thefile.csv', 'rb') as f:
    data = list(csv.reader(f))
vs:
writer = csv.writer(open('/pythonwork/thefile_subset1.csv', 'w'))
In the first case the with statement closes the file for you; in the second the file object is created inline and never bound to a name, so there is nothing left to call close() on.
