I want to run this program to convert a JSON file into a dictionary. I'm using Linux Mint. What commands do I use to run the program and convert the file?
import csv
import json
class Coverters:
    """Small collection of file-format conversion helpers.

    Args:
        labels: Optional sequence of column names used by
            :meth:`covert_to_dict`. When omitted (``None``) the first row
            of the CSV file is used as the header.
    """

    def __init__(self, labels=None):
        # Stored under the private name the conversion method reads.
        self.__labels = labels

    def covert_to_dict(self, filename):
        """Read a CSV file and return its rows as a list of dictionaries.

        Args:
            filename: Path of the CSV file to read.

        Returns:
            A list with one ``dict`` per data row, keyed by column name.
        """
        # newline="" lets the csv module handle embedded line breaks itself.
        with open(filename, "r", newline="") as file:
            reader = csv.DictReader(file, fieldnames=self.__labels)
            return [dict(row) for row in reader]

    def json_to_csv_file(self, csv_filename, json_filename):
        """Convert a JSON file holding a list of records into a CSV file.

        Every record must provide the keys ``ts``, ``visitor_uuid`` and
        ``fields`` (a mapping); the values of ``fields`` are appended to the
        row in the mapping's own order.

        Args:
            csv_filename: Path of the CSV file to create or overwrite.
            json_filename: Path of the JSON file to read.
        """
        with open(json_filename) as file:
            data = json.load(file)
        # Text mode with newline="" is what csv.writer expects on Python 3.
        with open(csv_filename, "w", newline="") as file:
            writer = csv.writer(file)
            for item in data:
                # dict.values() is a view on Python 3, so materialise it
                # before concatenating with the leading columns.
                writer.writerow(
                    [item['ts'], item['visitor_uuid']]
                    + list(item['fields'].values())
                )
import csv
import json
def covert_to_dict(filename, labels=None):
    """Read a CSV file and return its rows as a list of dictionaries.

    Args:
        filename: Path of the CSV file to read.
        labels: Optional sequence of column names. When omitted, the first
            row of the file is treated as the header.

    Returns:
        A list with one ``dict`` per data row, keyed by column name.
    """
    # newline="" lets the csv module handle embedded line breaks itself.
    with open(filename, "r", newline="") as file:
        reader = csv.DictReader(file, fieldnames=labels)
        return [dict(row) for row in reader]
covert_to_dict("filename") # replace "filename" with the path of the file to convert
Save the above code in a file and name it something like xyz.py.
Place the file you want to convert in the same directory.
Open a terminal, go to that directory, and run this command: python xyz.py
I have a folder called DATA. Inside that folder there are multiple .logs files, each formatted as CSV. Now I want to convert every single .logs file inside the DATA folder to JSON using Python.
import csv
import json
import glob, os
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert one CSV file into a JSON file containing an array of rows.

    The first CSV row is used as the header; every following row becomes
    one JSON object keyed by the header names.

    Args:
        csvFilePath: Path of the CSV file to read (UTF-8).
        jsonFilePath: Path of the JSON file to create or overwrite (UTF-8).
    """
    # Read the csv file; newline='' leaves line-ending handling to the
    # csv module so quoted multi-line fields survive.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # DictReader yields one mapping per row; collect them all.
        jsonArray = list(csv.DictReader(csvf))

    # Serialise the collected rows and actually write them to the file.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Work from inside the folder that holds the .logs files.
os.chdir(r"C:\Users\Arda\Desktop\DATA")# use whatever directory you want
# The path above is a raw string (r"..."), so its single backslashes are taken literally.
# Visit every *.logs file below the current directory, including sub-folders.
for file in glob.glob("**/*.logs", recursive = True):
# NOTE(review): this empty-list assignment is overwritten by the very next line.
csvFilePath = []
csvFilePath = file
# NOTE(review): the output path is the same for every input file and csv_to_json()
# opens it with mode 'w', so each conversion replaces the previous one; only the
# data of the last file processed is left in data.json.
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)
I can only convert a single file, but there are multiple .logs files formatted as CSV.
Of the files in that list, only the last one, "T1555.logs", ends up converted to JSON.
I'd re-arrange where you're traversing the files so that all of the results are stored in a single jsonArray, then written to the file at the end:
import csv
import json
import glob, os
def csvs_to_json(csvFilePaths, jsonFilePath):
    """Merge the rows of several CSV files into a single JSON array file.

    Each CSV file is read with its own first row as the header. Rows are
    appended in the order the files are given.

    Args:
        csvFilePaths: Iterable of CSV file paths to read (UTF-8).
        jsonFilePath: Path of the JSON file to create or overwrite (UTF-8).
    """
    jsonArray = []
    for csvFilePath in csvFilePaths:
        # newline='' leaves line-ending handling to the csv module.
        with open(csvFilePath, encoding='utf-8', newline='') as csvf:
            # Every row of this file becomes one dict in the shared array.
            jsonArray.extend(csv.DictReader(csvf))

    # Write once, after all inputs were read, so nothing gets overwritten.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Switch to the folder that holds the .logs files (adjust to your own path).
# The r"..." prefix makes this a raw string, so the backslashes are literal.
os.chdir(r"C:\Users\Arda\Desktop\DATA")

# All inputs are merged into this one output file.
jsonFilePath = r'data.json'

# glob.glob() already returns a list; recursive=True makes "**" descend
# into sub-folders.
files = glob.glob("**/*.logs", recursive=True)
csvs_to_json(files, jsonFilePath)
Let me know if this works for you!
The code below reads and writes a JSON lines file. How can I compress it? I tried using gzip.open directly, but I am getting various errors.
import json
def dump_jsonl(data, output_path, append=False):
    """Write an iterable of objects to a JSON lines file.

    Each object is serialised with ``json.dumps`` (non-ASCII characters are
    kept as-is) and written on its own line.

    Args:
        data: Iterable of JSON-serialisable objects. Any iterable works,
            including generators.
        output_path: Path of the file to write (UTF-8).
        append: When true, add to an existing file instead of replacing it.
    """
    mode = 'a+' if append else 'w'
    written = 0
    with open(output_path, mode, encoding='utf-8') as f:
        # Count while writing so the report below does not need len(data).
        for written, line in enumerate(data, start=1):
            json_record = json.dumps(line, ensure_ascii=False)
            f.write(json_record + '\n')
    print('Wrote {} records to {}'.format(written, output_path))
def load_jsonl(input_path) -> list:
    """Read a list of objects from a JSON lines file.

    Args:
        input_path: Path of the JSON lines file to read (UTF-8).

    Returns:
        A list with one decoded object per non-blank line, in file order.
    """
    data = []
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if line:
                data.append(json.loads(line))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data
This is what I am doing to compress but I am unable to read it.
def dump_jsonl(data, output_path, append=False):
    """Write an iterable of objects to a gzip-compressed JSON lines file.

    The file object returned by ``gzip.open`` compresses everything written
    to it, so the records are written as plain text and must not be passed
    through ``gzip.compress`` as well.

    Args:
        data: Iterable of JSON-serialisable objects.
        output_path: Path of the ``.gz`` file to write.
        append: When true, add to an existing archive instead of replacing
            it.
    """
    import gzip  # local import keeps this snippet self-contained

    # Text modes ('wt'/'at') let gzip handle the UTF-8 encoding.
    mode = 'at' if append else 'wt'
    with gzip.open(output_path, mode, encoding='utf-8') as f:
        for line in data:
            f.write(json.dumps(line, ensure_ascii=False) + '\n')
Use the gzip module: read the plain file with open() and write it out through gzip.open().
import gzip
# Copy the plain JSON lines file into a gzip archive.
# Both files are opened in binary mode: a gzip file opened with 'wb' accepts
# bytes only, so the source must be read as bytes too.
with open('file.jsonl', 'rb') as f_in:
    with gzip.open('file.jsonl.gz', 'wb') as f_out:
        # Iterating the source yields its lines one at a time.
        f_out.writelines(f_in)
gzip.open() is for opening gzip-compressed files; the uncompressed .jsonl file is opened with the regular open().
gzip a file in Python
Python support for Gzip
I am currently using the following code to convert a large CSV file to a JSON file.
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into one JSON file holding an array of row objects.

    Args:
        csvFilePath: Path of the CSV file to read (UTF-8); its first row is
            the header.
        jsonFilePath: Path of the JSON file to create or overwrite (UTF-8).
    """
    jsonArray = []
    # newline='' leaves line-ending handling to the csv module.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        csvReader = csv.DictReader(csvf)
        for row in csvReader:
            # Each row is a mapping of header name -> cell text.
            jsonArray.append(row)

    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)
# Input CSV and output JSON, both relative to the current working directory.
csvFilePath, jsonFilePath = r'test_data.csv', r'test_data.json'
csv_to_json(csvFilePath, jsonFilePath)
This code works fine and I am able to convert the CSV to JSON without any issues. However, as the CSV file contains 600,000+ rows and hence as many items in my JSON, it has become very difficult to manage the JSON file.
I would like to modify my above code such that for every 5000 rows of the CSV, the data is written into a new JSON file. Ideally, I would be having 120 (600,000/5000) JSON files in this case.
How can I do the same?
Split up your read/write methods and add a simple threshold:
JSON_ENTRIES_THRESHOLD = 5000  # modify to whatever you see suitable


def write_json(json_array, filename):
    """Serialise *json_array* into *filename* as UTF-8 encoded JSON."""
    with open(filename, 'w', encoding='utf-8') as jsonf:
        # json.dump writes straight to the open file object.
        json.dump(json_array, jsonf)


def csv_to_json(csvFilePath, jsonFilePath, threshold=None):
    """Split a CSV file into several JSON files of at most *threshold* rows.

    Output files are named after *jsonFilePath* with a running index placed
    before the extension, e.g. ``test_data.json`` produces
    ``test_data-0.json``, ``test_data-1.json`` and so on. An input without
    data rows still produces one file containing an empty array.

    Args:
        csvFilePath: Path of the CSV file to read (UTF-8); its first row is
            the header.
        jsonFilePath: Template path for the JSON output files.
        threshold: Maximum number of rows per output file. Defaults to
            ``JSON_ENTRIES_THRESHOLD``.

    Raises:
        ValueError: If *threshold* is smaller than 1.
    """
    import os  # local import: only needed to split the output file name

    if threshold is None:
        threshold = JSON_ENTRIES_THRESHOLD
    if threshold < 1:
        raise ValueError("threshold must be at least 1")

    # Build names from the caller's path rather than a fixed literal.
    root, ext = os.path.splitext(jsonFilePath)
    ext = ext or '.json'

    jsonArray = []
    filename_index = 0
    # newline='' leaves line-ending handling to the csv module.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        csvReader = csv.DictReader(csvf)
        for row in csvReader:
            if len(jsonArray) >= threshold:
                # If we reached the threshold, write out and start a new chunk.
                write_json(jsonArray, f"{root}-{filename_index}{ext}")
                filename_index += 1
                jsonArray = []
            jsonArray.append(row)

    # Finally, write out the remainder.
    write_json(jsonArray, f"{root}-{filename_index}{ext}")
I have this text file, that contains user information. I want to parse the data, so I only have the username, and then I want to create a csv file with that parsed data.
This is the text file, my script is reading from.
Here is my script
import time, os, os.path, sys, string, datetime, time, shutil, csv
# Locate the input and output files: both live in a per-day folder named after
# today's date (YYYY-MM-DD) below <globalpath>\data\.
globalpath = 'C:\\users\\userinfo\\'
todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []
# Open the input file and extract the username from each line: the text between
# the first backslash and the first colon.
with open(infile, 'r') as f:
for line in f:
# str.find returns -1 when the character is absent, so a line without a
# backslash is sliced from its first character.
usernamestart = line.find('\\')
usernameend = line.find(':')
username = line[usernamestart+1:usernameend]
# NOTE(review): `username` is not stored anywhere in the visible code, so `lines`
# is still empty when the output loop below runs — confirm whether an append to
# `lines` was intended here.
# Output the data as a CSV file.
# NOTE(review): binding the file object to the name `csv` hides the imported csv
# module, so csv.writer(...) on the next line is looked up on the file object and
# raises AttributeError.
with open(outfile, 'w') as csv:
writer = csv.writer(csv)
# NOTE(review): this writes the ('Username', 'Date') header once per entry in
# `lines` and never writes the entries themselves.
for i in range(len(lines)):
writer.writerow(('Username', 'Date'))
Traceback (most recent call last):
File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line
with open(outfile, 'w') as csv: — here you are overwriting the imported csv module with the file object. Rename the variable that holds the output file, like this:
# Bind the output file to a name other than `csv` so the csv module stays
# reachable inside the block.
# NOTE: on Python 3 the csv documentation recommends also passing newline=''
# to open() for files handed to csv.writer.
with open(outfile, 'w') as csv_to_write:
    writer = csv.writer(csv_to_write)
    # Write the header once.
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        # writerow() expects a sequence of fields. Passing the bare string
        # would treat every character as its own column, so wrap the
        # username together with an empty date column.
        writer.writerow((one_line, ''))
Your first part of code finding the username can be done as following:
# For every line keep the text after the last backslash and before the first
# colon that follows it.
with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]
I'm getting an error when trying to write to a csv file. Here's my two scripts:
# Creates an in-memory CSV buffer and returns the StringIO object itself
# (a file-like object, not a path on disk).
# NOTE(review): the queryset is bound to `customer`, but the loop iterates
# `customers`, which is not defined in this snippet — confirm the intended name.
# NOTE(review): the loop has no visible body here; presumably each customer is
# written with csvwriter.writerow(...) — TODO confirm against the original code.
def get_data():
customer = Customer.objects.all()
csvfile = StringIO.StringIO()
csvwriter = csv.writer(csvfile)
for i in customers:
return csvfile
In tests.py:
# NOTE(review): get_data().getvalue() returns the CSV text held in the buffer, and
# open() treats that text as a file name — which is why the IOError below quotes
# the data itself as the missing path.
with open(get_data().getvalue(), 'rb') as myfile:
# Split the content into lines and parse each one into a list of fields.
data = [row for row in csv.reader(myfile.read().splitlines())]
print data
This is the error:
IOError: [Errno 2] No such file or directory: '999999999\r\n'
Where '999999999' is what should be written in the file.
How can I fix this?
Your get_data() function doesn't return a file but a StringIO object. You don't need to open() it, you can just read it like this:
# get_data() returns an in-memory buffer rather than a path, so take its text
# directly instead of passing it to open().
myfile = get_data().getvalue()
# csv.reader accepts any iterable of lines; collect the parsed rows.
data = list(csv.reader(myfile.splitlines()))
# A parenthesised single argument prints the same on Python 2 and Python 3.
print(data)