I want to run this program to convert a JSON file into a dictionary. I'm using Linux Mint; what commands do I use to run the program and convert the file?
import csv
import json


class Coverters:
    def covert_to_dict(self, filename):
        """ Read data from file and transform it to dictionary """
        out = []
        with open(filename, "r") as file:
            output = csv.DictReader(file, fieldnames=self.__labels)
            print(output)
            for row in output:
                print(row)
                out.append(row)
            # for line in file:
            #     out.append(dict(zip(self.__labels, line.split('#'))))
        return out

    def json_to_csv_file(self, csv_filename, json_filename):
        """ Helper function to convert JSON to CSV file """
        with open(json_filename) as file:
            data = json.load(file)
        with open(csv_filename, "wb+") as file:
            csv_file = csv.writer(file)
            for item in data:
                # Need to add all indexes for items
                csv_file.writerow([item['ts'], item['visitor_uuid']] + item['fields'].values())
import csv
import json


def covert_to_dict(filename, fieldnames=None):
    """ Read data from file and transform it to dictionary """
    out = []
    with open(filename, "r") as file:
        # If fieldnames is None, DictReader uses the first row of the file as the header
        output = csv.DictReader(file, fieldnames=fieldnames)
        print(output)
        for row in output:
            print(row)
            out.append(row)
        # for line in file:
        #     out.append(dict(zip(fieldnames, line.split('#'))))
    return out


covert_to_dict("filename")  # change it to your file name
Save the above code in a file and name it something like xyz.py.
Place the file you want to convert in the same directory.
Open a terminal, go to that directory, and run this command: python xyz.py
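If you also want to use the json_to_csv_file helper from your class under Python 3, note that csv.writer expects a text-mode file (not "wb+") and that dict.values() has to be turned into a list before concatenating. A minimal sketch, assuming each JSON item really has 'ts', 'visitor_uuid' and a 'fields' dict as in your original code:

import csv
import json


def json_to_csv_file(csv_filename, json_filename):
    """ Convert a JSON array of objects to a CSV file (Python 3). """
    with open(json_filename) as infile:
        data = json.load(infile)
    # newline='' keeps the csv module from inserting blank rows on Windows
    with open(csv_filename, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        for item in data:
            # list(...) is required in Python 3, where .values() returns a view
            writer.writerow([item['ts'], item['visitor_uuid']] + list(item['fields'].values()))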
Related
I have a folder called DATA; inside it there are multiple .logs files formatted as CSV. Now I want to convert every single .logs file inside the DATA folder to JSON using Python.
import csv
import json
import glob, os


def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []

    # read csv file
    with open(csvFilePath, encoding='utf-8') as csvf:
        # load csv file data using csv library's dictionary reader
        csvReader = csv.DictReader(csvf)
        # convert each csv row into python dict
        for row in csvReader:
            # add this python dict to json array
            jsonArray.append(row)

    # convert python jsonArray to JSON String and write to file
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)


os.chdir(r"C:\Users\Arda\Desktop\DATA")  # use whatever directory you want
# double \\ no single \
for file in glob.glob("**/*.logs", recursive=True):
    csvFilePath = file
    jsonFilePath = r'data.json'
    csv_to_json(csvFilePath, jsonFilePath)
I can only convert one single file, but there are multiple .logs files in CSV format. Of the list below, only the last one, "T1555.logs", has been converted to JSON:
T1003.001.logs
T1003.002.logs
T1003.003.logs
T1003.004.logs
T1003.logs
T1552.002.logs
T1552.004.logs
T1555.003.logs
T1555.logs
I'd re-arrange where you're traversing the files so that all of the results are stored in a single jsonArray, then written to the file at the end:
import csv
import json
import glob, os


def csvs_to_json(csvFilePaths, jsonFilePath):
    jsonArray = []

    for csvFilePath in csvFilePaths:
        # read csv file
        with open(csvFilePath, encoding='utf-8') as csvf:
            # load csv file data using csv library's dictionary reader
            csvReader = csv.DictReader(csvf)
            # convert each csv row into python dict
            for row in csvReader:
                # add this python dict to json array
                jsonArray.append(row)

    # convert python jsonArray to JSON String and write to file
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)


os.chdir(r"C:\Users\Arda\Desktop\DATA")  # use whatever directory you want
# double \\ no single \
files = list(glob.glob("**/*.logs", recursive=True))
jsonFilePath = r'data.json'
csvs_to_json(files, jsonFilePath)
Let me know if this works for you!
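If instead you want one JSON file per .logs file rather than a single combined data.json, a small sketch reusing the csv_to_json function from your question (the only change is deriving the output name from the input name instead of overwriting data.json each time):

import glob, os

os.chdir(r"C:\Users\Arda\Desktop\DATA")  # use whatever directory you want
for file in glob.glob("**/*.logs", recursive=True):
    # write e.g. T1003.001.json next to T1003.001.logs
    jsonFilePath = os.path.splitext(file)[0] + '.json'
    csv_to_json(file, jsonFilePath)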
This code reads and writes a JSON Lines file. How do I compress it? I tried using gzip.open directly, but I am getting various errors.
import json


def dump_jsonl(data, output_path, append=False):
    """
    Write list of objects to a JSON lines file.
    """
    mode = 'a+' if append else 'w'
    with open(output_path, mode, encoding='utf-8') as f:
        for line in data:
            json_record = json.dumps(line, ensure_ascii=False)
            f.write(json_record + '\n')
    print('Wrote {} records to {}'.format(len(data), output_path))


def load_jsonl(input_path) -> list:
    """
    Read list of objects from a JSON lines file.
    """
    data = []
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line.rstrip('\n|\r')))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data
This is what I am doing to compress it, but then I am unable to read the result back.

def dump_jsonl(data, output_path, append=False):
    with gzip.open(output_path, "a+") as f:
        for line in data:
            json_record = json.dumps(line, ensure_ascii=False)
            encoded = json_record.encode("utf-8") + ("\n").encode("utf-8")
            compressed = gzip.compress(encoded)
            f.write(compressed)
Use the gzip module to write the compressed copy of an existing file:

import gzip

with open('file.jsonl', 'rb') as f_in:
    with gzip.open('file.jsonl.gz', 'wb') as f_out:
        f_out.writelines(f_in)

Note that gzip.open() is for gzip files (e.g. file.jsonl.gz), not plain .jsonl files, and it already compresses whatever you write through it. Calling gzip.compress() on the data as well, as your dump_jsonl does, compresses it twice, which is why you cannot read it back.
Read:
gzip a file in Python
Python support for Gzip
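If you want to read and write the JSON Lines data compressed end to end, here is a minimal sketch adapting the two functions from the question. gzip.open in text mode ('wt'/'at'/'rt') handles both the encoding and the compression, so nothing needs to call gzip.compress() by hand:

import gzip
import json


def dump_jsonl_gz(data, output_path, append=False):
    """ Write a list of objects to a gzip-compressed JSON lines file. """
    mode = 'at' if append else 'wt'
    with gzip.open(output_path, mode, encoding='utf-8') as f:
        for line in data:
            f.write(json.dumps(line, ensure_ascii=False) + '\n')
    print('Wrote {} records to {}'.format(len(data), output_path))


def load_jsonl_gz(input_path) -> list:
    """ Read a list of objects from a gzip-compressed JSON lines file. """
    data = []
    with gzip.open(input_path, 'rt', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data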
I am currently using the following code to convert a large CSV file to a JSON file.
import csv
import json


def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []

    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        for row in csvReader:
            jsonArray.append(row)

    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)


csvFilePath = r'test_data.csv'
jsonFilePath = r'test_data.json'
csv_to_json(csvFilePath, jsonFilePath)
This code works fine and I am able to convert the CSV to JSON without any issues. However, the CSV file contains 600,000+ rows, and hence as many items end up in my JSON, so the JSON file has become very difficult to manage.
I would like to modify my above code such that for every 5000 rows of the CSV, the data is written into a new JSON file. Ideally, I would be having 120 (600,000/5000) JSON files in this case.
How can I do the same?
Split up your read/write methods and add a simple threshold:
JSON_ENTRIES_THRESHOLD = 5000  # modify to whatever you see suitable


def write_json(json_array, filename):
    with open(filename, 'w', encoding='utf-8') as jsonf:
        json.dump(json_array, jsonf)  # note the usage of .dump directly to a file descriptor


def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []

    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        filename_index = 0
        for row in csvReader:
            jsonArray.append(row)

            if len(jsonArray) >= JSON_ENTRIES_THRESHOLD:
                # if we reached the threshold, write out
                write_json(jsonArray, f"{jsonFilePath}-{filename_index}.json")
                filename_index += 1
                jsonArray = []

    # Finally, write out the remainder, if any
    if jsonArray:
        write_json(jsonArray, f"{jsonFilePath}-{filename_index}.json")
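A quick usage sketch, reusing the path from your question. Here jsonFilePath acts as a filename prefix, so pass it without the .json extension:

csvFilePath = r'test_data.csv'
csv_to_json(csvFilePath, 'test_data')  # produces test_data-0.json, test_data-1.json, ...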
I have a text file that contains user information. I want to parse the data so I only have the username, and then create a CSV file with that parsed data.
This is the text file my script is reading from.
blah.com\user1:dajlfnadjhlasdjasnasjlfn:test1
blah.com\user2:dajlfnadjhlasdjasnasjlfn:test2
blah.com\user3:dajlfnadjhlasdjasnasjlfn:test3
blah.com\user4:dajlfnadjhlasdjasnasjlfn:test4
blah.com\user5:dajlfnadjhlasdjasnasjlfn:test5
blah.com\user6:dajlfnadjhlasdjasnasjlfn:test6
Here is my script
import time, os, os.path, sys, string, datetime, time, shutil, csv

# Locate the file
globalpath = 'C:\\users\\userinfo\\'

todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []

# Open the file, find the username and parse it
with open(infile, 'r') as f:
    for line in f:
        usernamestart = line.find('\\')
        usernameend = line.find(':')
        username = line[usernamestart+1:usernameend]
        lines.append(username)
        print(username)

# Output the data as a csv file
with open(outfile, 'w') as csv:
    writer = csv.writer(csv)
    for i in range(len(lines)):
        writer.writerow(('Username', 'Date'))
        writer.writerow(lines[i])
Result:
Traceback (most recent call last):
  File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
    writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line: with open(outfile, 'w') as csv:. You are overwriting the csv module import with your file object. Rename the file handle you write to, like this:

with open(outfile, 'w', newline='') as csv_to_write:  # newline='' avoids blank rows on Windows
    writer = csv.writer(csv_to_write)
    # Write the header once, outside the loop.
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        # writerow expects a sequence; passing a bare string would put each
        # character of the string into its own column
        writer.writerow((one_line, ''))
The first part of your code, finding the username, can be done as follows:

with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]
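Put together, a minimal sketch of the whole script, keeping the paths from your question and still writing an empty Date column:

import csv
import datetime

globalpath = 'C:\\users\\userinfo\\'
todaysdate = datetime.datetime.now().strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
infile = datapath + 'userinfo.txt'
outfile = datapath + 'parsed.csv'

# Keep only the part between the backslash and the first colon
with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]

# newline='' stops the csv module from adding blank rows on Windows
with open(outfile, 'w', newline='') as csv_to_write:
    writer = csv.writer(csv_to_write)
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        writer.writerow((one_line, ''))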
I'm getting an error when trying to write to a CSV file. Here are my two scripts:
Tasks.py
def get_data():
    customers = Customer.objects.all()
    csvfile = StringIO.StringIO()
    csvwriter = csv.writer(csvfile)
    for i in customers:
        csvwriter.writerow([str(i.phone_number)])
    return csvfile
In tests.py:
with open(get_data().getvalue(), 'rb') as myfile:
    data = [row for row in csv.reader(myfile.read().splitlines())]
    print data
This is the error:
IOError: [Errno 2] No such file or directory: '999999999\r\n'
Where '999999999' is what should be written in the file.
How can I fix this?
Your get_data() function doesn't return a file but a StringIO object. You don't need to open() it, you can just read it like this:
myfile = get_data().getvalue()
data = [row for row in csv.reader(myfile.splitlines())]
print data
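Alternatively, since csv.reader accepts any iterable of lines, you can pass the StringIO object itself after rewinding it. A small sketch in the same Python 2 style as the rest of the code:

csvfile = get_data()
csvfile.seek(0)  # rewind to the beginning before reading
data = [row for row in csv.reader(csvfile)]
print data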