Writing variables captured from every JSON file in the current dir to a CSV - Python

I'm trying to go through every JSON file in my current directory and find two specific variables, productId and userProfileId (both are captured correctly in the output file), but I can't get it to run for every file in the folder.
This is my best try so far:
import json
import csv
import os

KEYS = ['user_id', 'product_id']

for files in os.walk("."):
    for filename in files:
        for i in filename:
            if i.endswith(".json"):
                print(i)
                with open(i) as json_data:
                    order_parsed = json.load(json_data)
                    products_data = order_parsed['items']
                    user_data = order_parsed['clientProfileData']
                    with open('user-item.csv', 'w') as dataFile:
                        newFileWriter = csv.writer(dataFile)
                        newFileWriter.writerow(KEYS)
                        for item in products_data:
                            productId = products_data[0]['productId']
                            userId = user_data["userProfileId"]
                            print(productId)
                            print(userId)
                            newFileWriter.writerow([userId, productId])

To loop through all the files in a folder, you can use a for loop like this:
for file in os.listdir('folder_path'):
    if file[-5:] == ".json":
        arq = open(file, 'r')

You are doing a dictionary key search inside the with block. Try moving that search out of the with block by unindenting products_data and user_data one level.
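Putting the pieces together, a minimal sketch of a working version (assuming each JSON file has the same 'items' and 'clientProfileData' structure, and opening the CSV once before the loop so it is not overwritten for every file) might look like this:
import csv
import json
import os

KEYS = ['user_id', 'product_id']

# Open the output CSV once, before looping, so rows from every file accumulate
# instead of overwriting the previous file's rows.
with open('user-item.csv', 'w', newline='') as data_file:
    writer = csv.writer(data_file)
    writer.writerow(KEYS)
    for filename in os.listdir('.'):
        if not filename.endswith('.json'):
            continue
        with open(filename) as json_data:
            order = json.load(json_data)
        user_id = order['clientProfileData']['userProfileId']
        for item in order['items']:
            # Use the current item, not items[0], so every product is written.
            writer.writerow([user_id, item['productId']])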

Related

python3 - how to produce output files whose names are modifications of the input file names?

Hello, I'm using a Python script that consists of the following code:
from Bio import SeqIO

# set input file, output file to write to
gbk_file = "bin.10.gbk"
tsv_file = "results.bin_10.tsv"
cluster_out = open(tsv_file, "w")

# Extract cluster info, write to file
for seq_record in SeqIO.parse(gbk_file, "genbank"):
    for seq_feat in seq_record.features:
        if seq_feat.type == "protocluster":
            cluster_number = seq_feat.qualifiers["protocluster_number"][0].replace(" ", "_").replace(":", "")
            cluster_type = seq_feat.qualifiers["product"][0]
            cluster_out.write("#" + cluster_number + "\tCluster Type:" + cluster_type + "\n")
The issue is that I want to automate this script over multiple files in a certain directory: I want gbk_file to cover all of the files that have .gbk as a suffix, and tsv_file to be the respective output file for each input file.
So if an input file has the name "bin.10.gbk", the output will be "results.bin_10.tsv".
I tried using Python's glob function, but I don't know how to create a tsv_file variable that stores modified strings from the input file names:
import glob
# setting variables
gbk_files = glob.glob("*.gbk")
tsv_files = gbk_files.replace(".gbk",".results.tsv")
cluster_out = open(tsv_files, "w")
Making those changes, I got the following error:
AttributeError: 'list' object has no attribute 'replace'
So how can I deal with this?
Thanks for reading :)
Hope the following function can help you.
def processfiles():
    for file in glob.glob("*.gbk"):
        names = file.split('.')
        tsv_file = f'results.{names[-3]}_{names[-2]}.tsv'
        with open(tsv_file, 'w') as tsv:
            tsv.write('write your content here')
            # no explicit close needed: the with block closes the file
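Combining that idea with the original SeqIO loop, a minimal sketch (assuming every input follows the same "something.N.gbk" naming pattern as "bin.10.gbk") might look like:
import glob
from Bio import SeqIO

def process_files():
    for gbk_file in glob.glob("*.gbk"):
        # "bin.10.gbk" -> ["bin", "10", "gbk"] -> "results.bin_10.tsv"
        names = gbk_file.split('.')
        tsv_file = f'results.{names[-3]}_{names[-2]}.tsv'
        with open(tsv_file, 'w') as cluster_out:
            for seq_record in SeqIO.parse(gbk_file, "genbank"):
                for seq_feat in seq_record.features:
                    if seq_feat.type == "protocluster":
                        cluster_number = seq_feat.qualifiers["protocluster_number"][0].replace(" ", "_").replace(":", "")
                        cluster_type = seq_feat.qualifiers["product"][0]
                        cluster_out.write("#" + cluster_number + "\tCluster Type:" + cluster_type + "\n")

process_files()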

Python Script Only Reads the First 100 Files

I have a folder with 616 files, but my script only reads the first 100. What settings do I need to change to get it to read them all? It's probably relevant that I'm using Anaconda Navigator's Jupyter Notebook.
Here's my code:
import re
import string
from collections import Counter
import os
import glob

def word_count(file_tokens):
    for word in file_tokens:
        count = Counter(file_tokens)
        return count

files_list = glob.glob("german/test/*/negative/*")
print(files_list)

for path in files_list:
    corpus, tache, classe, file_name = path.split("\\")
    file = open(path, mode="r", encoding="utf-8")
    read_file = file.read()
    ## lowercase
    file_clean = read_file.lower()
    ## tokenize
    file_tokens = file_clean.split()
    ## word count and sort
    print(word_count(file_tokens))
You are probably hitting a maximum open files limit in your system. You can either close every file at the end of the loop, or use a context manager in the loop:
with open(path, mode="r", encoding="utf-8") as file:
    ....
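Applied to the loop above, a minimal sketch of that change (same logic, only the file handling rewritten) could look like:
for path in files_list:
    corpus, tache, classe, file_name = path.split("\\")
    # The context manager closes the file as soon as the block ends,
    # so the process never accumulates hundreds of open handles.
    with open(path, mode="r", encoding="utf-8") as file:
        read_file = file.read()
    file_tokens = read_file.lower().split()
    print(word_count(file_tokens))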
Have you tried printing the length of the files_list variable to check whether it is 616 or 100?
print(len(files_list))

How do I get one JSON file instead of thousands?

I'm using TweetScraper to scrape tweets with certain keywords. Right now, each tweet gets saved to a separate JSON file in a set folder, so I end up with thousands of JSON files. Is there a way to make each new tweet append to one big JSON file? If not, how do I process/work with thousands of small JSON files in Python?
Here's the part of settings.py that handles saving data:
# settings for where to save data on disk
SAVE_TWEET_PATH = './Data/tweet/'
SAVE_USER_PATH = './Data/user/'
I would read all the files, put the data in a list, and save it again as a single JSON file:
import os
import json

folder = '.'
all_tweets = []

# --- read ---
for filename in sorted(os.listdir(folder)):
    if filename.endswith('.json'):
        fullpath = os.path.join(folder, filename)
        with open(fullpath) as fh:
            tweet = json.load(fh)
            all_tweets.append(tweet)

# --- save ---
with open('all_tweets.json', 'w') as fh:
    json.dump(all_tweets, fh)
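If you would rather not hold everything in memory, a generator over the folder is another option; a minimal sketch (keeping the tweets in their per-file layout) might be:
import os
import json

def iter_tweets(folder='.'):
    # Yield one parsed tweet per .json file, without loading them all at once.
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.json'):
            with open(os.path.join(folder, filename)) as fh:
                yield json.load(fh)

# Example: count the tweets without building one big list in memory.
print(sum(1 for _ in iter_tweets('./Data/tweet/')))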

Read a list from a file and append to it using Python

I have a file called usernames.py that may contain a list or may not exist at all:
usernames.py
['user1', 'user2', 'user3']
In Python I now want to read this file if it exists and append a new user to the list, or otherwise create a list with that user, i.e. ['user3'].
This is what I have tried:
with open(path + 'usernames.py', 'w+') as file:
    file_string = host_file.read()
    file_string.append(instance)
    file.write(file_string)
This gives me an "unresolved 'append'" error. How can I achieve this? Python does not know it is a list, and if the file does not exist it is even worse, as I have nothing to convert to a list.
Try this:
import os

filename = 'data'

if os.path.isfile(filename):
    with open(filename, 'r') as f:
        l = eval(f.readline())
else:
    l = []

l.append(instance)

with open(filename, 'w') as f:
    f.write(str(l))
BUT this is quite unsafe if you don't know where the file comes from, as it could include any code and do anything!
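A safer variant of the same idea, as a sketch, swaps eval for ast.literal_eval, which only accepts Python literals (lists, strings, numbers) and refuses arbitrary code:
import ast
import os

filename = 'data'

if os.path.isfile(filename):
    with open(filename, 'r') as f:
        # literal_eval parses the list literal but will not execute code.
        users = ast.literal_eval(f.readline())
else:
    users = []

users.append(instance)  # `instance` is the new user name, as in the question

with open(filename, 'w') as f:
    f.write(str(users))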
It would be better not to use a python file for persistence -- what happens if someone slips you a usernames.py that has exploit code in it? Consider a csv file or a pickle, or just a text file with one user per line.
That said, if you don't open it as a python file, something like this should work:
from os.path import join

with open(join(path, 'usernames.py'), 'r+') as file:
    file_string = file.read()
    file_string = file_string.strip().strip('[').strip(']')
    file_data = [name.strip().strip('"').strip("'") for name in file_string.split(',')]
    file_data.append(instance)
    file.seek(0)
    file.write(str(file_data))
If usernames contain commas or end in quotes, you have to be more careful.
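For completeness, the "text file with one user per line" suggestion could look like this sketch (users.txt is a hypothetical file name):
import os

users_file = 'users.txt'  # hypothetical path; one username per line

# Read existing users, if the file exists.
users = []
if os.path.isfile(users_file):
    with open(users_file) as f:
        users = [line.strip() for line in f if line.strip()]

users.append(instance)  # `instance` is the new user name

with open(users_file, 'w') as f:
    f.write('\n'.join(users) + '\n')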

Emails via Python - Paste contents of excel/csv as a formatted table onto the mail body

I am trying to send mails via Python using smtplib. My main concern is to get the contents of a csv/excel file and paste the data as it is (tabular format) into the body of the email being sent out. I have the following snippet ready to search for the file and print the contents on the shell. How would I get the same output into a mail body?
from os import listdir
import csv
import os

# Search for a csv in the specified folder
directory = "folder_path"

def find_csv_filenames(path_to_dir, suffix="Data.csv"):
    filenames = listdir(path_to_dir)
    return [filename for filename in filenames if filename.endswith(suffix)]

filenames = find_csv_filenames(directory)
for name in filenames:
    datafile = name
    print(name)
    path = directory + '//' + datafile
    # Read the selected csv
    with open(path, 'r') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        for row in spamreader:
            print(', '.join(row))
TIA for your help.
Create a StringIO instance, say csvText, and instead of print use
csvText.write(", ".join(row) + "\n")
The final newline is necessary because it is not added automatically, as it is by print. Finally (i.e. after the loop), calling csvText.getvalue() will return what you want to mail.
I would also suggest not gluing the file path together by yourself but calling os.path.join() instead.
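Putting that together, a minimal sketch of building the body with StringIO and sending it via smtplib (the file path, SMTP host, sender and recipient addresses are placeholders you would need to fill in) could look like:
import csv
import smtplib
from io import StringIO
from email.mime.text import MIMEText

# Build the mail body from the CSV rows.
csv_text = StringIO()
with open('folder_path/Data.csv', 'r') as csvfile:  # placeholder file path
    for row in csv.reader(csvfile, delimiter=' ', quotechar='|'):
        csv_text.write(', '.join(row) + '\n')

msg = MIMEText(csv_text.getvalue())
msg['Subject'] = 'CSV contents'
msg['From'] = 'sender@example.com'       # placeholder
msg['To'] = 'recipient@example.com'      # placeholder

with smtplib.SMTP('smtp.example.com') as server:  # placeholder SMTP host
    server.send_message(msg)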
