writing csv to tarfile but with no extension inside - python

I'm trying to create a .csv file and then create a .gz file with the previous csv inside. The problem is that the .gz file is created with a file with the correct file name but without the .csv extension and I don't know how to solve it.
This is the relevant code:
new_file_name = 'test'
data = [['some','test'],['data','strings']]
csv_fname = '%s.csv' % new_file_name
tar_fname = '%s.gz' % new_file_name
with open(csv_fname, 'wb') as csvfile:
swriter = csv.writer(csvfile)
for row in data:
swriter.writerow(row)
print 'creating archive'
out = tarfile.open(tar_fname, 'w:gz')
try:
print 'adding %s' % csv_fname
out.add(csv_fname)
finally:
print 'closing'
out.close()

Related

How to change smiles to images(png) using RDKit in python script

How to save image files after generate image file in python script?
def mols_to_pngs(mols, basename = "test"):
filenames = []
for i, mol in enumerate(mols):
filename = "%s%d.png" % (basename, i)
Draw.MolToFile(mol,filename)
filenames.append(filename)
return filenames
and I want to this process automatically using csv file and python script
To automatically save image files generated from a python script using a csv file, you need to read the data from the csv file and process it in your script. Here's a basic example:
import csv
import sys
def mols_to_pngs(mols, basename = "test"):
filenames = []
for i, mol in enumerate(mols):
filename = "%s%d.png" % (basename, i)
Draw.MolToFile(mol,filename)
filenames.append(filename)
return filenames
# Read the csv file
try:
with open('data.csv', 'r') as file:
reader = csv.reader(file)
# Skip the header row
next(reader)
mols = []
for row in reader:
# Process the data from the csv file
mol = process_data(row)
mols.append(mol)
except IOError:
print("Could not read file:", file)
sys.exit()
# Save the image files
filenames = mols_to_pngs(mols)
print("Saved the following files:", filenames)
Note that you need to replace the process_data function with your own implementation to process the data from the csv file and create the mol objects. Additionally, you may need to modify the code to match your specific use case.

Python - Read files from folder and Write CSV file in format

import glob
import os
import csv
from collections import OrderedDict
#Remove output file if already exists. Resolve the append Issue
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
if os.path.isfile(file_path):
os.remove(file_path)
#
list_of_files = glob.glob('C:\\Users\\Desktop\\Cobol\\*.CBL') # Input files in Folder
Fields = ['Program Name', 'LinesofCode'] # to be displayed in output CSV file
# opening output csv file to write (Fields)
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile1:
csvwriter = csv.writer(csvfile1)
csvwriter.writerow(Fields)
csvfile1.close()
def process_files_loc(list_of_files):
for fileName in list_of_files:
with open(fileName) as i:
count = sum(1 for line in i)
my_dict = {i : count} #input filename and its lines of code
ordered_dict = OrderedDict() #using OrderedDict
print(ordered_dict)
# creating ordered dict from dict
ordered_dict = OrderedDict(my_dict)
print(ordered_dict)
# writing records of Program name and LinesofCode to output csv file
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile2:
csvwriter = csv.writer(csvfile2)
csvwriter.writerows(ordered_dict)
csvfile2.close()
process_files_loc(list_of_files)
Output in Teminal (Error):
PS C:\Users\Python-1> & C:/Users/AppData/Local/Programs/Python/Python310/python.exe c:/Users/Python-1/one.py
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\ABCDEFGH.CBL' mode='r' encoding='cp1252'>, 191)])
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\IJKLMNOP.CBL' mode='r' encoding='cp1252'>, 195)])
Actual output of file in Folder:
C:\Users\Desktop\Cobol\Outputs
Name Date Modified Type Size
LOC3X.csv 9/15/2022 time Comma Seperated 1KB
Problem: Script executed and Read 2 CBL files in the Folder, and created 1 CSV file in output folder. The output CSV file to have,
Program Name LinesofCode
ABCDEFGH.CBL 191
IJKLMNOP.CBL 195
However, the actual output lines in CSV file is,
Program Name LinesofCode
Try something like this:
import glob
import csv
import os
def process_files_loc(files):
res = []
for file in files:
with open(file) as f:
line_count = len([line.strip("\n") for line in f if line != "\n"])
res.append([os.path.basename(f.name), line_count])
return res
if __name__ == '__main__':
with open('C:\\Users\\Main\\Desktop\\test\\test.csv', 'w', newline='') as f:
csvwriter = csv.writer(f)
csvwriter.writerow(['Program Name', 'LinesofCode'])
csvwriter.writerows(process_files_loc(glob.glob('C:\\Users\\Main\\Desktop\\test\\*.PY')))
Result:
result
Regards,

Converting a list of txt files into a list of csv files with python

I have the rolling code to convert a single .txt file into a a single .csv file, but I need the code to iterate over a directory of .txt files and gives out a directory of the same .txt files but in .csv format.
import csv
textfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.txt'
outfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.csv'
with open(textfile, 'r') as csvfile:
In_text = csv.reader(csvfile, delimiter=':')
all_rows = []
row_dict = {}
count_row = 1
for row in In_text:
if len(row) > 0:
row_dict[row[0].strip()] = row[1].strip()
if count_row % 4 == 0:
all_rows.append(row_dict)
row_dict = {}
count_row += 1
print(all_rows)
keys = all_rows[0].keys()
print(keys)
with open(outfile, 'w', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(all_rows)
So assuming you have your existing function
def text_to_csv(infilepath, outfilepath):
...
which can read a text file from infilepath and output the csv to outfilepath, then you can make a new function that takes two directories and calls it on every text file in the first:
import os
def convert_directory(in_dir, out_dir):
# Loop through every file in the directory
for filename in os.listdir(in_dir):
# Split the file name into a base portion and an extension
# e.g. "file.txt" -> ("file", ".txt")
base_name, extension = os.path.splitext(filename)
# If it is a text file, do the transformation
if extension == ".txt":
# Construct the name of the csv file to create
csv_filename = f"{base_name}.csv"
# Call your function with the full filepaths
text_to_csv(
os.path.join(in_dir, filename),
os.path.join(out_dir, csv_filename)
)
convert_directory(
"X:/general/DavidOrgEcon/GSTT/text to csv/input_dir",
"X:/general/DavidOrgEcon/GSTT/text to csv/output_dir",
)

how to get same name of csv file as the name of sql file in python

i have several sql query files. i am trying to run each file one by one and generate csv file for each sql query. But i want to CSV file name as the name of sql file name.
example-
while the time of running survey_cust.txt sql file that time csv file survey_cust.csv will generate.
sql file - "survey_cust.txt"
generate csv file - "survey_cust.csv"
path1 = "D:/Users/SPate233/Downloads/NS dashboard/sql_query/*.txt"
files = glob.glob(path1)
i = 1
for name in files:
try:
with open(name) as f:
sql_query = f.read()
cur.execute(sql_query)
result = cur.fetchall()
with open("output_%s.csv" % i, 'w') as fp:
a = csv.writer(fp, delimiter=',')
a.writerow([i[0] for i in cur.description])
a.writerows(result)
i+=1
except:
print("error")
You might want to parse the SQL file path to get the file name, and use it for the csv file name.
csv_name = os.path.split(name)[1].split('.')[0] + ".csv"
#os.path.split(name)[1] gets you the file name with the extension (txt?)
#split by the extension and get the name, then add csv extension
with open(csv_name, 'w') as fp:
...
Is that what you were asking?
Use os.path.basename:
import os
filename = os.path.basename('/root/dir/sub/file.txt')
# > filename = 'file.txt'
then simply replace the extention:
fname, fext = os.path.splitext(filename)
csvfile = '{0}.csv'.format(fname)
Or just:
name = '/root/dir/sub/file.txt'
csvfile = '{0}.csv'.format(os.path.splitext(os.path.basename(name))[0])

How to save data from python into a csv file

I've got a program that on the end prints a "match" I wanted to save the data in this "match" to a csv file, how can I do that? I've wrote some code, to save this variable, but it doesn't write anything
Here's my code:
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
file_list = os.listdir(r"C:\\PROJECT\\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\\PROJECT\\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
writer.writerows('%s' %match)
The part where I tried to save it into a csv file is the last 3 lines of code.
Here's a print of the "match" format: https://gyazo.com/930f9dad12109bc50825c91b51fb31f3
the way your code is structured, you iterate over the matches in your for loop, then, when the loop is finished, you save the last match in your CSV. You probably want to write each match in your CSV instead, inside the for loop.
try to replace the last lines of your code (starting at the last for loop) by:
with open('score.csv', 'wt') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
writer.writerow(match)
Assuming you already have your "match", you can use the CSV module in Python. The writer should get your job done.
It would be more helpful if you could elaborate on the format of your data.

Categories

Resources