I have some csv files in a folder and I am trying to delete all blank rows and move the news files into a new folder.
Here is the code I have:
import csv
import glob
import os
import shutil
path = 'in_folder/*.csv'
files=glob.glob(path)
#Read every file in the directory
x = 0 #counter
for filename in files:
with open(filename, 'r') as fin:
data = fin.read().splitlines(True)
with open(filename, 'w') as fout:
for line in fin.readlines():
if ''.join(line.split(',')).strip() == '':
continue
fout.write(line)
x += 1
dir_src = "in_folder"
dir_dst = "out_folder"
for file in os.listdir(dir_src):
if x>0:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.move(src_file, dst_file)
What the code is doing right now is deleting everything from the files and moving them to the new folder. I want my files to be the same but with deleted blank rows.
You can just output every line to the new file, no need to do any moving afterwards:
dir_src = "in_folder/*.csv"
dir_dst = "out_folder"
files = glob.glob(dir_src)
# Read every file in the directory
x = 0 # counter
for filename in files:
outfilename = os.path.join(dir_dst, os.path.basename(filename))
with open(filename, 'r') as fin:
with open(outfilename, 'w') as fout:
for line in fin:
if ''.join(line.split(',')).strip() == '':
continue
fout.write(line)
x += 1
try this.
for filename in files:
with open(filename, 'r') as fin:
data = fin.read().splitlines(True)
with open(filename, 'w') as fout:
for line in data:
if ''.join(line.split(',')).strip() == '':
continue
fout.write(line)
x += 1
Related
I want all files in directory "path" to have the string "error" removed from them and the result to be saved in the same file that was editted. My current code (below) ends up clearing up the entire file, rather than just removing the string and keeping everything else the same.
import os
path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
with open(os.path.join(path, file)) as fin, open(os.path.join(path, file), "w+") as fout:
for line in fin:
line = line.replace(error, "f")
fout.write(line)
import os
path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
with open(os.path.join(path, file), 'r') as fin:
d = din.read()
with open(os.path.join(path, file), "w") as fout:
d = d.replace(error, "")
fout.write(d)
This is the correct way to do this:
import os
path = "path"
for file in os.listdir(path):
if not os.path.isdir(file):
with open(file, 'r+') as fd:
contents = fd.read().replace('error', '')
fd.seek(0)
fd.write(contents)
fd.truncate()
I have the following script that does multiple operations for .txt file (splits by tag, removes line breaks, sentence per line & blank lines in between).
I know how to do this for a specific file:
import re
with open("input.txt", "r") as f:
i = 1
w = None
for line in f:
line = line.strip()
if line:
if w is not None and line == "</div>":
w.close()
i += 1
w = None
else:
if w is None:
w = open('output_%i.txt' % i, 'w')
for s in re.split("(?<=[.!:;?])\s+", line):
w.write(s + '\n\n')
How can I apply this to all .txt files in a dir?
I cannot figure out how to open and write to all files in a dir.
I thought this would work:
import os
import re
path = "/Users/simon/dic/en-new-oxford/output"
for filename in os.listdir(path):
with open(os.path.join(path, filename), 'r') as f:
i = 1
w = None
for line in f:
line = line.strip()
if line:
if w is not None and line == "</d:entry>":
w.close()
i += 1
w = None
else:
if w is None:
w = open(os.path.join('path, filename_%i.txt') % i, 'w')
for s in re.split("(?<=[.!:;?])\s+", line):
w.write(s + '\n\n')
What is wrong about this?
This does work. You probably had some other directory in your directory messing up your code.Also checks if the file ends with '.txt' .
from os import listdir
from os.path import isfile, join
path = ''
# Get all Files in directory , not other directory's.
allFiles = [join(path, file) for file in listdir(path) if isfile(join(path, file)) and file.endswith('.txt')]
for file in allFiles:
with open(file) as f:
"do stuff here"
it's a python script.
I am writing the content of all the files with a special extension to a txt file skipping the first line in files, removing duplicates and special Paths
import os
skipLines = "/home/YY"
mainDir = "C:/XX"
directory = os.fsencode(mainDir)
endFile = mainDir+"/endFile.txt"
open(endFile, 'w').close()
fileoutput = open(endFile, "a")
for file in os.listdir(directory):
filename = os.fsdecode(file)
fileFullPath = mainDir+"/"+filename
if filename.endswith(".dep"):
print("working on : ", filename)
file = open(fileFullPath, "r")
next(file)
for line in file:
if skipLines not in line:
fileoutput.write(line)
else:
continue
file.close()
fileoutput.close()
lines_seen = set()
with open("C:/XX/endFile.txt", "r+") as f:
d = f.readlines()
f.seek(0)
for i in d:
if i not in lines_seen:
f.write(i)
lines_seen.add(i)
f.truncate()
fileoutput.close()
my end file looks like this:
F0/XXX.c
../../F1/F2/X/Test.h
../../F1/F2/Z/Test1.h
../../../F1/F3/Y/Test3.h
.
.
.
Here is my question:
how can I copy these file from the paths indicated in each line, and create a folder and create the same file path and copy the files in the corresponding paths?
I have the rolling code to convert a single .txt file into a a single .csv file, but I need the code to iterate over a directory of .txt files and gives out a directory of the same .txt files but in .csv format.
import csv
textfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.txt'
outfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.csv'
with open(textfile, 'r') as csvfile:
In_text = csv.reader(csvfile, delimiter=':')
all_rows = []
row_dict = {}
count_row = 1
for row in In_text:
if len(row) > 0:
row_dict[row[0].strip()] = row[1].strip()
if count_row % 4 == 0:
all_rows.append(row_dict)
row_dict = {}
count_row += 1
print(all_rows)
keys = all_rows[0].keys()
print(keys)
with open(outfile, 'w', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(all_rows)
So assuming you have your existing function
def text_to_csv(infilepath, outfilepath):
...
which can read a text file from infilepath and output the csv to outfilepath, then you can make a new function that takes two directories and calls it on every text file in the first:
import os
def convert_directory(in_dir, out_dir):
# Loop through every file in the directory
for filename in os.listdir(in_dir):
# Split the file name into a base portion and an extension
# e.g. "file.txt" -> ("file", ".txt")
base_name, extension = os.path.splitext(filename)
# If it is a text file, do the transformation
if extension == ".txt":
# Construct the name of the csv file to create
csv_filename = f"{base_name}.csv"
# Call your function with the full filepaths
text_to_csv(
os.path.join(in_dir, filename),
os.path.join(out_dir, csv_filename)
)
convert_directory(
"X:/general/DavidOrgEcon/GSTT/text to csv/input_dir",
"X:/general/DavidOrgEcon/GSTT/text to csv/output_dir",
)
Hey I have this code that concatenate all the csv files of a folder in a only csv file. The problem is that this line of code if i! = 0: prevents the first row of each csv from being read except in the first csv. Any ideas on how to modify it so that it can read all the rows of the csv
#!/usr/bin/env python
import pandas as pd
import glob, os
import shutil
#import csv files from folder
path = r'/Users/sonia/Animalsmanage/output'
allFiles = glob.glob(path + "/*.csv")
with open('finaloutput.csv', 'wb') as outfile:
for i, fname in enumerate(allFiles):
with open(fname, 'rb') as infile:
if i != 0:
infile.readline()
outfile.write(bytearray(b'\n')) # add a empty line
shutil.copyfileobj(infile, outfile)
print(fname + " has been imported.")
If you want to include all the rows from the input file and separate the output of each input file by a blank, you can try this:
with open('finaloutput.csv', 'wb') as outfile:
for i, fname in enumerate(allFiles):
with open(fname, 'rb') as infile:
shutil.copyfileobj(infile, outfile)
outfile.write(bytearray(b'\n'))
print(fname + " has been imported.")
So if I have 3 input files:
test1.csv:
name,id
dan,22
keith,23
test2.csv:
name,id
mike,33
phil,44
test3.csv:
name,id
john,5
peter,6
My output file will contain:
name,id
dan,22
keith,23
name,id
mike,33
phil,44
name,id
john,5
peter,6
EDIT:
If you don't want the blank line at the end, you can use the following instead:
numFiles = len(allFiles)
with open('finaloutput.csv', 'wb') as outfile:
for i, fname in enumerate(allFiles):
with open(fname, 'rb') as infile:
shutil.copyfileobj(infile, outfile)
if i < numFiles - 1:
outfile.write(bytearray(b'\n'))
print(fname + " has been imported.")