Reading 50 CSV files - python

I want to read 50 CSV files and write the combined results to a single CSV file. My current code reads only a single CSV file (1.csv) and writes the output to out.csv. How can I tweak this code? Please help.
import csv

# Copy the header of 1.csv to out.csv, then keep only the rows whose
# 9th column equals 'READ' and whose 11th column is not '0000'.
# NOTE: the original was Python 2 code (reader.next(), binary file
# modes). In Python 3, csv files must be opened in text mode with
# newline='' and the header fetched with the built-in next().
with open("1.csv", "r", newline="") as f1, \
        open("out.csv", "w", newline="") as f2:
    reader = csv.reader(f1)
    writer = csv.writer(f2)
    # Copy the header row through unchanged.
    writer.writerow(next(reader))
    for row in reader:
        if row[8] == 'READ' and row[10] != '0000':
            writer.writerow(row)

Try using glob to loop through the files, reading each one and appending its matching lines to a list.
To search each file for lines containing an identifier, use re.
import glob
import re

# Collect every line containing the marker 'READ' from all csv files
# in the target folder.
out = []
# Hoisted out of the loop: compile the pattern once instead of letting
# re.search re-look it up for every line.
read_marker = re.compile('READ')
for path in glob.glob("path/to/files/*.csv"):
    # 'with' guarantees the handle is closed even on error; the
    # original left every file open.
    with open(path, 'r') as fh:
        for line in fh:
            if read_marker.search(line):
                out.append(line)

You're going to need to look into something like os in Python and walk through your directories of CSV files, working each one individually. Instead of writing out to a new file each time one is processed, you append to the single file you are writing to.
This example will walk through all documents connected to the root tree finding all files in all directories. This could be overkill if you have all your files in one directory.
import os

# Walk the whole tree rooted at the current directory and print an
# indented outline: one line per directory, then one further-indented
# line per file inside it.
for dirpath, dirnames, filenames in os.walk("."):
    # Depth = number of path components below the starting point.
    depth = len(dirpath.split(os.sep)) - 1
    print(depth * '---', os.path.basename(dirpath))
    for entry in filenames:
        print((depth + 1) * '---', entry)

Related

How to read pairwise csv and json files having same names inside a folder using python?

Consider my folder structure having files in these fashion:-
abc.csv
abc.json
bcd.csv
bcd.json
efg.csv
efg.json
and so on — i.e. pairs of CSV and JSON files having the same names. I have to perform the same operation by reading each pair of same-named files, do some operation, and proceed to the next pair of files. How do I go about this?
Basically what i have in mind as a pseudo code is:-
# Pseudo code from the question: iterate the folder, reading each
# csv/json pair together.
# NOTE(review): as written, every iteration re-reads the hard-coded
# pair 'abc.csv'/'abc.json' instead of deriving the names from the
# loop variable `files`; the answers below derive the names properly.
for files in folder_name:
    df_csv=pd.read_csv('abc.csv')
    df_json=pd.read_json('abc.json')
    # some script to execute
    #now read the next pair and repeat for all files
Did you think of something like this?
import os

# Pair each .csv file in the current folder with the .json file that
# shares its base name, opening both together.
for entry in os.listdir():
    # Start from the csv half of each pair only.
    if not entry.endswith(".csv"):
        continue
    with open(entry) as csv_file:
        base, _ext = os.path.splitext(entry)
        companion = base + ".json"
        with open(companion) as json_file:
            # do something
            pass
You can use the glob module to extract the file names matching a pattern:
import glob
import os.path

# For every csv file in the current directory, find its same-named
# json companion; report and skip any pair whose json half is missing.
for csvfile in glob.iglob('*.csv'):
    # splitext is more robust than slicing a fixed number of
    # characters off the end (the original used csvfile[:-3]).
    jsonfile = os.path.splitext(csvfile)[0] + '.json'
    # optionally control file existence
    if not os.path.exists(jsonfile):
        # show error message
        ...
        continue
    # do smth. with csvfile
    # do smth. else with jsonfile
    # and proceed to next pair
If the directory structure is consistent you could do the following:
import os

# Assumes a consistent directory layout: every file is half of a
# csv/json pair sharing a base name.
dir_path = './path/to/dir'
# splitext handles base names that themselves contain dots, which
# x.split('.')[0] would truncate.
for f_name in {os.path.splitext(x)[0] for x in os.listdir(dir_path)}:
    # FIX: the original passed the literal string "{f_name}.csv" —
    # the f-string prefix was missing, so pandas looked for a file
    # actually named "{f_name}.csv". Also join the directory so the
    # files are found regardless of the current working directory.
    df_csv = pd.read_csv(os.path.join(dir_path, f"{f_name}.csv"))
    df_json = pd.read_json(os.path.join(dir_path, f"{f_name}.json"))
    # execute the rest

How do I see if the contents of a csv file exists as a file in another directory?

EDIT:
To better explain my dilemma, I have a csv file that lists a number of applications numbered XXXXXX. Each of these applications has a corresponding xml file that exists in another directory. I'm essentially attempting to write a script that:
unzips the folder that contains the xml files and the csv file.
parse the entries within the csv file and sees that that each application listed in the csv file has a corresponding xml file.
Output another CSV file that sets an application to true if the xml file exists.
So far I've written the script to unzip, but I'm having a hard time wrapping my head around step 2 and 3.
from tkinter import Tk
from tkinter.filedialog import askdirectory
import zipfile
import os
import xml.etree.ElementTree as ET
import pandas as pd
from datetime import datetime
def unzipXML(root):
    """Extract every .zip archive found directly inside *root* into *root*.

    Prints a timestamped progress message before and after extraction.
    """
    started = datetime.now().strftime("%b. %d - %H:%M:%S")
    print(f'({started}) Stage 1 of 5: Unzipping folder(s)...')
    # Collect the paths of all zip archives sitting in the root folder.
    archives = [root + "/" + name
                for name in os.listdir(root)
                if name.endswith(".zip")]
    # Extract each archive into the root folder itself.
    for archive in archives:
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(root)
    finished = datetime.now().strftime("%b. %d - %H:%M:%S")
    print(f'({finished}) {len(archives)} folder(s) unzipped successfully.')
Loop through the names in the csv, calling os.path.exists() on each one.
# Read application names from filenames.csv and record, one row per
# application, whether a matching <name>.xml exists in directory_path.
# NOTE(review): `directory_path` must be defined before this runs.
with open("filenames.csv") as inf, open("apps.csv", "w") as outf:
    in_csv = csv.reader(inf)
    out_csv = csv.writer(outf)
    for row in in_csv:
        # replace [0] with the correct field number for your CSV
        app_name = row[0]
        xml_path = os.path.join(directory_path, app_name + ".xml")
        status = 'exists' if os.path.exists(xml_path) else 'notexists'
        out_csv.writerow([app_name, status])
I don't know if I understand your problem, but maybe this will help:
#Get files from path
# FIX: the original built the pattern as './' + '\\*.csv', i.e. the
# string './\*.csv', which mixes separators and matches nothing on
# POSIX systems; os.path.join builds a portable pattern instead.
List_Of_Files = glob.glob(os.path.join('.', '*.csv'))
for file_name in List_Of_Files:
    # NOTE(review): `your_var` is a placeholder to be defined by the
    # caller before this loop runs.
    if file_name == your_var:
        ...

Concatenating files of different directories to one file (Python)

so I have managed to concatenate every single .txt file of one directory into one file with this code:
import os
import glob

# Concatenate every .txt file of the ham folder into new.txt.
folder_path = "/Users/EnronSpam/enron1/ham"
# Open the destination once, instead of re-opening it in append mode
# for every single source file as the original did.
with open('new.txt', 'a') as out:
    for filename in glob.glob(os.path.join(folder_path, '*.txt')):
        # The enron corpus files are latin-1 encoded.
        with open(filename, 'r', encoding="latin-1") as f:
            out.write(f.read())
but in my 'EnronSpam' folder there are actually multiple directories (enron 1-6), each of which has a ham directory. How is it possible to go through each directory and add every single file of that directory into one file?
If you just want to collect all the txt files from the enron[1-6]/ham folders try this:
glob.glob("/Users/EnronSpam/enron[1-6]/ham/*.txt")
It will pick up all txt files from the enron[1-6] folders' ham subfolders.
Also a slightly reworked snippet of the original code looks like this:
import glob

# One glob pattern covers the ham subfolder of every enron1..enron6
# directory; write all of their txt contents into new.txt.
glob_path = "/Users/EnronSpam/enron[1-6]/ham/*.txt"
with open("new.txt", "w") as target:
    for source in glob.glob(glob_path):
        # The corpus files are latin-1 encoded.
        with open(source, "r", encoding="latin-1") as fh:
            target.write(fh.read())
Instead of always opening and appending to the new file it makes more sense to open it right at the beginning and write the content of the ham txt files.
So, given that the count and the names of the directories are known, you should just add the full paths in a list and loop execute it all for each element:
import os
import glob

# The question has ham folders under enron1..enron6; build the full
# list programmatically (the original spelled out only enron1-3).
folder_list = [f"/Users/EnronSpam/enron{i}/ham" for i in range(1, 7)]
# Open the destination once; re-opening it in append mode for every
# source file (as the original did) is wasteful.
with open('new.txt', 'a') as out:
    for folder in folder_list:
        for filename in glob.glob(os.path.join(folder, '*.txt')):
            # The corpus files are latin-1 encoded.
            with open(filename, 'r', encoding="latin-1") as f:
                out.write(f.read())

find and replace string from multiple files in a folder using python

I want to find string e.g. "Version1" from my files of a folder which contains multiple ".c" and ".h" files in it and replace it with "Version2.2.1" using python file.
Anyone know how this can be done?
Here's a solution using os, glob and ntpath. The results are saved in a directory called "output". You need to put this in the directory where you have the .c and .h files and run it.
Create a separate directory called output and put the edited files there:
import glob
import os

# Copy every .c and .h file from the current directory into ./output,
# replacing 'Version1' with 'Version2.2.1' on the way. The original
# files are left untouched.
output_dir = "output"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
for f in glob.glob("*.[ch]"):
    # os.path.basename + os.path.join are portable; the original's
    # ntpath module and '%s/%s' formatting were Windows-flavoured.
    dest = os.path.join(output_dir, os.path.basename(f))
    with open(f, 'r') as inputfile, open(dest, 'w') as outputfile:
        # Line-by-line keeps memory use flat for large sources.
        for line in inputfile:
            outputfile.write(line.replace('Version1', 'Version2.2.1'))
Replace strings in place:
IMPORTANT! Please make sure to back up your files before running this:
import glob

# Rewrite each .c/.h file of the current directory in place, swapping
# every occurrence of 'Version1' for 'Version2.2.1'.
for path in glob.glob("*.[ch]"):
    with open(path, "r") as fh:
        updated = fh.read().replace('Version1', 'Version2.2.1')
    # Re-open for writing only after the full content is in memory.
    with open(path, "w") as fh:
        fh.write(updated)

Read filenames from CSV and then copy the files to different directory -part 2

I am trying to write a python code that will read csv and after reading look for the files in the directory and then move the files to the destination folder. I'm using Python 3.6.
I have the same problem which is defined in the code here: Read filenames from CSV and then copy the files to different directory
I tried their method, but when I run this code it says "no such file or directory".
below is code which i modified, in the code they used agrv but i am defining the path...
# Question code: copy each file listed in maharera.csv into D:\movement.
import os
import shutil
import csv
import sys
csv_file = "maharera.csv"
# NOTE(review): prefer raw strings (r"D:\maharera") for Windows paths —
# '\m' happens not to be an escape sequence, but this style is fragile.
existing_path_prefix = "D:\maharera"
new_path_prefix = "D:\movement"
with open(csv_file, 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        filename = row[0]  # column 1: the file's name
        filepath = row[1]  # column 2: folder currently holding the file
        new_filename = os.path.join(new_path_prefix, filename)
        # NOTE(review): two bugs called out in the answer below — the
        # header row is never skipped, and `filepath` is the containing
        # folder, so the file name must be joined onto it before copying.
        shutil.copy(filepath, new_filename)
I am trying to move files within the D drive, but into a different folder; the files to move are those listed in the CSV file.
The CSV is attached here https://www.dropbox.com/s/0t9skcuje6jyew1/maharera.csv?dl=0
In the CSV there are two columns: one is the filename and the other is the folder where the file is currently kept.
Their new destination is specified in the code.
Two mistakes are found in your code.
you include the header/first row of your csv file.
you are trying to copy the folder of the file instead of the file itself.
To avoid the errors, you can try
# Copy each file named in the csv (column 1) from its current folder
# (column 2) into new_path_prefix, skipping the csv's header row.
with open(csv_file, 'r') as f:
    reader = csv.reader(f)
    # Idiomatic header skip: consume one row up front instead of
    # testing the enumerate index on every iteration. The second
    # argument keeps an empty csv from raising StopIteration.
    next(reader, None)
    for row in reader:
        filename, filepath = row
        # The csv stores the containing folder, not the full file
        # path, so join the file name onto it first.
        old_filename = os.path.join(filepath, filename)
        new_filename = os.path.join(new_path_prefix, filename)
        shutil.copy(old_filename, new_filename)

Categories

Resources