Read and write multiple files files in python? [duplicate] - python

I want to write a program for this: In a folder I have n number of files; first read one file and perform some operation then store result in a separate file. Then read 2nd file, perform operation again and save result in new 2nd file. Do the same procedure for n number of files. The program reads all files one by one and stores results of each file separately. Please give examples how I can do it.

I think what you miss is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob
list_of_files = glob.glob('./*.txt') # create the list of file
for file_name in list_of_files:
FI = open(file_name, 'r')
FO = open(file_name.replace('txt', 'out'), 'w')
for line in FI:
FO.write(line)
FI.close()
FO.close()

import sys
# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
print(n) #print out the filename we are currently processing
input = open(n, "r")
output = open(n + ".out", "w")
# do some processing
input.close()
output.close()
Then call it like:
./foo.py bar.txt baz.txt

You may find the fileinput module useful. It is designed for exactly this problem.

I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os
OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
for file in files:
read_f = open(os.join(path,file),'r')
write_f = open(os.path.join(OUTPUT_DIR,file))
# Do stuff

Combined answer incorporating directory or specific list of filenames arguments:
import sys
import os.path
import glob
def processFile(filename):
fileHandle = open(filename, "r")
for line in fileHandle:
# do some processing
pass
fileHandle.close()
def outputResults(filename):
output_filemask = "out"
fileHandle = open("%s.%s" % (filename, output_filemask), "w")
# do some processing
fileHandle.write('processed\n')
fileHandle.close()
def processFiles(args):
input_filemask = "log"
directory = args[1]
if os.path.isdir(directory):
print "processing a directory"
list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
else:
print "processing a list of files"
list_of_files = sys.argv[1:]
for file_name in list_of_files:
print file_name
processFile(file_name)
outputResults(file_name)
if __name__ == '__main__':
if (len(sys.argv) > 1):
processFiles(sys.argv)
else:
print 'usage message'

from pylab import *
import csv
import os
import glob
import re
x=[]
y=[]
f=open("one.txt",'w')
for infile in glob.glob(('*.csv')):
# print "" +infile
csv23=csv2rec(""+infile,'rb',delimiter=',')
for line in csv23:
x.append(line[1])
# print len(x)
for i in range(3000,8000):
y.append(x[i])
print ""+infile,"\t",mean(y)
print >>f,""+infile,"\t\t",mean(y)
del y[:len(y)]
del x[:len(x)]

I know I saw this double with open() somewhere but couldn't remember where. So I built a small example in case someone needs.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
""" This function will search a directory text files to be edited. """
## define some regex for our search and replace. We are looking for <, > and &
## To replaced with &ls;, > and &. We might want to replace proper whitespace
## chars to as well? (r'\t', ' ') and (f'\n', '<br>')
search_ = ((r'(<)', '<'), (r'(>)', '>'), (r'(&)', '&'))
## Read and loop our file location. Our location is the same one that our python file is in.
for loc in listdir(abspath(dirname(__file__))):
## Here we split our filename into it's parts ('fileName', '.txt')
name = splitext(loc)
if name[1] == '.txt':
## we found our .txt file so we can start file operations.
with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:
## read our first file
retFile = file_1.read()
## find and replace some text.
for find_ in search_:
retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)
## finally we can write to our newly created text file.
file_2.write(retFile)

This thing also works for reading multiple files, my file name is fedaralist_1.txt and federalist_2.txt and like this, I have 84 files till fedaralist_84.txt
And I'm reading the files as f.
for file in filename:
with open(f'federalist_{file}.txt','r') as f:
f.read()

Related

How to copy file from directory A to directory B using a list

I'm trying to copy files from directory A, to directory B, based on a txt file containing the list of files to be extracted - located in directory B. I referred to this code: How to extract files from a particular folder with filename stored in a python list?
but it doesn't seem to enter the if (where I have put the 'in here' printout). Could someone tell me what I am doing wrong?
This is the code:
import os
import shutil
def read_input_file():
my_file = open("/mnt/d/Downloads/TSU/remaining_files_noUSD_19Jan.txt", "r")
# reading the file
data = my_file.read()
data_into_list = data.split("\n")
#print(data_into_list)
my_file.close()
return data_into_list
def filter_data(list_of_files):
path="/mnt/e/Toyota Smarthome/Untrimmed/Videos_mp4"
path_to_be_moved="/mnt/d/Downloads/TSU"
#print(list_of_files)
for file in os.listdir(path):
#print(file)
if file in list_of_files:
print("in here")
print(file)
shutil.copytree(path,path_to_be_moved)
#os.system("mv "+path+file+" "+path_to_be_moved)
if __name__ == "__main__":
list = read_input_file()
filter_data(list)
I am using python3 via WSL.
the mp4 folder contains multiple videos, and the output of "
read input file
is as follows
"
Thank you!
I think copytree from shutil has another purpose to just move file, it moves an entire structure. I'd use shutil.move
import os
import shutil
def read_input_file():
my_file = open("list.txt", "r")
data = my_file.read()
data_into_list = data.split("\n")
my_file.close()
return data_into_list
def filter_data(list_of_files):
path="directoryA/"
path_to_be_moved="directoryB/"
for file in os.listdir(path):
# print(file)
if file in list_of_files:
print("in here")
print(file)
shutil.move(path+file,path_to_be_moved+file)
mylist = read_input_file()
filter_data(mylist)
just saw your update, be careful, data_into_list = data.split("\n") is for a file.txt with a list separated with an enter. yours is with a comma and space so you'll have to change that.
Also you shouldn't use list as a variable name, mylist for example is better. list() is used to create list

Python - Trying to extract lines containing a key word from multiple files in a directory

I am trying to build a script which can look for all files in a certain folder, and pull any lines of text that contain a key word or phrase.
Very new to python, and not really understanding how to piece together multiple suggestions from others I have seen.
import re
from glob import glob
search = []
linenum = 0
pattern = re.compile("Dawg", re.IGNORECASE) # Compile a case-insensitive regex
path = 'C:\\Users\\Username\\Downloads\Testdataextraction\\Throw it in\\Audit_2022.log'
filenames = glob('*.log')
print(f"\n{filenames}")
with open (path, 'rt') as myfile:
for line in myfile:
linenum += 1
if pattern.search(line) != None: # If a match is found
search.append((linenum, line.rstrip('\n')))
for x in search: # Iterate over the list of tuples
print("\nLine " + str(x[0]) + ": " + x[1])
This does everything exactly how I want it, except can only see one file at a time.
My issue arises when I try deleting 'Audit_2022.log' from the end of the path = line.
Python says "PermissionError: [Errno 13] Permission denied: 'C:\Users\Username\Downloads\Testdataextraction\Throw it in'". I assume this is because it's looking at a directory and not a file, but how can I get it to read multiple files?
Many thanks in advance!
Assuming you also need to show the filename(s) you could do this:
import re
from glob import glob
import os
p = re.compile('Dawg', re.IGNORECASE)
path = r'C:\Users\Username\Downloads\Testdataextraction\Throw it in'
for file in glob(os.path.join(path, '*.log')):
with open(file) as logfile:
for i, line in enumerate(map(str.strip, logfile), 1):
if p.search(line) is not None:
print(f'File={file}, Line={i}, Data={line}')
The reason you're getting that Exception is because open needs a filename, and if you give it just a path, it doesn't really know what to do. A minimal example could be:
path = 'C:\\Users\\Username\\Downloads\Testdataextraction\\Throw it in\\Audit_2022.log'
with open (path, 'rt') as f:
pass
If the file exists, this should run fine, but if you change it to:
path = 'C:\\Users\\Username\\Downloads\Testdataextraction\\Throw it in'
with open (path, 'rt') as f:
pass
Then this will throw the exception.
I suspect what you're trying to do is glob through all log files in path and try each one, so something like:
import os
path = 'C:\\Users\\Username\\Downloads\Testdataextraction\\Throw it in'
filenames = glob(os.path.join(path, '*.log'))
print(f"\n{filenames}")
for filename in filenames:
with open (filename, 'rt') as myfile:
...
You can use os.listdir() to get all files in a directory, then nest your opening loop for each file in the directory:
import os
folder = 'C:\\Users\\Username\\Downloads\Testdataextraction\\Throw it in'
for file in glob(os.path.join(folder, '*.log')):
with open(file, 'rt') as myfile:
for line in myfile:
linenum += 1
if pattern.match(line): # If a match is found
search.append((linenum, line.rstrip('\n')))
See os.path.join() for a better path joining alternative

Move/rewrite a txt file that matches a string in a folder with multiple txt files

I'm trying to use python to search for a string in a folder which contains multiple .txt files.
My objective is to find those files containing the string and move/or re-write them in another folder.
what I have tried is:
import os
for filename in os.listdir('./*.txt'):
if os.path.isfile(filename):
with open(filename) as f:
for line in f:
if 'string/term to be searched' in line:
f.write
break
probably there is something wrong with this but, of course, cannot figure it out.
os.listdir argument must be a path, not a pattern. You can use glob to accomplish that task:
import os
import glob
for filename in glob.glob('./*.txt'):
if os.path.isfile(filename):
with open(filename) as f:
for line in f:
if 'string/term to be searched' in line:
# You cannot write with f, because is open in read mode
# and must supply an argument.
# Your actions
break
As Antonio says, you cannot write with f because it is open in read mode.
A possible solution to avoid the problem is the following:
import os
import shutil
source_dir = "your/source/path"
destination_dir = "your/destination/path"
for top, dirs, files in os.walk(source_dir):
for filename in files:
file_path = os.path.join(top, filename)
check = False
with open(file_path, 'r') as f:
if 'string/term to be searched' in f.read():
check = True
if check is True:
shutil.move(file_path, os.path.join(destination_dir , filename))
Remember that if your source_dir or destination_dir contains some "special characters" you have to put the double back-slash.
For example, this:
source_dir = "C:\documents\test"
should be
source_dir = "C:\documents\\test"

Reading files of same typolgy in a folder

I need a little help to finish my program.
I have in a folder 20 files of the same typology, strings with corresponding values.
Is there a way to create a function that opens all the files in this way
file1 = [line.strip() for line in open("/Python34/elez/file1.txt", "r")]?
I hope I explained it well.
Thanks!
from os import listdir
from os.path import join, isfile
def contents(filepath):
with open(filepath) as f:
return f.read()
directory = '/Python34/elez'
all_file_contents = [contents(join(directory, filename))
for filename in listdir(directory)
if isfile(join(directory, filename)]
Hi Gulliver this is how i will do it:
import os
all_files = [] ## create a list to keep all the lines for all files
for file in os.listdir('./'): ## use list dir to list all files in the dir
with open(file, 'r') as f: ## use with to open file
fields = [line.strip() for line in f] ## list comprehension to finish reading the field
all_fields.extend(fields) ## store in big list
For more information about using the with statement to open and read files, please refer to this answer Correct way to write to files?

Open file by filename wildcard

I have a directory of text files that all have the extension .txt. My goal is to print the contents of the text file. I wish to be able use the wildcard *.txt to specify the file name I wish to open (I'm thinking along the lines of something like F:\text\*.txt?), split the lines of the text file, then print the output.
Here is an example of what I want to do, but I want to be able to change somefile when executing my command.
f = open('F:\text\somefile.txt', 'r')
for line in f:
print line,
I had checked out the glob module earlier, but I couldn't figure out how to actually do anything to the files. Here is what I came up with, not working.
filepath = "F:\irc\as\*.txt"
txt = glob.glob(filepath)
lines = string.split(txt, '\n') #AttributeError: 'list' object has no attribute 'split'
print lines
import os
import re
path = "/home/mypath"
for filename in os.listdir(path):
if re.match("text\d+.txt", filename):
with open(os.path.join(path, filename), 'r') as f:
for line in f:
print line,
Although you ignored my perfectly fine solution, here you go:
import glob
path = "/home/mydir/*.txt"
for filename in glob.glob(path):
with open(filename, 'r') as f:
for line in f:
print line,
You can use the glob module to get a list of files for wildcards:
File Wildcards
Then you just do a for-loop over this list and you are done:
filepath = "F:\irc\as\*.txt"
txt = glob.glob(filepath)
for textfile in txt:
f = open(textfile, 'r') #Maybe you need a os.joinpath here, see Uku Loskit's answer, I don't have a python interpreter at hand
for line in f:
print line,
This code accounts for both issues in the initial question: seeks for the .txt file in the current directory and then allows the user to search for some expression with the regex
#! /usr/bin/python3
# regex search.py - opens all .txt files in a folder and searches for any line
# that matches a user-supplied regular expression
import re, os
def search(regex, txt):
searchRegex = re.compile(regex, re.I)
result = searchRegex.findall(txt)
print(result)
user_search = input('Enter the regular expression\n')
path = os.getcwd()
folder = os.listdir(path)
for file in folder:
if file.endswith('.txt'):
print(os.path.join(path, file))
txtfile = open(os.path.join(path, file), 'r+')
msg = txtfile.read()
search(user_search, msg)
Check out "glob — Unix style pathname pattern expansion"
http://docs.python.org/library/glob.html
This problem just came up for me and I was able to fix it with pure python:
Link to the python docs is found here: 10.8. fnmatch — Unix filename pattern matching
Quote: "This example will print all file names in the current directory with the extension .txt:"
import fnmatch
import os
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
print(file)

Categories

Resources