Reading files of the same typology in a folder - python

I need a little help to finish my program.
I have in a folder 20 files of the same typology, strings with corresponding values.
Is there a way to create a function that opens all the files in this way
file1 = [line.strip() for line in open("/Python34/elez/file1.txt", "r")]?
I hope I explained it well.
Thanks!

from os import listdir
from os.path import join, isfile
def contents(filepath):
    """Return the entire text content of the file at ``filepath``.

    The file is opened in text mode with the default encoding and is closed
    again before the function returns.
    """
    with open(filepath) as f:
        return f.read()
# Folder whose regular files should all be read.
directory = '/Python34/elez'
# One string per file; the isfile() test leaves sub-directories out.
all_file_contents = [contents(join(directory, filename))
                     for filename in listdir(directory)
                     if isfile(join(directory, filename))]

Hi Gulliver, this is how I would do it:
import os
all_files = []  ## create a list to keep all the lines for all files
for file in os.listdir('./'):  ## use listdir to list all entries in the dir
    if not os.path.isfile(file):
        continue  ## open() would fail on a sub-directory
    with open(file, 'r') as f:  ## use with to open the file
        fields = [line.strip() for line in f]  ## one stripped string per line
    all_files.extend(fields)  ## store in the big list defined above
For more information about using the with statement to open and read files, please refer to this answer Correct way to write to files?

Related

Get only the txt file you want from the folder containing the txt file - Python

I have a folder with .txt files. The names of the files are:
my_file1.txt
my_file2.txt
my_file3.txt
my_file4.txt
In this way, only the last number is different.
import pickle
my_list = []
with open("/Users/users_a/Desktop/website-basic/sub_domain/sub_domain01.txt", "rb") as f1,
open("/Users/users_a/Desktop/website-ba\
sic/sub_domain/sub_domain02.txt", "rb") as f2, open("/Users/users_a/Desktop/website-
basic/sub_domain/sub_domain03.txt", "rb") as f3:
my_list.append(pickle.load(f1))
my_list.append(pickle.load(f2))
my_list.append(pickle.load(f3))
print(my_list)
In this way, I load a file and put it in the my_list variable to make a list and work. As the number of files to work increases, the code becomes too long and cumbersome.
Is there an easier and more pythonic way to load only the desired txt file??
You can use os.listdir():
import os
import pickle
my_list = []
path = "/Users/users_a/Desktop/website-basic/sub_domain"
for file in os.listdir(path):
    if file.endswith(".txt"):
        # Pickle data is binary, so the file has to be opened with "rb".
        with open(os.path.join(path, file), "rb") as f:
            # NOTE: only unpickle files you trust; pickle can execute code.
            my_list.append(pickle.load(f))
Where file is the filename of a file in path
I suggest using os.path.join() instead of hard coding the file paths
If your folder only contains the files you want to load you can just use:
for file in os.listdir(path):
    # Pickle data is binary, so the file has to be opened with "rb".
    with open(os.path.join(path, file), "rb") as f:
        my_list.append(pickle.load(f))
Edit for my_file[number].txt
If you only want files in the form of my_file[number].txt, use:
import os
import re
import pickle
my_list = []
path = "/Users/users_a/Desktop/website-basic/sub_domain"
for file in os.listdir(path):
    # The dot is escaped and the whole name must match, so names such as
    # "my_file1xtxt" or "my_file1.txt.bak" are not picked up.
    if re.fullmatch(r"my_file\d+\.txt", file):
        # Pickle data is binary, so the file has to be opened with "rb".
        with open(os.path.join(path, file), "rb") as f:
            # NOTE: only unpickle files you trust; pickle can execute code.
            my_list.append(pickle.load(f))
Online regex demo https://regex101.com/r/XJb2DF/1

How to read all .txt files from a directory

I would like to read all the contents from all the text files in a directory. I have 4 text files in the "path" directory, and my code is:
for filename in os.listdir(path):
filepath = os.path.join(path, filename)
with open(filepath, mode='r') as f:
content = f.read()
thelist = content.splitlines()
f.close()
print(filepath)
print(content)
print()
When I run the codes, I can only read the contents from only one text file.
I will be thankful that there are any advice or suggestions from you or that you know any other informative inquiries for this question in stackoverflow.
If you need to filter the files' name per suffix, i.e. file extension, you can either use the string method endswith or the glob module of the standard library https://docs.python.org/3/library/glob.html
Here an example of code which save each file content as a string in a list.
import os
path = '.'  # or your path
files_content = []
for filename in os.listdir(path):
    # Keep only names with the ".txt" extension.
    if not filename.endswith(".txt"):
        continue
    filepath = os.path.join(path, filename)
    with open(filepath, mode='r') as f:
        files_content.append(f.read())  # one string per file
With the glob way here an example
import glob
# '*.txt' matches every name in the current directory with a .txt extension.
for filename in glob.glob('*.txt'):
    print(filename)
This should list your files so that you can read them one by one. The lines of each file are stored in the all_lines list. If you wish to store the raw content too, you can append it as well.
from pathlib import Path
from os import listdir
from os.path import isfile, join
path = "path_to_dir"
# Regular files only; sub-directories are left out.
only_files = [f for f in listdir(path) if isfile(join(path, f))]
all_lines = []
for file_name in only_files:
    file_path = Path(path) / file_name
    with open(file_path, 'r') as f:
        file_content = f.read()
    # One list of lines per file is appended.
    all_lines.append(file_content.splitlines())
    print(file_content)
# use all_lines
Note: when using with you do not need to call close() explicitly
Reference: How do I list all files of a directory?
Basically, if you want to read all the files, you need to save them somehow. In your example, you are overwriting thelist with content.splitlines() on every iteration, which discards everything already in it.
Instead you should define thelist outside of the loop and use thelist.append(content.splitlines()) each time, which adds the content to the list on each iteration.
Then you can iterate over thelist later and get the data out.

Concatenating files of different directories to one file (Python)

so I have managed to concatenate every single .txt file of one directory into one file with this code:
import os
import glob
folder_path = "/Users/EnronSpam/enron1/ham"
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
with open(filename, 'r', encoding="latin-1") as f:
text = f.read()
with open('new.txt', 'a') as a:
a.write(text)
but in my 'EnronSpam' folder there are actually multiple directories (enron 1-6), each of which has a ham directory. How is it possible to go through each directory and add every single file of that directory into one file?
If you just want to collect all the txt files from the enron[1-6]/ham folders try this:
glob.glob("/Users/EnronSpam/enron[1-6]/ham/*.txt")
It will pick up all txt files from the enron[1-6] folders' ham subfolders.
Also a slightly reworked snippet of the original code looks like this:
import glob
glob_path = "/Users/EnronSpam/enron[1-6]/ham/*.txt"
# The output is opened once, before the loop, and truncated by mode "w".
# NOTE(review): input is decoded as latin-1 but the output uses the platform
# default encoding — confirm that this is intended.
with open("new.txt", "w") as a:
    for filename in glob.glob(glob_path):
        with open(filename, "r", encoding="latin-1") as f:
            a.write(f.read())
Instead of always opening and appending to the new file it makes more sense to open it right at the beginning and write the content of the ham txt files.
So, given that the count and the names of the directories are known, you should just add the full paths in a list and loop execute it all for each element:
import os
import glob
folder_list = ["/Users/EnronSpam/enron1/ham", "/Users/EnronSpam/enron2/ham", "/Users/EnronSpam/enron3/ham"]
# Open the output once in append mode instead of re-opening it for every file.
with open('new.txt', 'a') as a:
    for folder in folder_list:
        for filename in glob.glob(os.path.join(folder, '*.txt')):
            with open(filename, 'r', encoding="latin-1") as f:
                text = f.read()
            a.write(text)

Looping through folder of JSON files to extract the same keyword from each in Python

I have the Python code below in which I am attempting to access a folder called downloaded that contains multiple JSON object files.
Within each JSON there is a value keyword for which I need to extract and add to the list named keywordList
I've attempted by adding the filenames to fileList (which works ok), but I cannot seem to loop through the fileList and extract the keyword connected.
Any help much appreciated, thanks!
import os
os.chdir('/Users/Me/Api/downloaded')
fileList = []
keywordList = []
for filenames in os.walk('/Users/Me/Api/downloaded'):
fileList.append(filenames)
for file in filenames:
with open(file, encoding='utf-8', mode='r') as currentFile:
keywordList.append(currentFile['keyword'])
print(keywordList)
Your question mentioned JSON. So I have addressed that.
Let me know if this helps.
import json
import os
import glob
import pprint
keywordList = []
path = '/Users/Me/Api/downloaded'
for filename in glob.glob(os.path.join(path, '*.json')):  # only process .JSON files in folder.
    with open(filename, encoding='utf-8', mode='r') as currentFile:
        data = currentFile.read().replace('\n', '')
        keyword = json.loads(data)["keytolookup"]
        if keyword not in keywordList:  # skip values already collected
            keywordList.append(keyword)
# pprint is the module; the printing function is pprint.pprint.
pprint.pprint(keywordList)
EDIT note: Updated answer changing for loop from original response of: for filename in os.listdir(path)
OP mentioned glob version worked better. Had given that as alternative too.
You are adding the filenames in the fileList array but in the second for loop you are iterating over the filenames instead of the fileList.
import os
import json  # needed to parse each file before reading its key

os.chdir('/Users/Me/Api/downloaded')
fileList = []
keywordList = []
# os.walk yields (dirpath, dirnames, filenames) tuples, so unpack them and
# store one full path per file.
for root, _dirs, names in os.walk('/Users/Me/Api/downloaded'):
    for name in names:
        fileList.append(os.path.join(root, name))
for file in fileList:
    with open(file, encoding='utf-8', mode='r') as currentFile:
        # A file handle cannot be indexed; parse the JSON first.
        keywordList.append(json.load(currentFile)['keyword'])
Shouldn't the line for file in filenames: be for file in fileList:?
Also I think this is the correct way to use os.walk()
import os
import json  # needed to parse each file before reading its key

fileList = []
keywordList = []
for root, dirs, files in os.walk('/Users/Me/Api/downloaded', topdown=False):
    for name in files:
        # Join with the directory so the path is valid from any cwd.
        fileList.append(os.path.join(root, name))
for file in fileList:
    with open(file, encoding='utf-8', mode='r') as currentFile:
        # A file handle cannot be indexed; parse the JSON first.
        keywordList.append(json.load(currentFile)['keyword'])
print(keywordList)
open() returns a filehandle to the open file. You still need to loop over the contents of the file. By default, the contents are split by line-end (\n). After that, you have to match the keyword to the line.
Replace the second for loop with:
for file in filenames:
    with open(file, encoding='utf-8', mode='r') as currentFile:
        # Iterating a file handle yields one line at a time.
        for line in currentFile:
            if 'keyword' in line:
                # NOTE(review): this appends the literal string 'keyword',
                # not the matching line — confirm that this is intended.
                keywordList.append('keyword')
Also, have a look at the Python JSON module. Recursive iteration over json/dicts is answered here.
You are using currentFile like it is a json object, but it is only a file handle. I have added the missing step, the parsing of the file to a json object.
import os
import json
os.chdir('/Users/Me/Api/downloaded')
fileList = []
keywordList = []
# os.walk yields (dirpath, dirnames, filenames) tuples, so unpack them and
# store one full path per file.
for root, _dirs, names in os.walk('/Users/Me/Api/downloaded'):
    for name in names:
        fileList.append(os.path.join(root, name))
for file in fileList:
    with open(file, encoding='utf-8', mode='r') as currentFile:
        data = json.load(currentFile)  # Parses the file to json object
        keywordList.append(data['keyword'])
print(keywordList)

Read and write multiple files in python? [duplicate]

I want to write a program for this: In a folder I have n number of files; first read one file and perform some operation then store result in a separate file. Then read 2nd file, perform operation again and save result in new 2nd file. Do the same procedure for n number of files. The program reads all files one by one and stores results of each file separately. Please give examples how I can do it.
I think what you miss is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob
list_of_files = glob.glob('./*.txt')  # create the list of file
for file_name in list_of_files:
    # Swap only the extension; str.replace('txt', 'out') would also rewrite
    # any other "txt" that appears in the path.
    out_name = file_name.rsplit('.', 1)[0] + '.out'
    # "with" closes both files even if the copy fails part-way.
    with open(file_name, 'r') as FI, open(out_name, 'w') as FO:
        for line in FI:
            FO.write(line)
import sys
# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
    print(n)  # print out the filename we are currently processing
    # "with" closes both files even if the processing raises; the handles are
    # not called "input"/"output" so the builtin input() stays usable.
    with open(n, "r") as infile, open(n + ".out", "w") as outfile:
        # do some processing
        pass
Then call it like:
./foo.py bar.txt baz.txt
You may find the fileinput module useful. It is designed for exactly this problem.
I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os
OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
    for file in files:
        # The join function lives in os.path; there is no os.join.
        # The result file needs mode 'w'; the default mode is read-only.
        with open(os.path.join(path, file), 'r') as read_f, \
                open(os.path.join(OUTPUT_DIR, file), 'w') as write_f:
            # Do stuff
            pass
Combined answer incorporating directory or specific list of filenames arguments:
import sys
import os.path
import glob
def processFile(filename):
    """Read ``filename`` line by line; placeholder for the real processing.

    The file is opened in text mode and closed when the function returns.
    Nothing is returned.
    """
    with open(filename, "r") as fileHandle:
        for line in fileHandle:
            # do some processing
            pass
def outputResults(filename):
    """Write the result file for ``filename``.

    The result is stored next to the input as ``<filename>.out`` and
    currently contains the single line ``processed``.
    """
    output_filemask = "out"
    with open("%s.%s" % (filename, output_filemask), "w") as fileHandle:
        # do some processing
        fileHandle.write('processed\n')
def processFiles(args):
    """Process a directory of ``*.log`` files or an explicit list of files.

    Args:
        args: Command-line style list; ``args[0]`` is the program name.
            If ``args[1]`` is a directory, every ``*.log`` file in it is
            processed; otherwise ``args[1:]`` is taken as the list of files.
    """
    input_filemask = "log"
    directory = args[1]
    if os.path.isdir(directory):
        print("processing a directory")
        list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
    else:
        print("processing a list of files")
        # Use the argument that was passed in rather than reaching for sys.argv.
        list_of_files = args[1:]
    for file_name in list_of_files:
        print(file_name)
        processFile(file_name)
        outputResults(file_name)
if __name__ == '__main__':
    # At least one argument (a directory or a file name) is required.
    if len(sys.argv) > 1:
        processFiles(sys.argv)
    else:
        print('usage message')
from pylab import *
import csv
import os
import glob
import re
# NOTE(review): the indentation of this snippet is missing, so the nesting of
# the loops below cannot be read from the text — confirm against the original.
# NOTE(review): "print a, b" and "print >>f, ..." are Python 2 print statements.
# csv2rec and mean are not defined in this snippet; presumably they come from
# "from pylab import *" — verify.
x=[]
y=[]
# Output file written to by the "print >>f" line below.
f=open("one.txt",'w')
for infile in glob.glob(('*.csv')):
# print "" +infile
csv23=csv2rec(""+infile,'rb',delimiter=',')
for line in csv23:
# Collect the value at index 1 of every record.
x.append(line[1])
# print len(x)
# Copy x[3000] .. x[7999] into y.
for i in range(3000,8000):
y.append(x[i])
# Report the file name and mean(y) on stdout and in one.txt.
print ""+infile,"\t",mean(y)
print >>f,""+infile,"\t\t",mean(y)
# Empty both lists in place.
del y[:len(y)]
del x[:len(x)]
I know I saw this double with open() somewhere but couldn't remember where. So I built a small example in case someone needs.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML(directory=None):
    """Write an HTML-escaped copy of every ``.txt`` file in a directory.

    For each ``name.txt`` a sibling ``name(fixed).txt`` is written in which
    ``&``, ``<`` and ``>`` are replaced by ``&amp;``, ``&lt;`` and ``&gt;``,
    so the text can be pasted into an HTML code block.  Files whose name
    already ends in ``(fixed).txt`` are skipped, so running the function
    again does not escape earlier output a second time.

    Args:
        directory: Folder to scan.  Defaults to the folder that contains
            this Python file.
    """
    ## Local imports: these names are not part of the module-level imports.
    from html import escape
    from os.path import join

    if directory is None:
        ## Default location is the same one that our python file is in.
        directory = abspath(dirname(__file__))

    for loc in listdir(directory):
        ## Split the filename into its parts ('fileName', '.txt').
        stem, ext = splitext(loc)
        if ext != '.txt' or stem.endswith('(fixed)'):
            continue
        ## Build full paths so the files are found whatever the current
        ## working directory is.
        source = join(directory, loc)
        target = join(directory, f'{stem}(fixed){ext}')
        with open(source, 'r') as file_1, open(target, 'w') as file_2:
            ## escape() replaces & first, then < and >, so the ampersands of
            ## the new entities are not escaped again; quote=False leaves
            ## quotation marks untouched. Tabs and newlines are kept as-is.
            file_2.write(escape(file_1.read(), quote=False))
This approach also works for reading multiple files. My files are named federalist_1.txt, federalist_2.txt and so on; I have 84 files, up to federalist_84.txt.
And I'm reading the files as f.
texts = []  # content of every file, in iteration order
for file in filename:
    with open(f'federalist_{file}.txt','r') as f:
        # Keep the text; a bare f.read() would throw the result away.
        texts.append(f.read())

Categories

Resources