I am trying to count the number of times "Tmp" occurs in a file and record which file the count belongs to. I created a script that works, but I have to set up the input file and output directory for each file. To improve it, I would like the script to go through every file in a folder after setting it up once.
I have been experimenting with:
import tkFileDialog
import glob
import os
directory = tkFileDialog.askdirectory()
for infile in glob.glob(os.path.join(directory, "*.*")):
    open(infile, "r").read()
    infile.count("Tmp")
Currently I am counting the number of times "Tmp" occurs in the file name and not in the actual file. When I type:
print infile
it outputs the contents of the text files but not the directory. I am just confused about where to go or what to do.
I would use os.walk rather than glob:
import tkFileDialog
import os
import os.path
import re
directory = tkFileDialog.askdirectory()
for dirpath, dirnames, filenames in os.walk(directory):
    for filename in filenames:
        path = os.path.join(dirpath, filename)
        with open(path) as file:
            contents = file.read()
            print path[:30], contents.count('Tmp'), re.findall(r'Tmp\d{5}', contents)
That should be:
data = open(infile, 'r').read()
print data.count('Tmp')
import os
import glob
import tkFileDialog
directory = tkFileDialog.askdirectory()
for infile in glob.glob(os.path.join(directory, '*')):
    if os.path.isfile(infile):
        f = open(infile)
        print os.path.split(infile)[-1], f.read().count('Tmp')
Can anyone help me write a script? The goal is to find files with a given extension and save the name and path in a TXT or CSV file.
I have a script which finds and prints the file name and path, but how can I save the result to csv/txt?
import fnmatch
import os
import csv
rootPath = '/'
pattern = '*.exe'
filepath = 'C:/Users/user/Desktop/filetest.txt'
for root, dirs, files in os.walk(rootPath):
    for filepath in fnmatch.filter(files, pattern):
        x = (os.path.join(root, filepath))
        print(x)
I tried this one, but it saves only the last line.
import fnmatch
import os
import csv
rootPath = '/'
pattern = '*.exe'
filepath = 'C:/Users/user/Desktop/filetest.txt'
for root, dirs, files in os.walk(rootPath):
    for filepath in fnmatch.filter(files, pattern):
        x = (os.path.join(root, filepath))
        file = open(filepath, 'w')
        file.write(x)
        file.close()
        print(x)
I think the reason is that you open the file inside the loop with open(filepath, 'w'), and the 'w' mode always overwrites the file. If you want to append you could use 'a', but that is still not a good solution here, because you would be reopening the file on every iteration.
Keeping your code, you can solve it by moving the open call outside the loop:
import fnmatch
import os
import csv
rootPath = '/'
pattern = '*.exe'
filepath = 'C:/Users/user/Desktop/filetest.txt'
file = open(filepath, 'w')
for root, dirs, files in os.walk(rootPath):
    for filepath in fnmatch.filter(files, pattern):
        x = (os.path.join(root, filepath))
        file.write(x + '\n')
file.close()
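The question also mentions CSV, and csv is imported but never used; here is a minimal sketch of the same walk writing one row per match with the csv module (assuming Python 3 — the csv_path and the header row are just illustrative):
import fnmatch
import os
import csv

rootPath = '/'
pattern = '*.exe'
csv_path = 'C:/Users/user/Desktop/filetest.csv'  # hypothetical output location

# newline='' stops the csv module from writing blank lines on Windows (Python 3)
with open(csv_path, 'w', newline='') as out:
    writer = csv.writer(out)
    writer.writerow(['filename', 'full path'])   # assumed header, adjust as needed
    for root, dirs, files in os.walk(rootPath):
        for name in fnmatch.filter(files, pattern):
            writer.writerow([name, os.path.join(root, name)])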
from glob import glob
import os
files = sorted(glob(os.path.join(rootPath, pattern)))
with open(filepath, 'w') as fid:
    fid.write('\n'.join(files))
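Note that glob with a plain rootPath/pattern only matches files directly inside rootPath; if sub-directories need to be searched too (as os.walk does), glob understands recursive ** patterns on Python 3.5+. A minimal sketch reusing the same names:
from glob import glob
import os

# '**' matches any number of nested directories when recursive=True (Python 3.5+)
files = sorted(glob(os.path.join(rootPath, '**', pattern), recursive=True))
with open(filepath, 'w') as fid:
    fid.write('\n'.join(files))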
I have 20 folders, each containing 50 txt files. I need to read all of them in order to compare the word counts of each folder. I know how to read multiple files in one folder, but it is slow; is there a more efficient way than reading the folders one by one like below?
import re
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import os
import glob
1. folder1
folder_path = '/home/runner/Final-Project/folder1'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    with open(filename, 'r') as f:
        text = f.read()
        print (filename)
        print (len(text))
2. folder2
folder_path = '/home/runner/Final-Project/folder2'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    with open(filename, 'r') as f:
        text = f.read()
        print (filename)
        print (len(text))
You can do something similar using glob like you have, but with the directory names.
folder_path = '/home/runner/Final-Project'
for filename in glob.glob(os.path.join(folder_path,'*','*.txt')):
    # process your files
The first '*' in the os.path.join() represents directories of any name. So calling glob.glob() like this will go through and find any text file in any direct sub-directory within folder_path
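To actually compare the folders, the counts can be accumulated per folder in one pass. A minimal sketch, assuming "word count" means whitespace-separated tokens and using the sub-directory name as the key:
import os
import glob
from collections import defaultdict

folder_path = '/home/runner/Final-Project'
word_counts = defaultdict(int)

for filename in glob.glob(os.path.join(folder_path, '*', '*.txt')):
    folder = os.path.basename(os.path.dirname(filename))  # e.g. 'folder1'
    with open(filename, 'r') as f:
        word_counts[folder] += len(f.read().split())

for folder, count in sorted(word_counts.items()):
    print(folder, count)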
The function below returns a list of the files in all directories and sub-directories, without using glob. You can then iterate over that list and open each file to read it.
def list_of_files(dirName):
    files_list = os.listdir(dirName)
    all_files = list()
    for entry in files_list:
        # Create full path
        full_path = os.path.join(dirName, entry)
        if os.path.isdir(full_path):
            all_files = all_files + list_of_files(full_path)
        else:
            all_files.append(full_path)
    return all_files
print(list_of_files(<Dir Path>)) # <Dir Path> ==> your directory path
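For example, to feed that list through the same read-and-print loop as above (the '.txt' filter is added here so non-text files are skipped):
for path in list_of_files('/home/runner/Final-Project'):
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            text = f.read()
            print(path, len(text))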
Folder A has more than 100 files and folder B is my destination folder. I want to copy 10 files from folder A to folder B. The names of the 10 files are in the text file C.
import os
import shutil
from glob import glob
namelist = open('/Users/C.txt').read().splitlines()
input = '/Users/A'
output = '/Users/B'
path = '/Users/A'
files = glob(path)
for path in files:
    filedir, filename = os.path.split(path)
    for filename in namelist:
        shutil.copy2(input, output)
It returns an Error. Please help me to do it in Python, thanks a lot!
There are a few things that can be improved in your code:
import os
import shutil
from glob import glob

#namelist = open('/Users/C.txt').read().splitlines()
# a context manager will take care of closing the file after open;
# strip the newline that every line read from C.txt ends with
with open('/Users/C.txt') as fp:
    namelist = [line.strip() for line in fp]

input = '/Users/A'
output = '/Users/B'
path = '/Users/A'

# don't need the glob import as you already imported os
#files = glob(path)
# os.listdir gives bare file names, so compare against file_name, not a full path
files = os.listdir(path)

# loop only through the files mentioned in the text file and see if they are
# available in folder A
for file_name in namelist:
    if file_name in files:
        file_path = os.path.join(input, file_name)
        dest_path = os.path.join(output, file_name)
        shutil.copy(file_path, dest_path)

#for path in files:
#    filedir, filename = os.path.split(path)
#    for filename in namelist:
#        shutil.copy2(input,output)
I do not have sample data or an error message to check. From what I can see in your code, files needs to be built with a wildcard (e.g. glob('/Users/A/*')) so that it actually lists the files in A, and the copy should use the matched path:
for path in files:
    filedir, filename = os.path.split(path)
    if filename in namelist:
        shutil.copy2(path, output)
Your paths start from the root folder because of the leading forward slash. If the folders and files are relative to the location of your .py file, put a dot in front of them or drop the leading slash:
./Users/A or Users/A
I want to write a program for this: in a folder I have n files; first read one file, perform some operation, and store the result in a separate file. Then read the 2nd file, perform the operation again, and save the result in a new 2nd file. Do the same procedure for all n files, so the program reads the files one by one and stores the result of each file separately. Please give examples of how I can do it.
I think what you are missing is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob
list_of_files = glob.glob('./*.txt') # create the list of file
for file_name in list_of_files:
    FI = open(file_name, 'r')
    FO = open(file_name.replace('txt', 'out'), 'w')
    for line in FI:
        FO.write(line)
    FI.close()
    FO.close()
import sys
# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
    print(n)  # print out the filename we are currently processing
    input = open(n, "r")
    output = open(n + ".out", "w")
    # do some processing
    input.close()
    output.close()
Then call it like:
./foo.py bar.txt baz.txt
You may find the fileinput module useful. It is designed for exactly this problem.
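A minimal sketch of the fileinput approach (the .out naming mirrors the convention above and is just an assumption): fileinput.input() iterates over the lines of every file named on the command line, and fileinput.isfirstline()/fileinput.filename() let you notice when a new input file starts.
import fileinput

output = None
for line in fileinput.input():           # reads the files listed in sys.argv[1:]
    if fileinput.isfirstline():          # a new input file has just started
        if output:
            output.close()
        output = open(fileinput.filename() + '.out', 'w')  # assumed output naming
    # do some processing on `line`, then write the result
    output.write(line)
if output:
    output.close()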
I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os
OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
    for file in files:
        read_f = open(os.path.join(path, file), 'r')          # os.path.join, not os.join
        write_f = open(os.path.join(OUTPUT_DIR, file), 'w')   # open for writing
        # Do stuff
Combined answer incorporating directory or specific list of filenames arguments:
import sys
import os.path
import glob
def processFile(filename):
    fileHandle = open(filename, "r")
    for line in fileHandle:
        # do some processing
        pass
    fileHandle.close()

def outputResults(filename):
    output_filemask = "out"
    fileHandle = open("%s.%s" % (filename, output_filemask), "w")
    # do some processing
    fileHandle.write('processed\n')
    fileHandle.close()

def processFiles(args):
    input_filemask = "log"
    directory = args[1]
    if os.path.isdir(directory):
        print "processing a directory"
        list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
    else:
        print "processing a list of files"
        list_of_files = sys.argv[1:]
    for file_name in list_of_files:
        print file_name
        processFile(file_name)
        outputResults(file_name)

if __name__ == '__main__':
    if (len(sys.argv) > 1):
        processFiles(sys.argv)
    else:
        print 'usage message'
from pylab import *
import csv
import os
import glob
import re
x=[]
y=[]
f=open("one.txt",'w')
for infile in glob.glob(('*.csv')):
    # print "" +infile
    csv23 = csv2rec("" + infile, 'rb', delimiter=',')
    for line in csv23:
        x.append(line[1])
        # print len(x)
    for i in range(3000, 8000):
        y.append(x[i])
    print "" + infile, "\t", mean(y)
    print >>f, "" + infile, "\t\t", mean(y)
    del y[:len(y)]
    del x[:len(x)]
I know I saw this double with open() somewhere but couldn't remember where, so I built a small example in case someone needs it.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
    """ This function will search a directory for text files to be edited. """

    ## define some regex for our search and replace. We are looking for &, < and >
    ## to be replaced with &amp;, &lt; and &gt;. The '&' rule runs first so it does
    ## not re-escape the entities we insert. We might want to replace proper
    ## whitespace chars as well? (r'\t', '    ') and (r'\n', '<br>')
    search_ = ((r'(&)', '&amp;'), (r'(<)', '&lt;'), (r'(>)', '&gt;'))

    ## Read and loop our file location. Our location is the same one that our python file is in.
    for loc in listdir(abspath(dirname(__file__))):

        ## Here we split our filename into its parts ('fileName', '.txt')
        name = splitext(loc)
        if name[1] == '.txt':

            ## we found our .txt file so we can start file operations.
            with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:

                ## read our first file
                retFile = file_1.read()

                ## find and replace some text.
                for find_ in search_:
                    retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)

                ## finally we can write to our newly created text file.
                file_2.write(retFile)
This approach also works for reading multiple files. My file names are federalist_1.txt, federalist_2.txt, and so on; I have 84 files, up to federalist_84.txt.
I'm reading the files as f.
for number in range(1, 85):                      # federalist_1.txt .. federalist_84.txt
    with open(f'federalist_{number}.txt', 'r') as f:
        text = f.read()
I'm trying to use python to search for a string in a folder which contains multiple .txt files.
My objective is to find the files containing the string and move or rewrite them in another folder.
what I have tried is:
import os
for filename in os.listdir('./*.txt'):
    if os.path.isfile(filename):
        with open(filename) as f:
            for line in f:
                if 'string/term to be searched' in line:
                    f.write
                    break
Probably there is something wrong with this but, of course, I cannot figure it out.
os.listdir argument must be a path, not a pattern. You can use glob to accomplish that task:
import os
import glob
for filename in glob.glob('./*.txt'):
    if os.path.isfile(filename):
        with open(filename) as f:
            for line in f:
                if 'string/term to be searched' in line:
                    # You cannot write with f, because it is open in read mode,
                    # and write() must be supplied an argument.
                    # Your actions
                    break
As Antonio says, you cannot write with f because it is open in read mode.
A possible solution to avoid the problem is the following:
import os
import shutil
source_dir = "your/source/path"
destination_dir = "your/destination/path"
for top, dirs, files in os.walk(source_dir):
    for filename in files:
        file_path = os.path.join(top, filename)
        check = False
        with open(file_path, 'r') as f:
            if 'string/term to be searched' in f.read():
                check = True
        if check is True:
            shutil.move(file_path, os.path.join(destination_dir, filename))
Remember that if your source_dir or destination_dir is a Windows path written with backslashes, sequences such as \t are interpreted as escape characters, so you have to double every backslash (or use a raw string).
For example, this:
source_dir = "C:\documents\test"
should be
source_dir = "C:\\documents\\test" (or the raw string r"C:\documents\test")