Find files and then find a string in those files - python

I have written a function that finds all of the version.php files in a path. I am trying to take the output of that function and find a line from that file. The function that finds the files is:
def find_file():
    """Print the full path of every ``version.php`` found under ``acctPath``.

    ``acctPath`` is read from the enclosing module scope. Nothing is
    returned; each match is only written to standard output.
    """
    for root, folders, files in os.walk(acctPath):
        # ``name`` rather than ``file`` so the Python 2 builtin is not shadowed.
        for name in files:
            if name == 'version.php':
                # The call form prints the same text on Python 2 and Python 3.
                print(os.path.join(root, name))


find_file()
There are several version.php files in the path and I would like to return a string from each of those files.
Edit:
Thank you for the suggestions, my implementation of the code didn't fit my need. I was able to figure it out by creating a list and passing each item to the second part. This may not be the best way to do it, I've only been doing python for a few days.
def cmsoutput():
    """Print ``<install path> = <version>`` for each ``version.php`` found.

    Walks ``acctPath`` (a module-level name), collects every file called
    ``version.php`` and, for each line starting with ``$wp_version =``,
    prints the install path and the version assigned on that line.
    """
    suffix = 'wp-includes/version.php'
    fileList = [
        os.path.join(root, name)
        for root, folders, files in os.walk(acctPath)
        for name in files
        if name == 'version.php'
    ]
    for path in fileList:
        with open(path) as f:
            for line in f:
                if not line.startswith("$wp_version ="):
                    continue
                # Take everything right of '=' and peel off the ';' and the
                # quotes, so versions of any length (e.g. 4.9.10) survive
                # instead of being cut by a fixed-width slice.
                value = line.partition('=')[2].strip().rstrip(';').strip()
                version_number = value.strip('\'"')
                # Remove only the literal trailing suffix; a regex with an
                # unescaped '.' could match elsewhere in the path.
                if path.endswith(suffix):
                    inst_path = path[:-len(suffix)]
                else:
                    inst_path = path
                print(inst_path + " = " + version_number)


cmsoutput()

Since you want to use the output of your function, you have to return something. Printing it does not cut it. Assuming everything works it has to be slightly modified as follows:
import os
def find_file():
    """Return the path of the first ``version.php`` under ``acctPath``.

    ``acctPath`` is read from module scope. The walk stops at the first
    directory that contains the file; ``None`` is returned when no
    directory does.
    """
    target = 'version.php'
    for root, folders, files in os.walk(acctPath):
        if target in files:
            return os.path.join(root, target)
    return None


foundfile = find_file()
Now variable foundfile contains the path of the file we want to look at. Looking for a string in the file can then be done like so:
# Read the located file, then echo every line that mentions the version
# variable.
with open(foundfile, 'r') as f:
    content = f.readlines()
matching = [entry for entry in content if '$wp_version =' in entry]
for entry in matching:
    print(entry)
Or in function version:
def find_in_file(string_to_find, file_to_search):
    """Return the first line of a file that contains a given substring.

    Args:
        string_to_find: Substring to look for in each line.
        file_to_search: Path of the text file to scan.

    Returns:
        The first matching line, including its line terminator, or
        ``None`` when no line contains ``string_to_find``.
    """
    with open(file_to_search, 'r') as f:
        # Iterate the file lazily: reading stops at the first hit instead
        # of loading every line into memory first.
        for line in f:
            if string_to_find in line:
                return line
    return None
# which you can call it like this:
find_in_file("$wp_version =", find_file())
Note that the function version of the code above will terminate as soon as it finds one instance of the string you are looking for. If you want to get them all, it has to be modified.

Related

Merge content of textfile to every file in folder

I want to modify several text-files within a folder.
I have the following code:
if command == "deployf":
    # Visit every directory under all_posts, deepest first.
    for folder, _subdirs, entries in os.walk(all_posts, topdown=False):
        html_paths = [
            os.path.join(folder, entry)
            for entry in entries
            if entry.endswith(".html")
        ]
        for file_name in html_paths:
            with open(file_name, 'r+') as fp:
                lines = fp.readlines()
                # Rewind and empty the file, then write back everything
                # except the last nine lines.
                fp.seek(0)
                fp.truncate()
                fp.writelines(lines[:-9])
    print('deployed to all files')
This deletes the last 9 lines in every html file in a folder. Now I want to merge (or append) the content of another .html file to the end of every file in the folder but I don't know how.
You can ask for the path to the HTML file outside your loop:
path = input("Enter HTML File path to append to each file:")
Then read from the file:
# Use a context manager so the file handle is closed once the lines are read.
with open(path, 'r') as source_file:
    root_content = source_file.readlines()
Then instead of removing the last 9 lines with fp.writelines(lines[:-9]), just write the root_content variable:
fp.writelines(root_content)
I'm assuming this is what you want to do? You had all the knowledge shown in your problem to accomplish this, so please comment if I have misunderstood.
IIUC, you need to replace the last 9 lines of each (.html) with the content of your other file, right ?
If so, and to reduce visible noise, I would use Path.rglob from pathlib with slicing :
from pathlib import Path
if command == "deployf":
    all_posts = Path(all_posts)
    source = all_posts / "the_other_file.html"
    to_append = source.read_text()
    for html in all_posts.rglob("*.html"):
        # The file being appended lives inside all_posts, so rglob yields it
        # too; skip it or it would be truncated and appended to itself.
        if html == source:
            continue
        lines = html.read_text().splitlines()
        # Drop the last nine lines and put the other file's content in
        # their place.
        html.write_text("\n".join(lines[:-9] + [to_append]))
    print("deployed to all files")
If you need to replace a slice in the middle (e.g 5:10) of each (.html), use this :
lines = html.read_text().splitlines()
# Slice assignment swaps elements 4..9 for the single to_append entry.
lines[4:10] = [to_append]
html.write_text("\n".join(lines))

Search for a string in multiple .csv files from a multiple zipped folders

I'm trying to execute a script that will unzip all files in a zipped folder which has multiple txts and .csv files, search only the .csv files for a string, if it contains that string, copy the entire zipped folder to a new folder, if it doesn't, move on to the next zipped folder. I have several scripts that do part of this but can't piece them together. I am a beginner in python so this script looks like it gets complicated.
This script prints the files in the zipped folder, my next step is to search within the .csv files it contains for the string PROGRAM but I don't know how to code it, I'm thinking it goes at the end of this code since it looks like it's running through a loop.
import os
import pandas as pd
import zipfile
curDir = os.getcwd()
# os.path.join builds the path portably; the original literal
# '\namedfile.zip' began with a '\n' newline escape, not a backslash + 'n'.
with zipfile.ZipFile(os.path.join(curDir, 'namedfile.zip')) as zf:
    text_files = zf.infolist()
    list_ = []
    print("Uncompressing and reading data... ")
    # List the name of every member of the archive.
    for text_file in text_files:
        print(text_file.filename)
I wrote this script separately, searches for the string PROGRAM in a folder that contains .csv files
import os
from pathlib import Path
# Searches the .csv files within the "AllCSVFiles"
# folder for the string "PROGRAM"
search_path = "./AllCSVFiles"
file_type = ".csv"
search_str = "PROGRAM"

# Keep a trailing separator on search_path so it can be used as a prefix.
if not search_path.endswith(("/", "\\")):
    search_path = search_path + "/"

# Fall back to the current directory when the folder is missing. The
# fallback carries its own separator; a bare "." prefix would turn
# "a.csv" into ".a.csv".
if not os.path.exists(search_path):
    search_path = "./"

for fname in os.listdir(path=search_path):
    if not fname.endswith(file_type):
        continue
    # The context manager closes the file even if reading raises.
    with open(os.path.join(search_path, fname)) as fo:
        for line_no, line in enumerate(fo, start=1):
            index = line.find(search_str)
            if index != -1:
                # Report file name, 1-based line number and column offset.
                print(fname, "[", line_no, ",", index, "] ", sep="")
Is there an easier way to work this code?
I think the first thing is to make sure you know the structure of the solution.
Reading your description, I'd say it's this:
# Create empty list, for marked zip file
# Iterate over zip files
# Unzip
# Iterate over files
# If file ends in .csv
# If file contains SEARCH_STR
# Mark this zip file to be copied
# Stop searching this zip file
# Iterate marked zip files
# Copy zip file to DEST_DIR
If that is the structure, is this enough to help you see where to put your code?
After that, you can clean up your search for search_str in file quite a bit:
# Scan one CSV file line by line for search_str.
# NOTE(review): assumes search_path already ends with a path separator,
# since the name is appended by plain concatenation.
with open(search_path + fname) as csv_file:
    # Manual 1-based line counter.
    line_no = 0
    for line in csv_file:
        line_no += 1
        if search_str in line:
            # Offset of the first occurrence of search_str within this line.
            search_index = line.index(search_str)
            print(f'{fname}[{line_no},{search_index}]')
            # Mark the zip file this csv_file is in
            # figure out how to stop searching this zip file
for line in csv_file: text files opened in Python have a built-in mechanism for iterating over lines
if search_str in line: if you don't need to know the line exactly where search_str is, simply test for membership, is search_str in the string line?

Function that returns the content of multiple .txt files python

I'm trying to write a function that traverses a given path and opens/reads all the .txt files therein and returns these as a string or returns a value that I can use to apply text normalization.
Currently my code only returns the first .txt file it finds, except when I use a print(f.read()) statement, then it prints all the files it read.
I would like it to return all the files
def readtxt(path):
    """Walk ``path`` and return the text of a ``.txt`` file found beneath it.

    NOTE(review): the nesting below was reconstructed from an unindented
    listing. As written, the ``return`` sits inside the loops, so the
    function ends after reading the first ``.txt`` file it opens.
    """
    import os
    for subdir, dirs, files in os.walk(path):
        for file in files:
            # Path of the current entry, built from the directory being walked.
            filepath = subdir + os.sep + file
            if file.endswith(".txt"):
                # str.split() with no arguments splits on whitespace, so
                # this yields the pieces of the path, not a list of files.
                filelist = filepath.split()
                for file in filelist:
                    # filepath already begins with the walked directory;
                    # it is joined onto ``path`` again here.
                    with open(os.path.join(path, filepath), 'r') as f:
                        lines = (f.read())
                        # Returning here leaves every enclosing loop.
                        return lines
readtxt('/Users/path/')
When you use a return statement, the function ends. That's why it only gives you 1 file - it stops when it finds one. You could add them all into a list & return that afterwards instead.
found = []
And then, inside of your loops, you simply do:
with open(os.path.join(path, filepath), 'r') as f:
lines = (f.read())
found.append(lines) # Append to list instead of returning
so that you can return everything you found using:
return found

Deleting a line in multiple files in python

i am a beginner in python and i am practicing at the moment.
So what I want to do is write a script that takes a line I enter with raw_input, searches for that line in multiple files, and deletes it.
Something like this but for more files:
# Ask for the line to delete, then rewrite file.txt without it.
word = raw_input("word: ")
with open("file.txt", "r") as f:
    lines = f.readlines()
with open("file.txt", "w") as f:
    for line in lines:
        # Compare against ``word`` (the value that was actually read) with
        # the line terminator removed, so a final line that has no trailing
        # newline is matched as well.
        if line.rstrip("\n") != word:
            f.write(line)
It's an easy task but it's actually hard for me since I can't find an example anywhere.
Instead of reading the entire file into memory, you should iterate through the file and write the lines that are OK to a temporary file. Once you've gone through the entire file, delete it and rename the temporary file to the name of the original file. This is a classic pattern that you'll most likely frequently encounter in the future.
I'd also recommend breaking this down into functions. You should first write the code for removing all occurrences of a line from only a single file. Then you can write another function that simply iterates through a list of filenames and calls the first function (that operates on individual files).
To get the filenames of the all the files in the directory, use os.walk. If you do not want to apply this function to all of the files in the directory, you can set the files variable yourself to store whatever configuration of filenames you want.
import os
def remove_line_from_file(filename, line_to_remove, dirpath=''):
    """Remove all occurrences of `line_to_remove` from file
    with name `filename`, contained at path `dirpath`.
    If `dirpath` is omitted, relative paths are used.

    A line is dropped when its text, with surrounding whitespace stripped,
    equals `line_to_remove`. The kept lines are written to a uniquely named
    scratch file next to the original, which then takes the original's place.
    """
    # Local imports: only this function needs these modules.
    import shutil
    import tempfile

    filename = os.path.join(dirpath, filename)
    # A unique scratch name avoids clobbering (or reading and writing at
    # once) a real file that happens to be called 'temp.txt'.
    fd, temp_path = tempfile.mkstemp(
        dir=os.path.dirname(filename) or '.', suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as temp, open(filename, 'r') as f_read:
            for line in f_read:
                if line.strip() != line_to_remove:
                    temp.write(line)
        # mkstemp creates the file owner-only; carry over the original mode.
        shutil.copymode(filename, temp_path)
    except Exception:
        # Leave no scratch file behind when filtering fails.
        os.remove(temp_path)
        raise
    os.remove(filename)
    os.rename(temp_path, filename)
def main():
    """Driver function.

    Prompts for a directory and a line of text, then removes that line
    from every file directly inside the directory (no recursion).
    """
    # raw_input exists only on Python 2; fall back to input on Python 3.
    try:
        read_input = raw_input
    except NameError:
        read_input = input
    directory = read_input('directory: ')
    word = read_input('word: ')
    # os.walk yields nothing for a missing or unreadable directory; the
    # default keeps next() from raising StopIteration in that case.
    dirpath, _, files = next(os.walk(directory), (directory, [], []))
    for f in files:
        remove_line_from_file(f, word, dirpath)


if __name__ == '__main__':
    main()
TESTS
All of these files are in the same directory. On the left is what they looked like before running the command, on the right is what they look like afterwards. The "word" I input was Remove this line.
a.txt
Foo Foo
Remove this line Bar
Bar Hello
Hello World
Remove this line
Remove this line
World
b.txt
Nothing Nothing
In In
This File This File
Should Should
Be Changed Be Changed
c.txt
Remove this line
d.txt
The last line will be removed The last line will be removed
Remove this line
something like this should work:
source = '/some/dir/path/'
for root, dirs, filenames in os.walk(source):
    for f in filenames:
        # Join with ``root`` (the directory currently being walked), not
        # ``source``; otherwise files in subdirectories get a wrong path.
        path = os.path.join(root, f)
        with open(path, "r") as this_file:
            this_files_data = this_file.readlines()
        # rewrite the file with all lines except the one you don't want
        with open(path, "w") as this_file:
            for line in this_files_data:
                # Lines from readlines() keep their newline; strip it so
                # the comparison can actually match.
                if line.rstrip("\n") != "YOUR UNDESIRED LINE HERE":
                    this_file.write(line)

Iterating files with os.walk, but cannot open and print text files

Currently I am trying to write a function that will walk through the requested directory and print all the text of all the files.
Right now, the function works in displaying the file_names as a list so the files surely exist (and there is text in the files).
def PopularWordWalk (starting_dir, word_dict):
    """Print the contents of the files found by walking ``starting_dir``.

    ``word_dict`` is accepted but not used in this body.
    """
    print ("In", os.path.abspath(starting_dir))
    # NOTE(review): this changes the working directory, and the calls below
    # still pass ``starting_dir`` unchanged, so a relative value is then
    # resolved against the new working directory.
    os.chdir(os.path.abspath(starting_dir))
    for (this_dir,dir_names,file_names) in os.walk(starting_dir):
        for file_name in file_names:
            fpath = os.path.join(os.path.abspath(starting_dir), file_name)
            # The file object is not explicitly closed in this body.
            fileobj = open(fpath, 'r')
            text = fileobj.read()
            print(text)
Here is my output with some checking of the directory contents:
>>> PopularWordWalk ('text_dir', word_dict)
In /Users/normanwei/Documents/Python for Programmers/Homework 4/text_dir
>>> os.listdir()
['.DS_Store', 'cats.txt', 'zen_story.txt']
the problem is that whenever i try to print the text, i get nothing. eventually I want to push the text through some other functions but as of now it seems moot without any text. Can anyone lend any experience on why no text is appearing? (when trying to open files/read/storing&printing text manually in idle it works i.e. if I just manually inputted 'cats.txt' instead of 'file_name') - currently running python 3.
EDIT - The question has been answered - just have to remove the os.chdir line - see jojo's answer for explanation.
This line won't work
file = open(file_name, 'r')
Because it would require that these files exist in the same folder you are running the script from. You would have to provide the path to those files, as well as the file names
with open(os.path.join(starting_dir,file_name), 'r') as file:
#do stuff
This way it will build the full path from the directory and the file name.
If you do os.chdir(os.path.abspath(starting_dir)) you go into starting_dir. Then for (this_dir,dir_names,file_names) in os.walk(starting_dir): will loop over nothing since starting_dir is not in starting_dir.
Long story short, comment the line os.chdir(os.path.abspath(starting_dir)) and you should be good.
Alternatively if you want to stick to the os.chdir, this should do the job:
def PopularWordWalk(starting_dir, word_dict):
    """Print the text of every file under ``starting_dir``, recursively.

    The working directory is changed to ``starting_dir`` and stays changed
    after the call.

    Args:
        starting_dir: Directory to walk; may be relative or absolute.
        word_dict: Accepted for interface compatibility; not used here.
    """
    # Resolve the directory BEFORE chdir. Calling abspath on a relative
    # path afterwards would resolve it against the new working directory.
    root = os.path.abspath(starting_dir)
    print("In", root)
    os.chdir(root)
    for this_dir, dir_names, file_names in os.walk('.'):
        for file_name in file_names:
            # Join with this_dir so files in subdirectories resolve too.
            fpath = os.path.join(this_dir, file_name)
            # errors='replace' keeps non-text files (e.g. .DS_Store) from
            # aborting the walk with a decode error.
            with open(fpath, 'r', errors='replace') as fileobj:
                text = fileobj.read()
            print(text)
You'll want to join the root path with the file path. I'd change:
file = open(file_name, 'r')
to
fpath = os.path.join(this_dir, file_name)
file = open(fpath, 'r')
You may also want to use another word to describe it than file as that's a built-in function in Python. I'd recommend fileobj.
Just to add on to the previous answer, you will have to join the absolute path and the relative path of the walk.
Try this:
fpath = os.path.abspath(os.path.join(this_dir, file_name))
f = open(fpath, 'r')

Categories

Resources