I'm having issues scanning through a root directory and modifying all .php files that contain a certain reference. Essentially, we're looking to move our entire database. I have to find all records of certain tables and rename them appropriately. Here's the code I have so far:
import os
import re
directory = 'C:/Users/me/Desktop/wsphp'
for root, dirs, files in os.walk(directory):
for filename in files:
if filename.endswith('.php'):
print(filename)
open_file = open(filename, 'r')
read_file = open_file.read()
regex = re.compile('OLD DATABASE NAME')
read_file = regex.sub('NEW DATABASE NAME', read_file)
write_file = open(filename, 'w')
write_file.write(read_file)
My code breaks when it attempts to open the file. The problem seems to be that 'filename' refers to JUST the filename without the entire directory ('index.php' rather than 'C:/Users/me/Desktop/wsphp/Subfolder/Subfolder2/index.php'). The root directory contains a few .php files as well as a bunch of subdirectories. Is there an easier way to go about this?
As you suspected, filename is just the filename. The path to the file is stored in root, so you need to do
open_file = open(os.path.join(root, filename), 'r')
Related
I'm using the following code to extract .gz files from all the subdirectories.
I am able to extract the files, but the extracted files are not being saved in their own subdirectory; instead, they are being saved in the main (root) directory:
import os, gzip, shutil
dir_name = r'C:\Users\ngowda\Downloads\DO_driver_logs'
def gz_extract(directory):
# Walk `directory` recursively, decompress every ".gz" file that is found,
# and remove the compressed original.
extension = ".gz"
# Makes `directory` the current working directory for the rest of the run.
os.chdir(directory)
for root,dirs,files in os.walk(directory):
for f in files:
if f.endswith(extension): # check for ".gz" extension
gz_name = os.path.join(root, f) # get full path of files
print('gz_name',gz_name)
# basename() drops the directory part, so file_name is a bare file name
# (the ".gz" suffix is stripped by rsplit).
file_name = (os.path.basename(gz_name)).rsplit('.',1)[0]
print('file_name',file_name)
# file_name carries no directory, so the output file is opened relative
# to the current working directory, not relative to `root`.
with gzip.open(gz_name,"rb") as f_in, open(file_name,"wb") as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(gz_name) # delete zipped file
I want all the extracted .gz files to be saved in the same subdirectory as the archive they came from, not in the main directory. Can someone help me with this?
You are writing to file described by file_name which is
file_name = (os.path.basename(gz_name)).rsplit('.',1)[0]
os.path.basename returns only the final component of a pathname, so the output file ends up in the current working directory. As
gz_name = os.path.join(root, f) # get full path of files
is already absolute path, it should be sufficient to eliminate os.path.basename i.e. replacing
file_name = (os.path.basename(gz_name)).rsplit('.',1)[0]
using
file_name = gz_name.rsplit('.',1)[0]
Maybe it is because of file_name not having the full path. Change this line:
with gzip.open(gz_name,"rb") as f_in, open(file_name,"wb") as f_out:
to
with gzip.open(gz_name,"rb") as f_in, open(os.path.join(root, file_name),"wb") as f_out:
Let me know if it worked.
I have a directory with hundreds of folders and subfolders and subfolders within subfolders (in windows).
I'm just trying to create a small txt file with the word "test" in all of these folders.
I've tried something like this but can't get it to work properly:
for i in root_dir:
filename = "test.txt"
with open(filename, "w") as f:
f.write("Test")
attempt2
#only makes one file in the root directory - no sub directories though
import os
root_path8 = r'C:\Users\max\Downloads\users_visual\mgr8\mgr7\mgr6'
for i in next(os.walk(root_path8))[1]:
# print(j)
print(i)
root_dir = root_path8
filename = "test.txt"
filepath = os.path.join(root_dir, filename)
if not os.path.exists(root_dir):
os.makedirs(root_dir)
f = open(filepath, "a")
f.write("Test")
f.close()
Use walk to get all subfolders and sub-subfolders etc. within your directory. This iteration returns 3 values (current folder, subfolders, files); pass the first value to open, using os.path.join to concatenate the folder name and the file name. E.g.
import os
# Create a small "test.txt" file in root_dir and in every folder below it.
# root_dir must already hold the path of the top-level directory.
filename = "test.txt"  # identical for every folder, so set it once
for current_folder, _, _ in os.walk(root_dir):
    # os.walk yields (current folder, subfolders, files); only the folder
    # path is needed to build the target file path.
    with open(os.path.join(current_folder, filename), "w") as f:
        f.write("Test")
BTW if it's the same file in every case, i.e. you don't need to CREATE each file separately, probably a more efficient way would be to create one file to begin with and then copy it to each folder in the iteration.
I am trying to write out the filepath for files with specific file extensions to a text file. There are some files that have different extensions but the same file name, and I am assuming these are duplicates and only want to retain one entry. Here is what I have for code - it is not writing anything out to the file. What am I missing?
import os
path = r'S:\Photogr\ASC'
file_ext_lst = ['.2dm','.2de','.3dm','.3de','.dgn']
txtfile = r'D:\test\microstation_filenames_paths.txt'
for dirpath, dirnames, filenames in os.walk(path):
for filename in filenames:
fullPath = os.path.join(dirpath, filename)
name = os.path.splitext(filename)[0]
if filename[-4:] in file_ext_lst:
with open(txtfile,'r+') as f:
for line in f:
if name not in line:
f.write(fullPath +'\n')
f.close()
The following code writes each file name only once (skipping duplicates), together with its path, to a text file.
import os
# path = r'S:\Photogr\ASC'
# Collect one path per base file name (the first one found wins) for files
# with one of the listed extensions, then write the result to 'unique.txt'.
path = 'temp'
file_ext_lst = ['.2dm','.2de','.3dm','.3de','.dgn']
# NOTE: txtfile is not used below; the output goes to 'unique.txt'.
txtfile = r'D:\test\microstation_filenames_paths.txt'
found = dict()
for dirpath, _, filenames in os.walk(path):
    for filename in filenames:
        name, ext = os.path.splitext(filename)
        if ext not in file_ext_lst:
            continue
        # Files that share a base name are treated as duplicates: only the
        # first path encountered for a given name is kept.
        if name not in found:
            found[name] = os.path.join(dirpath, filename)
with open('unique.txt', 'w') as outf:
    # Plain write() calls behave the same on Python 2 and Python 3, unlike
    # the "print >>outf" statement and dict.iteritems().
    outf.write('Unique files:\n')
    for name, full_path in found.items():
        outf.write('{:<10} {}\n'.format(name, full_path))
Disclaimer: I haven't tried creating some sample files and testing the code - if my first two suggestions do not help, I can try and look further!
You can remove f.close() after with open(), Python does that automatically for you.
You can also simplify the with open() block to:
# Append fullPath to the text file unless the base name already occurs in it.
with open(txtfile,'r+') as f:
    # f.read() consumes the whole file, leaving the position at the end, so
    # the following write() appends.
    if name not in f.read():
        f.write(fullPath +'\n')
On another note: Opening and writing to your text file could happen a lot, which would be very slow - I would suggest storing your candidates in an array first and writing that to the text file only after the os.walk() part.
I have YEARS of files that follow a fairly simple naming structure, and these files need to be read into a program so that their data can be dumped into a SQLite DB. The files follow the format <YYYY>/<MM>/<D>.txt. I have some code that gets me close, but it was designed to read a file that has a constant name, not one that changes.
The relevant code is:
for root, subFolders, files in os.walk(rootdir):
if fnmatch.fnmatch(file, '*.txt') in files:
with open(os.path.join(root, '<filename>'), 'r') as log:
for lines in log:
Something like this:
import os
def list_text_files(root):
    """List all text files below a root directory.

    Walks ``root`` recursively and yields, for every file whose name ends
    in ``.txt``, the containing directory joined with the file name.
    """
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.txt'):
                yield os.path.join(dirpath, filename)
def load_file(path):
    """Load a file into the database.

    Opens ``path`` for reading and iterates over it line by line. The
    per-line database work is left as a placeholder to be filled in.
    """
    with open(path) as fp:
        for line in fp:
            pass  # code goes here
# Load every text file found below the log root.
for file in list_text_files('/home/jhacker/logs'):
    load_file(file)
This will load all text files below the root. If you want better filtering, you'll have to change it.
I am a total Python Newb
I need to loop through a directory looking for .txt files, and then read and process them individually. I would like to set this up so that whatever directory the script is in is treated as the root of this action. For example if the script is in /bsepath/workDir, then it would loop over all of the files in workDir and its children.
What I have so far is:
#!/usr/bin/env python
import os
scrptPth = os.path.realpath(__file__)
for file in os.listdir(scrptPth)
with open(file) as f:
head,sub,auth = [f.readline().strip() for i in range(3)]
data=f.read()
#data.encode('utf-8')
pth = os.getcwd()
print head,sub,auth,data,pth
This code is giving me an invalid syntax error and I suspect that is because os.listdir does not like file paths in standard string format. Also, I don't think that I am doing the looped action right. How do I reference a specific file in the looped action? Is it packaged as a variable?
Any help is appreciated.
import os, fnmatch
def findFiles(path, filter):
    """Yield the paths of all files below ``path`` whose names match ``filter``.

    ``filter`` is an ``fnmatch``-style pattern such as ``'*.txt'``. The tree
    is walked recursively with ``os.walk``; each yielded value is the
    containing directory joined with the matching file name.
    """
    for root, _dirs, files in os.walk(path):
        for file in fnmatch.filter(files, filter):
            yield os.path.join(root, file)
Use it like this, and it will find all text files somewhere within the given path (recursively):
# Print the path of every .txt file found anywhere below the given folder.
for textFile in findFiles(r'C:\Users\poke\Documents', '*.txt'):
    print(textFile)
os.listdir expects a directory as input. So, to get the directory in which the script resides use:
scrptPth = os.path.dirname(os.path.realpath(__file__))
Also, os.listdir returns just the filenames, not the full path.
So open(file) will not work unless the current working directory happens to be the directory where the script resides. To fix this, use os.path.join:
import os
scrptPth = os.path.dirname(os.path.realpath(__file__))
for file in os.listdir(scrptPth):
with open(os.path.join(scrptPth, file)) as f:
Finally, if you want to recurse through subdirectories, use os.walk:
import os
# Use the directory that contains this script as the root of the walk.
scrptPth = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(scrptPth):
    for filename in files:
        # os.walk returns bare file names; join with root to get a path
        # that can be opened regardless of the current working directory.
        full_path = os.path.join(root, filename)
        with open(full_path, 'r') as f:
            # The first three lines are read (stripped) as head, sub and auth;
            # the remainder of the file is read as data.
            head, sub, auth = [f.readline().strip() for _ in range(3)]
            data = f.read()
            #data.encode('utf-8')