How do I modify a filepath using the os.path module? - python

My code
import os.path #gets the module
beginning = input("Enter the file name/path you would like to upperify: ")
inFile = open(beginning, "r")
contents = inFile.read()
moddedContents = contents.upper() #makes the contents of the file all caps
head,tail = os.path.split(beginning) #supposed to split the path
new_new_name = "UPPER" + tail #adds UPPER to the file name
final_name = os.path.join(head + new_new_name) #rejoins the path and new file name
outFile = open(final_name, "w") #creates new file with new capitalized text
outFile.write(moddedContents)
outFile.close()
I'm just trying to change the file name to add UPPER to the beginning to the file name via os.path.split(). Am I doing something wrong?

Change
final_name = os.path.join(head + new_new_name)
to
final_name = head + os.sep + new_new_name

head from os.path.split doesn't have a trailing slash in the end. When you join the head and new_new_name by concatenating them
head + new_new_name
you don't add that missing slash, so the whole path becomes invalid:
>>> head, tail = os.path.split('/etc/shadow')
>>> head
'/etc'
>>> tail
'shadow'
>>> head + tail
'/etcshadow'
The solution is to use os.path.join properly:
final_name = os.path.join(head, new_new_name)

Related

Replacing a set of characters in a string

I have this code below trying to remove the leading characters of a string by using an indicator to where to stop the trim.
I just want to know if there are some better ways to do this.
#Get user's input: file_name
file_name = str.casefold(input("Filename: ")).strip()
#Get the index of "." from the right of the string
i = file_name.rfind(".")
# getting the file extension from the index i
ext = file_name[i+1:]
# Concatinating "image/" with the extractted file extension
new_fname = "image/" + ext
print(new_fname)
Looking at your code you can shorten it to:
file_name = input("Filename: ")
new_fname = f"image/{file_name.rsplit('.', maxsplit=1)[-1]}"
print(new_fname)

Move files into different folders based on a text list Python

I´m new to python and I´ve been trying to simplify a manual task that I do on my daily bases, I have a text file with a list of file names separated in groups by a blank line, like this:
fileName1
fileName2
fileName3
fileName4
fileName5
fileName6
fileName7
fileName8
fileName9
fileName10
fileName11
fileName12
All of this files are in one folder and I want to find each group of files and move them into separate folders the name of the new folders should be the name of the first file of each group.
I´m doing my research and I found how to do each step separately using os and shutil modules but I can´t find a way to join them together and make a beautiful script, any help that I can get from you guys will be awesome, thanks!!
Here's a little script that can do that.
I've made two assumptions:
The file with the list of files is stored in the same directory as source files
There is a blank line after the last file so the script can grab the last group
import os
from shutil import move
from itertools import groupby
#Where the files are originally stored
src_path = "C:\\temp\\src\\"
#Where the group folders will go
dest_path = "C:\\temp\\dest\\"
#Open up the file containing the list of files
with open(src_path + "list_of_files.txt") as txt:
lines = txt.readlines() #Read the file
#Split the contents of the file based on the newline character "\n"
i = (list(g) for _, g in groupby(lines, key='\n'.__ne__))
list_of_groups = [a + b for a, b in zip(i, i)]
#Iterate through each group
for group in list_of_groups:
folder_name = dest_path + group[0].replace("\n","") + "\\"
if not os.path.exists(folder_name):
#Create a folder for each group if it doesn't already exist
os.mkdir(folder_name)
#Move over each file in the group. The last element in the group is a newline character
for file in group:
if file != "\n":
move(src_path + file.replace("\n",""),folder_name + file.replace("\n",""))
When reading a file you can look up characters. Blank spaces have a newline character as represented by \n.
import os
filepath1 = os.getcwd() # current working directory
filepath2 = "path\\to\\where\\you\\want\\dir"
filename = os.path.join(filepath1,"yourfilename.txt")
dirName = []
groupName = "filegroup"
idx = 1
newPath = ""
init = True
file = open(filename, "r")
for line in file:
if init == True: # initial folder
newPath = os.path.join(filepath2,groupName + str(idx))
os.mkdir(newPath)
dirName.append(groupName)
init = False
if line == "\n": # line in file is empty
idx += 1
newName = groupName + str(idx)
dirName.append(newName)
newPath = filepath2 + dirName[idx-1]
os.mkdir(newPath)
else:
os.mkdir(os.path.join(newPath,line.rstrip()))
file.close()

How to unzip all folders/files that end in .zip and extract “file.txt” file from each zipped folder

My code currently unzips one zip folder and finds the file called file.txt and extracts it. Now I need to unzip multiple folders that have the extension .zip. I have tried to use code similar to what I need it to do but the problem is that now I have to find a file called file.txt in each of those .zip folders and extract that file only . Also to store file.txt into a separate folder that has the same name where it came from. Thank you in advance for your time.
import re
import os
from zipfile import ZipFile
def pain():
print("\t\t\tinput_files.zip has been unzipped")
with ZipFile('input_files.zip', 'r') as zipObj:
zipObj.extractall()
listOfFileNames = zipObj.namelist()
for fileName in listOfFileNames:
if fileName.endswith('.txt'):
zipObj.extract(fileName, 'storage')
outfile = "output2.txt" #this will be the filename that the code will write to
baconFile = open(outfile,"wt")
file_name1 = "file.txt"
print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the python shell and this is the way the code should collect the data
baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the output file and this is the way the code should collect the data
#for filename in os.listdir(os.getcwd() + "/input_files"):
for filename in os.listdir('C:\Users\M29858\Desktop\TestPy\Version10\input_files'):
with open("input_files/" + filename, 'r') as f:
if file_name1 in filename:
output_contents(filename, f, baconFile)
baconFile.close() #closes the for loop that the code is writing to
def output_contents(filename, f, baconFile): #using open() function to open the file inside the directory
index = 0
for line in f:
#create a list of all of the numerical values in our line
content = line.split(',') #this will be used to count the amount numbers before and after comma
whitespace_found = False
tab_found = False
false_string = "False (end of file)"
carriage_found = false_string
sigfigs = ""
index += 1 #adds 1 for every line if it finds what the command wants
if " " in line: #checking for whitespace
whitespace_found = True
if "\t" in line: #checking for tabs return
tab_found = True
if '\n' in line: #checking if there is a newline after the end of each line
carriage_found = True
sigfigs = (','.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line ))) #counts the sigsfigs after decimal point
print(filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found))) #whatever is inside the .format() is the way it the data is stored into
baconFile.write('\n')
baconFile.write( filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found)))
if __name__ == '__main__':
pain()
#THIS WORKS
import glob
import os
from zipfile import ZipFile
def main():
for fname in glob.glob("*.zip"): # get all the zip files
with ZipFile(fname) as archive:
# if there's no file.txt, ignore and go on to the next zip file
if 'file.txt' not in archive.namelist(): continue
# make a new directory named after the zip file
dirname = fname.rsplit('.',1)[0]
os.mkdir(dirname)
extract file.txt into the directory you just created
archive.extract('file.txt', path=dirname)

Find files in a directory containing desired string in Python

I'm trying to find a string in files contained within a directory. I have a string like banana that I know that exists in a few of the files.
import os
import sys
user_input = input("What is the name of you directory?")
directory = os.listdir(user_input)
searchString = input("What word are you trying to find?")
for fname in directory: # change directory as needed
if searchString in fname:
f = open(fname,'r')
print('found string in file %s') %fname
else:
print('string not found')
When the program runs, it just outputs string not found for every file. There are three files that contain the word banana, so the program isn't working as it should. Why isn't it finding the string in the files?
You are trying to search for string in filename, use open(filename, 'r').read():
import os
user_input = input('What is the name of your directory')
directory = os.listdir(user_input)
searchstring = input('What word are you trying to find?')
for fname in directory:
if os.path.isfile(user_input + os.sep + fname):
# Full path
f = open(user_input + os.sep + fname, 'r')
if searchstring in f.read():
print('found string in file %s' % fname)
else:
print('string not found')
f.close()
We use user_input + os.sep + fname to get full path.
os.listdir gives files and directories names, so we use os.path.isfile to check for files.
Here is another version using the Path module from pathlib instead of os.
def search_in_file(path,searchstring):
with open(path, 'r') as file:
if searchstring in file.read():
print(f' found string in file {path.name}')
else:
print('string not found')
from pathlib import Path
user_input = input('What is the name of your directory')
searchstring = input('What word are you trying to find?')
dir_content = sorted(Path(user_input).iterdir())
for path in dir_content:
if not path.is_dir():
search_in_file(path, searchstring)
This is my solution for the problem. It comes with the feature of also checking in sub-directories, as well as being able to handle multiple file types. It is also quite easy to add support for other ones. The downside is of course that it's quite chunky code. But let me know what you think.
import os
import docx2txt
from pptx import Presentation
import pdfplumber
def findFiles(strings, dir, subDirs, fileContent, fileExtensions):
# Finds all the files in 'dir' that contain one string from 'strings'.
# Additional parameters:
# 'subDirs': True/False : Look in sub-directories of your folder
# 'fileContent': True/False :Also look for the strings in the file content of every file
# 'fileExtensions': True/False : Look for a specific file extension -> 'fileContent' is ignored
filesInDir = []
foundFiles = []
filesFound = 0
if not subDirs:
for filename in os.listdir(dir):
if os.path.isfile(os.path.join(dir, filename).replace("\\", "/")):
filesInDir.append(os.path.join(dir, filename).replace("\\", "/"))
else:
for root, subdirs, files in os.walk(dir):
for f in files:
if not os.path.isdir(os.path.join(root, f).replace("\\", "/")):
filesInDir.append(os.path.join(root, f).replace("\\", "/"))
print(filesInDir)
# Find files that contain the keyword
if filesInDir:
for file in filesInDir:
print("Current file: "+file)
# Define what is to be searched in
filename, extension = os.path.splitext(file)
if fileExtensions:
fileText = extension
else:
fileText = os.path.basename(filename).lower()
if fileContent:
fileText += getFileContent(file).lower()
# Check for translations
for string in strings:
print(string)
if string in fileText:
foundFiles.append(file)
filesFound += 1
break
return foundFiles
def getFileContent(filename):
'''Returns the content of a file of a supported type (list: supportedTypes)'''
if filename.partition(".")[2] in supportedTypes:
if filename.endswith(".pdf"):
content = ""
with pdfplumber.open(filename) as pdf:
for x in range(0, len(pdf.pages)):
page = pdf.pages[x]
content = content + page.extract_text()
return content
elif filename.endswith(".txt"):
with open(filename, 'r') as f:
content = ""
lines = f.readlines()
for x in lines:
content = content + x
f.close()
return content
elif filename.endswith(".docx"):
content = docx2txt.process(filename)
return content
elif filename.endswith(".pptx"):
content = ""
prs = Presentation(filename)
for slide in prs.slides:
for shape in slide.shapes:
if hasattr(shape, "text"):
content = content+shape.text
return content
else:
return ""
supportedTypes = ["txt", "docx", "pdf", "pptx"]
print(findFiles(strings=["buch"], dir="C:/Users/User/Desktop/", subDirs=True, fileContent=True, fileExtensions=False))
Here is the most simple answer I can give you. You don't need the colors, they are just cool and you may find that you can learn more than one thing in my code :)
import os
from time import sleep
#The colours of the things
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
# Ask the user to enter string to search
search_path = input("Enter directory path to search : ")
file_type = input("File Type : ")
search_str = input("Enter the search string : ")
# Append a directory separator if not already present
if not (search_path.endswith("/") or search_path.endswith("\\") ):
search_path = search_path + "/"
# If path does not exist, set search path to current directory
if not os.path.exists(search_path):
search_path ="."
# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
# Apply file type filter
if fname.endswith(file_type):
# Open file for reading
fo = open(search_path + fname, 'r')
# Read the first line from the file
line = fo.read()
# Initialize counter for line number
line_no = 1
# Loop until EOF
if line != '' :
# Search for string in line
index = line.find(search_str)
if ( index != -1) :
print(bcolors.OKGREEN + '[+]' + bcolors.ENDC + ' ', fname, sep="")
print(' ')
sleep(0.01)
else:
print(bcolors.FAIL + '[-]' + bcolors.ENDC + ' ', fname, ' ', 'does not contain', ' ', search_str, sep="")
print(" ")
sleep(0.01)
line = fo.readline()
# Increment line counter
line_no += 1
# Close the files
fo.close()
That is it!
I was trying with the following code for this kind of problem, please have a look.
import os,sys
search_path=input("Put the directory here:")
search_str = input("Enter your string")
# Append a directory separator if not already present
if not (search_path.endswith("/") or search_path.endswith("\\") ):
search_path = search_path + "/"
# If path does not exist, set search path to current directory
if not os.path.exists(search_path):
search_path ="."
# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
# Apply file type filter
if fname.endswith(file_type):
# Open file for reading
fo = open(search_path + fname)
# Read the first line from the file
line = fo.readline()
# Initialize counter for line number
line_no = 1
# Loop until EOF
while line != '' :
# Search for string in line
index = line.find(search_str)
if ( index != -1) :
print(fname, "[", line_no, ",", index, "] ", line, sep="")
# Read next line
line = fo.readline()
# Increment line counter
line_no += 1
# Close the files
fo.close()

copy files from folder to folder python

I know this question has been answered, but my application uses the solutions, yet am facing bugs, that am not able to solve.
I have a list of numbers in a text file, that denote the image numbers that should be copied. The list is similar to this
7348
7352
7357
7360
7380
7381
.
.
.
The images are with name
IMG_7348.jpg
IMG_7349.jpg
.
.
.
Using the numbers from the text file, I want to copy only those images to a different folder. This is the python code I wrote for the same
import os
import shutil
src = input('Enter the source folder');
dest = input('Enter the destination folder');
src_files = os.listdir(src)
with open("image_numbers.txt") as f:
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
for line in lines:
numbers_str = line
#print(numbers_str)
temp2 = str('IMG_')+numbers_str+str('.jpg')
#print(temp2)
for name_im in src_files:
#print(name_im)
print(name_im == temp2)
if name_im == temp2:
src_files_filt = temp2
#print('stored')
#numbers_float = [float(x) for x in numbers_str]
#map(float,numbers_str) works too
for file_name in src_files_filt:
full_file_name = os.path.join(src, file_name)
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, dest)
When I use the print statements, I get to see that the reformed image name and the name from the src are the same, yet the statement
print(name_im == temp2)
gives me
false
I am not able to figure out the reason
Can you please help me fix the error?
I'm not too sure why the 2 errors were occurring. The second problem you detailed didn't occur for me but I fixed your code to make it cleaner and more pythonic. There would have been a problem with "src_files_filt" since the last for loop iterated it like a list but "src_files_filt" was only a string. I made it such that the script performs the file change right after the filenames are matched.
Edit: Looking over your program again for problem 1, some of the values should return false since there are other files present that are not in the text file. If you place the print statement inside the if block, it should always return true as expected.
import os
import shutil
src = input('Enter the source folder');
dest = input('Enter the destination folder');
src_files = os.listdir(src)
with open("image_numbers.txt") as f:
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
for line in lines:
numbers_str = line
temp1 = 'IMG_' + numbers_str + '.jpg'
for name_im in src_files:
if name_im == temp1:
full_file_name = os.path.join(src, temp1)
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, dest)
print(str(name_im) == temp2) return true
You are already opening "image_numbers.txt"and iterating through each line, so you don't need to open it again to iterate through to strip "\n".
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
This can be achieved more easily by striping "\n" when iterating for line in f. When I ran your code, it did not remove the "\n" which prevented the it from evaluating True in print(name_im == temp2). Additionally, you can't iteratefor file_name in src_files_filt:, because src_files_filt in your code is not a list, rather it is the name of a single file.
Try the following:
import os
import shutil
src = input('Enter the source folder');
dest = input('Enter the destination folder');
src_files = os.listdir(src)
src_files_filt = []
with open("image_numbers.txt") as f:
for line in f:
numbers_str = line.rstrip()
#temp2 = "IMG_%s.jpg" %(numbers_str) #another str manipulation method
temp2 = str('IMG_')+numbers_str+str('.jpg')
#print(temp2)
for name_im in src_files:
print(name_im)
print(name_im == temp2)
if name_im == temp2:
src_files_filt.append(temp2)
for file_name in src_files_filt:
#print(file_name)
full_file_name = os.path.join(src, file_name)
#print(full_file_name)
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, dest)enter code here

Categories

Resources