copy files from folder to folder python - python

I know this question has been answered before, and my application uses those solutions, yet I am facing bugs that I am not able to solve.
I have a list of numbers in a text file, that denote the image numbers that should be copied. The list is similar to this
7348
7352
7357
7360
7380
7381
.
.
.
The images are with name
IMG_7348.jpg
IMG_7349.jpg
.
.
.
Using the numbers from the text file, I want to copy only those images to a different folder. This is the python code I wrote for the same
import os
import shutil
src = input('Enter the source folder');
dest = input('Enter the destination folder');
src_files = os.listdir(src)
with open("image_numbers.txt") as f:
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
for line in lines:
numbers_str = line
#print(numbers_str)
temp2 = str('IMG_')+numbers_str+str('.jpg')
#print(temp2)
for name_im in src_files:
#print(name_im)
print(name_im == temp2)
if name_im == temp2:
src_files_filt = temp2
#print('stored')
#numbers_float = [float(x) for x in numbers_str]
#map(float,numbers_str) works too
for file_name in src_files_filt:
full_file_name = os.path.join(src, file_name)
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, dest)
When I use the print statements, I get to see that the reformed image name and the name from the src are the same, yet the statement
print(name_im == temp2)
gives me
false
I am not able to figure out the reason
Can you please help me fix the error?

I'm not too sure why the 2 errors were occurring. The second problem you detailed didn't occur for me but I fixed your code to make it cleaner and more pythonic. There would have been a problem with "src_files_filt" since the last for loop iterated it like a list but "src_files_filt" was only a string. I made it such that the script performs the file change right after the filenames are matched.
Edit: Looking over your program again for problem 1, some of the values should return false since there are other files present that are not in the text file. If you place the print statement inside the if block, it should always return true as expected.
import os
import shutil
src = input('Enter the source folder');
dest = input('Enter the destination folder');
src_files = os.listdir(src)
with open("image_numbers.txt") as f:
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
for line in lines:
numbers_str = line
temp1 = 'IMG_' + numbers_str + '.jpg'
for name_im in src_files:
if name_im == temp1:
full_file_name = os.path.join(src, temp1)
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, dest)

Using print(str(name_im) == temp2) returns True.

You are already opening "image_numbers.txt" and iterating through each line, so you don't need to open it again to iterate through to strip "\n".
lines = [line.rstrip('\n') for line in open('image_numbers.txt')]
This can be achieved more easily by stripping "\n" when iterating for line in f. When I ran your code, it did not remove the "\n", which prevented print(name_im == temp2) from evaluating to True. Additionally, you can't iterate for file_name in src_files_filt:, because src_files_filt in your code is not a list; rather, it is the name of a single file.
Try the following:
import os
import shutil
# Copy every IMG_<number>.jpg whose number is listed in image_numbers.txt
# from the source folder to the destination folder.
src = input('Enter the source folder')
dest = input('Enter the destination folder')
src_files = os.listdir(src)
src_files_filt = []
with open("image_numbers.txt") as f:
    for line in f:
        # rstrip() removes the trailing newline; without it the rebuilt name
        # never equals a real file name.
        numbers_str = line.rstrip()
        # temp2 = "IMG_%s.jpg" % (numbers_str)  # another str manipulation method
        temp2 = 'IMG_' + numbers_str + '.jpg'
        for name_im in src_files:
            print(name_im)
            print(name_im == temp2)
            if name_im == temp2:
                # Collect into a list so the copy loop below iterates over
                # file names, not over the characters of a single string.
                src_files_filt.append(temp2)
for file_name in src_files_filt:
    full_file_name = os.path.join(src, file_name)
    if os.path.isfile(full_file_name):
        shutil.copy(full_file_name, dest)

Related

How to unzip all folders/files that end in .zip and extract “file.txt” file from each zipped folder

My code currently unzips one zip folder and finds the file called file.txt and extracts it. Now I need to unzip multiple folders that have the extension .zip. I have tried to use code similar to what I need it to do, but the problem is that now I have to find a file called file.txt in each of those .zip folders and extract only that file. I also need to store each file.txt in a separate folder that has the same name as the zip folder it came from. Thank you in advance for your time.
import re
import os
from zipfile import ZipFile
def pain():
print("\t\t\tinput_files.zip has been unzipped")
with ZipFile('input_files.zip', 'r') as zipObj:
zipObj.extractall()
listOfFileNames = zipObj.namelist()
for fileName in listOfFileNames:
if fileName.endswith('.txt'):
zipObj.extract(fileName, 'storage')
outfile = "output2.txt" #this will be the filename that the code will write to
baconFile = open(outfile,"wt")
file_name1 = "file.txt"
print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the python shell and this is the way the code should collect the data
baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the output file and this is the way the code should collect the data
#for filename in os.listdir(os.getcwd() + "/input_files"):
for filename in os.listdir('C:\Users\M29858\Desktop\TestPy\Version10\input_files'):
with open("input_files/" + filename, 'r') as f:
if file_name1 in filename:
output_contents(filename, f, baconFile)
baconFile.close() #closes the for loop that the code is writing to
def output_contents(filename, f, baconFile): #using open() function to open the file inside the directory
index = 0
for line in f:
#create a list of all of the numerical values in our line
content = line.split(',') #this will be used to count the amount numbers before and after comma
whitespace_found = False
tab_found = False
false_string = "False (end of file)"
carriage_found = false_string
sigfigs = ""
index += 1 #adds 1 for every line if it finds what the command wants
if " " in line: #checking for whitespace
whitespace_found = True
if "\t" in line: #checking for tabs return
tab_found = True
if '\n' in line: #checking if there is a newline after the end of each line
carriage_found = True
sigfigs = (','.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line ))) #counts the sigsfigs after decimal point
print(filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found))) #whatever is inside the .format() is the way it the data is stored into
baconFile.write('\n')
baconFile.write( filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found)))
if __name__ == '__main__':
pain()
#THIS WORKS
import glob
import os
from zipfile import ZipFile
def main():
    """Extract ``file.txt`` from every zip archive in the current directory.

    For each ``*.zip`` in the working directory that has a member named
    ``file.txt``, a directory named after the archive (its name without the
    final ``.zip``) is created and ``file.txt`` is extracted into it.
    Archives without that member are skipped.
    """
    for fname in glob.glob("*.zip"):  # get all the zip files
        with ZipFile(fname) as archive:
            # if there's no file.txt, ignore and go on to the next zip file
            if 'file.txt' not in archive.namelist():
                continue
            # make a new directory named after the zip file
            dirname = fname.rsplit('.', 1)[0]
            # exist_ok=True so running the script a second time does not
            # fail on the directory created by the first run
            os.makedirs(dirname, exist_ok=True)
            # extract file.txt into the directory you just created
            archive.extract('file.txt', path=dirname)

Find files in a directory containing desired string in Python

I'm trying to find a string in files contained within a directory. I have a string like banana that I know that exists in a few of the files.
import os
import sys
user_input = input("What is the name of you directory?")
directory = os.listdir(user_input)
searchString = input("What word are you trying to find?")
for fname in directory: # change directory as needed
if searchString in fname:
f = open(fname,'r')
print('found string in file %s') %fname
else:
print('string not found')
When the program runs, it just outputs string not found for every file. There are three files that contain the word banana, so the program isn't working as it should. Why isn't it finding the string in the files?
You are trying to search for string in filename, use open(filename, 'r').read():
import os
user_input = input('What is the name of your directory')
directory = os.listdir(user_input)
searchstring = input('What word are you trying to find?')
for fname in directory:
if os.path.isfile(user_input + os.sep + fname):
# Full path
f = open(user_input + os.sep + fname, 'r')
if searchstring in f.read():
print('found string in file %s' % fname)
else:
print('string not found')
f.close()
We use user_input + os.sep + fname to get full path.
os.listdir gives files and directories names, so we use os.path.isfile to check for files.
Here is another version using the Path module from pathlib instead of os.
def search_in_file(path, searchstring):
    """Print whether the file at *path* contains *searchstring*.

    *path* must provide a ``name`` attribute (it is read when reporting a
    hit), e.g. a ``pathlib.Path``. Nothing is returned; the result is
    printed.
    """
    # errors='replace' keeps one undecodable (binary) file from aborting a
    # scan over a whole directory.
    with open(path, 'r', errors='replace') as file:
        if searchstring in file.read():
            print(f' found string in file {path.name}')
        else:
            print('string not found')
from pathlib import Path
user_input = input('What is the name of your directory')
searchstring = input('What word are you trying to find?')
dir_content = sorted(Path(user_input).iterdir())
for path in dir_content:
if not path.is_dir():
search_in_file(path, searchstring)
This is my solution for the problem. It comes with the feature of also checking in sub-directories, as well as being able to handle multiple file types. It is also quite easy to add support for other ones. The downside is of course that it's quite chunky code. But let me know what you think.
import os
import docx2txt
from pptx import Presentation
import pdfplumber
def findFiles(strings, dir, subDirs, fileContent, fileExtensions):
    """Find all the files in 'dir' that contain one string from 'strings'.

    Additional parameters:
    'subDirs': True/False : Look in sub-directories of your folder
    'fileContent': True/False : Also look for the strings in the file
        content of every file
    'fileExtensions': True/False : Look for a specific file extension
        -> 'fileContent' is ignored

    Returns the list of matching paths, written with forward slashes.
    Progress is printed while scanning.
    """
    filesInDir = []
    foundFiles = []

    # Collect candidate paths, normalised to forward slashes.
    if not subDirs:
        for filename in os.listdir(dir):
            candidate = os.path.join(dir, filename).replace("\\", "/")
            if os.path.isfile(candidate):
                filesInDir.append(candidate)
    else:
        for root, subdirs, files in os.walk(dir):
            for f in files:
                candidate = os.path.join(root, f).replace("\\", "/")
                if not os.path.isdir(candidate):
                    filesInDir.append(candidate)
    print(filesInDir)

    # Find files that contain the keyword
    for file in filesInDir:
        print("Current file: " + file)
        # Define what is to be searched in
        filename, extension = os.path.splitext(file)
        if fileExtensions:
            fileText = extension
        else:
            fileText = os.path.basename(filename).lower()
            if fileContent:
                fileText += getFileContent(file).lower()
        # One matching string is enough; each file is reported at most once.
        for string in strings:
            print(string)
            if string in fileText:
                foundFiles.append(file)
                break
    return foundFiles
def getFileContent(filename):
    '''Returns the content of a file of a supported type (list: supportedTypes)

    Files whose extension is not in supportedTypes give an empty string.
    '''
    # splitext only looks at the final suffix of the last path component, so
    # dots in directory names or in the base name ("my.notes.txt") no longer
    # make a supported file look unsupported.
    extension = os.path.splitext(filename)[1].lstrip(".").lower()
    if extension not in supportedTypes:
        return ""
    if extension == "pdf":
        parts = []
        with pdfplumber.open(filename) as pdf:
            for page in pdf.pages:
                # NOTE(review): guard in case extract_text() yields None for a
                # page without text - confirm against the installed pdfplumber.
                parts.append(page.extract_text() or "")
        return "".join(parts)
    if extension == "txt":
        with open(filename, 'r') as f:
            return f.read()
    if extension == "docx":
        return docx2txt.process(filename)
    if extension == "pptx":
        parts = []
        prs = Presentation(filename)
        for slide in prs.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    parts.append(shape.text)
        return "".join(parts)
    return ""
supportedTypes = ["txt", "docx", "pdf", "pptx"]
print(findFiles(strings=["buch"], dir="C:/Users/User/Desktop/", subDirs=True, fileContent=True, fileExtensions=False))
Here is the most simple answer I can give you. You don't need the colors, they are just cool and you may find that you can learn more than one thing in my code :)
import os
from time import sleep
#The colours of the things
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
# Ask the user to enter string to search
search_path = input("Enter directory path to search : ")
file_type = input("File Type : ")
search_str = input("Enter the search string : ")
# Append a directory separator if not already present
if not (search_path.endswith("/") or search_path.endswith("\\") ):
search_path = search_path + "/"
# If path does not exist, set search path to current directory
if not os.path.exists(search_path):
search_path ="."
# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
# Apply file type filter
if fname.endswith(file_type):
# Open file for reading
fo = open(search_path + fname, 'r')
# Read the first line from the file
line = fo.read()
# Initialize counter for line number
line_no = 1
# Loop until EOF
if line != '' :
# Search for string in line
index = line.find(search_str)
if ( index != -1) :
print(bcolors.OKGREEN + '[+]' + bcolors.ENDC + ' ', fname, sep="")
print(' ')
sleep(0.01)
else:
print(bcolors.FAIL + '[-]' + bcolors.ENDC + ' ', fname, ' ', 'does not contain', ' ', search_str, sep="")
print(" ")
sleep(0.01)
line = fo.readline()
# Increment line counter
line_no += 1
# Close the files
fo.close()
That is it!
I was trying with the following code for this kind of problem, please have a look.
import os,sys
# Print every line, with line number and column, that contains the search
# string in the files of one directory.
search_path = input("Put the directory here:")
# The loop below filters on file_type, so it has to be asked for; an empty
# answer matches every file name.
file_type = input("File Type : ")
search_str = input("Enter your string")
# If path is not an existing directory, set search path to current directory
if not os.path.isdir(search_path):
    search_path = "."
# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
    # Apply file type filter
    if not fname.endswith(file_type):
        continue
    # os.path.join supplies the separator, so the "." fallback above does not
    # turn into ".name"
    full_path = os.path.join(search_path, fname)
    if not os.path.isfile(full_path):
        continue
    # The with-block closes the file even if reading fails part-way
    with open(full_path) as fo:
        for line_no, line in enumerate(fo, start=1):
            # Search for string in line
            index = line.find(search_str)
            if index != -1:
                print(fname, "[", line_no, ",", index, "] ", line, sep="")

Python: Read multiple files and move them to a directory according to their content

I am quite new to python, but I would like to use it for the following tasks:
read all files in a directory
look for a specific character in all lines of the files
if this character is present only once in the file copy the file in a specific directory.
I tried the following code:
#! /usr/bin/python
import glob
import shutil
path = '/xxxx/Dir/*.txt'
files=glob.glob(path)
for file in files:
f=open(file)
f.read()
total = 0
for line in f:
if "*TPR_4*" in line:
total_line = total + 1
if total_line == 1:
shutil.copy(f, 'xxxx/Test/')
f.close()
However, it is not working.
Any suggestion?
shutil.copy() takes file names as arguments not open files. You should change your call:
shutil.copy(file, 'xxxx/Test/')
Also: file is a poor name choice. It's a built-in function's name.
The logic is not quite correct, also you are mixing up total and total_line and shutil.copy takes the name, not the object as an argument. And note that the if .. in line does not use globbing syntax, i.e. to search for TPR_4, use 'TPR_4', not '*TPR_4*'. Try the following:
#! /usr/bin/python
import glob
import shutil
path = '/xxxx/Dir/*.txt'
files=glob.glob(path)
for file in files:
f=open(file)
total = 0
for line in f:
if "TPR_4" in line:
total += 1
if total > 1:
break # no need to go through the file any further
f.close()
if total == 1:
shutil.copy(file, 'xxxx/Test/')
I wrote some code for your question, maybe it's good for you.
import os, shutil
dir_path = '/Users/Bob/Projects/Demo'
some_char = 'abc'
dest_dir = "/Users/Bob/tmp"
for root, dirs, files in os.walk(dir_path):
for _file in files:
file_path = os.path.join(root, _file)
copy = False
with open(file_path, 'r') as f:
while True:
line = f.readline()
if not line:
break
if str(line).find(some_char) > -1:
copy = True
break
if copy:
shutil.copy(file_path, dest_dir)
print file_path, ' copy...'

Python - extract and modify part of a specific line of text with a function for all files in folder

I'm looking to extract and modify a specific line of text in many files within a folder but I am having some trouble.
For instance, the first file might read:
To: Bob
From: Bill
<Message> The eagle flies at midnight. <End Message>
The second message is different, but same format, and so on. I'd like to extract the third line, pass 'The eagle flies at midnight.' through a function (like base64), and then put it back on the line between 'Message' and 'End Message'. Such that the final output would read:
To: Bob
From: Bill
<Message> VGhlIGVhZ2xlIGZsaWVzIGF0IG1pZG5pZ2h0Lg== <End Message>
This is what I am trying (and adjusting) so far.
import base64
import os
import io
#ask user where his stuff is / is going
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
#get that stuff
myfilepath = os.path.join(directory, '*.txt')
with open('*.txt', 'r') as file:
data = file.readlines()
#Go to line 3 and take out non encoded text.
data[3] = X
X.strip("<Message>")
X.strip("<End Message>")
coded_string = X
#Encode line 3
base64.b64encode(coded_string)
data[3] = '<Message> %s <End Message>' % (coded_string)
# and write everything back
with open('*.txt', 'w') as file:
file.writelines(data)
I'm sure there are numerous problems, particularly with how I am opening and writing back. Bonus points: 99% of the messages in this folder are in this exact format, but there are 1% junk messages (they dont need to be encoded, and line 3 for them is something different). I'm not too worried about them, but if they could be unharmed in the process that'd be nifty. Maybe line 3 should be line 2 if the count starts at 0 ...
Edit: Trying
import re, base64
import os
folder = 'C:/Users/xxx/Desktop/input'
matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")
for filename in os.listdir(folder):
infilename = os.path.join(folder, filename)
if not os.path.isfile(infilename): continue
base, extension = os.path.splitext(filename)
filein = open(infilename, 'r')
fileout = open(os.path.join(folder, '{}_edit.{}'.format(base, extension)), 'w')
for line in filein:
match = matcher.search(line)
if match:
fileout.write("<message> " + base64.b64encode(match.group('text').strip()) + " <End message>\n")
else:
fileout.write(line)
filein.close()
fileout.close()
Ultimately this gives me a bunch of blank files except for the last one which is translated properly.
You can use regular expression to make it easier as:
import re, base64
filein = open("examplein.txt", 'r')
fileout = open("exampleout.txt", 'w')
matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")
for line in filein:
match = matcher.search(line)
if match:
fileout.write("<message> " + base64.b64encode(match.group('text').strip()) + " <End message>\n")
else:
fileout.write(line)
filein.close()
fileout.close()
This code works just for one file; you should adapt it to work with all the files in your directory:
import re, base64
import os
# For every regular file in `folder`, write a "<name>_edit<ext>" copy in
# which the text between <Message> and <End Message> is base64-encoded.
folder = '/home/user/Public'
matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")
for filename in os.listdir(folder):
    infilename = os.path.join(folder, filename)
    if not os.path.isfile(infilename):
        continue
    base, extension = os.path.splitext(filename)
    # extension already carries its leading dot, so no extra '.' is inserted
    outfilename = os.path.join(folder, '{}_edit{}'.format(base, extension))
    # Both files are closed per iteration, even if a write fails
    with open(infilename, 'r') as filein, open(outfilename, 'w') as fileout:
        for line in filein:
            match = matcher.search(line)
            if match:
                text = match.group('text').strip()
                # b64encode works on bytes: encode first, then decode the
                # ASCII result back to str for writing to a text file
                encoded = base64.b64encode(text.encode('utf-8')).decode('ascii')
                fileout.write("<message> " + encoded + " <End message>\n")
            else:
                # Lines without a message (e.g. junk files) pass through unchanged
                fileout.write(line)
This code works in my pc

python os.rename(...) won't work !

I am writing a Python function to change the extension of a list of files into another extension, like txt into rar, that's just an idle example. But I'm getting an error. The code is:
import os
def dTask():
#Get a file name list
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
#Change the extensions
for file_name in file_list:
entry_pos = 0;
#Filter the file name first for '.'
for position in range(0, len(file_name)):
if file_name[position] == '.':
break
new_file_name = file_name[0:position]
#Filtering done !
#Using the name filtered, add extension to that name
new_file_name = new_file_name + '.rar'
#rename the entry in the file list, using new file name
print 'Expected change from: ', file_list[entry_pos]
print 'into File name: ', new_file_name
os.rename(file_list[entry_pos], new_file_name)
++entry_pos
Error:
>>> dTask()
Expected change from: New Text Document (2).txt
into File name: New Text Document (2).rar
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
dTask()
File "C:\Users\B\Desktop\dTask.py", line 19, in dTask
os.rename(file_list[entry_pos], new_file_name)
WindowsError: [Error 2] The system cannot find the file specified
I can succeed in getting the file name with another extension in variable level as you can see in the print-out, but not in reality because I can not end this process in OS level. The error is coming from os.rename(...). Any idea how to fix this ?
As the others have already stated, you either need to provide the path to those files or switch the current working directory so the os can find the files.
++entry_pos doesn't do anything. There is no increment operator in Python. Prefix + is just there for symmetry with prefix -. Prefixing something with two + is just two no-ops. So you're not actually doing anything (and after you change it to entry_pos += 1, you're still resetting it to zero in each iteration).
Also, your code is very inelegant - for example, you are using a separate index to file_list and fail to keep that in sync with the iteration variable file_name, even though you could just use that one! Here is how this can be done better:
-
def rename_by_ext(to_ext, path):
    """Give every entry in directory *path* the extension *to_ext*.

    *to_ext* may be given with or without its leading dot. Each entry
    returned by ``os.listdir(path)`` is renamed in place, and a progress
    line is printed for it.
    """
    # startswith also copes with an empty string, unlike indexing to_ext[0]
    if not to_ext.startswith('.'):
        to_ext = '.' + to_ext
    print("Renaming files in", path)
    for file_name in os.listdir(path):
        root = os.path.splitext(file_name)[0]
        new_name = root + to_ext
        # Report the NEW name, not the old one put back together
        print("Renaming", file_name, "to", new_name)
        os.rename(os.path.join(path, file_name), os.path.join(path, new_name))
rename_by_ext('.rar', '...')
os.rename really doesn't like variables. Use shutil. Example taken from How to copy and move files with Shutil.
import shutil
import os
source = os.listdir("/tmp/")
destination = "/tmp/newfolder/"
for files in source:
if files.endswith(".txt"):
shutil.move(files,destination)
In your case:
import shutil
shutil.move(file_list[entry_pos], new_file_name)
You also want to double backslashes to escape them in Python strings, so instead of
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
you want
file_list = os.listdir('C:\\Users\\B\\Desktop\\sil\\sil2')
Or use forward slashes - Python magically treats them as path separators on Windows.
You must use the full path for the rename.
import os
def dTask(directory=r'C:\Users\B\Desktop\sil\sil2'):
    """Change the extension of every entry in *directory* to ``.rar``.

    *directory* defaults to the folder the original code hard-coded; it is
    a raw string so the backslashes are not read as escape sequences.
    Only the final extension is replaced, and a name without an extension
    simply gets ``.rar`` appended.
    """
    # Rename the entry currently being iterated. The previous index variable
    # was reset to 0 on every pass (and "++" is a no-op), so it always
    # pointed at the first entry.
    for file_name in os.listdir(directory):
        new_file_name = os.path.splitext(file_name)[0] + '.rar'
        print('Expected change from: ', file_name)
        print('into File name: ', new_file_name)
        # Full paths on both sides so the current working directory is irrelevant
        os.rename(os.path.join(directory, file_name),
                  os.path.join(directory, new_file_name))
If you aren't in the directory C:\Users\B\Desktop\sil\sil2, then Python certainly won't be able to find those files.
import os
def extChange(path, newExt, oldExt=""):
    """Rename files in directory *path* so they end in ``.newExt``.

    path   -- directory to process, with or without a trailing separator.
    newExt -- extension to give the files, without the leading dot.
    oldExt -- if non-empty, only files whose extension is ``.oldExt`` are
              renamed; if empty (default), every regular file is renamed.

    Sub-directories are left untouched.
    """
    for entry in os.listdir(path):
        # os.path.join adds a separator only when one is needed
        source = os.path.join(path, entry)
        if not os.path.isfile(source):
            continue
        # splitext handles extensions of any length, not just three characters
        stem, ext = os.path.splitext(entry)
        if oldExt != "" and ext != "." + oldExt:
            continue
        target = os.path.join(path, stem + "." + newExt)
        if target != source:
            os.rename(source, target)
now call it:
extChange("C:/testfolder/","txt","lua") #this will change all .lua files in C:/testfolder to .txt files
extChange("C:/testfolder/","txt") #leaving the last parameter out will change all files in C:/testfolder to .txt

Categories

Resources