I use the code below to move files into their specific folders, but I don't know how to zip those folders at the end.
Note: I want to use the shutil module to zip the folders.
import shutil
import os
source = "/tmp/"
destination1 = "/tmp/music/"
destination2 = "/tmp/picture/"
destination3 = "/tmp/video/"

# Case-sensitive suffixes that send a file to each folder.
music_suffixes = (".MP3", ".wma", ".WMA", ".mp3")
picture_suffixes = (".png", ".PNG", ".jpg", ".JPG", ".GIF", ".gif")
video_suffixes = (".MP4", ".mp4", ".WMV", ".FLV", ".flv", ".wmv")

# Create any target folder that does not exist yet.
for folder in (destination1, destination2, destination3):
    if not os.path.exists(folder):
        os.makedirs(folder)

# Sort every entry of the source folder by its suffix.
for f in os.listdir(source):
    if f.endswith(music_suffixes):
        shutil.move(source + f, destination1)
    elif f.endswith(picture_suffixes):
        shutil.move(source + f, destination2)
    elif f.endswith(video_suffixes):
        shutil.move(source + f, destination3)

# now zipping:
# NOTE(review): the fourth argument (base_dir) evaluates to the single
# string 'musicvideopicture'.
shutil.make_archive("archive", 'zip', "/tmp/", "music" + "video" + "picture")
"music"+"video"+"picture"
gives you
'musicvideopicture'
The simplest way is to create a directory /tmp/archive/, put the music, video and picture folders inside it,
and then run
shutil.make_archive("archive",'zip',"/tmp/archive")
Edit:
consider using gztar :)
Edit2:
import shutil
import os
source = "/tmp/"
dest_base = "/tmp/archive/"
destination1 = dest_base + "music/"
destination2 = dest_base + "picture/"
destination3 = dest_base + "video/"
audio_ext = ('mp3', 'wma')
pictu_ext = ('png', 'jpg', 'gif')
video_ext = ('mp4', 'wmv', 'flv', 'avi')

# Map every recognised (lower-case) extension to the folder it belongs in,
# creating each folder on the way.
destination_by_ext = {}
for extensions, destination in (
    (audio_ext, destination1),
    (pictu_ext, destination2),
    (video_ext, destination3),
):
    if not os.path.exists(destination):
        os.makedirs(destination)
    for extension in extensions:
        destination_by_ext[extension] = destination

for f in os.listdir(source):
    # Only regular files are sorted; sub-directories stay where they are.
    if not os.path.isfile(source + f):
        continue
    # splitext gives '' for a name without a dot, so a file that is literally
    # called "mp3" is not mistaken for an MP3.
    ext = os.path.splitext(f)[1][1:].lower()
    destination = destination_by_ext.get(ext)
    if destination is not None:
        shutil.move(source + f, destination)

# now zipping:
shutil.make_archive("archive", 'gztar', "/tmp/archive")
Related
import os
import shutil
src_folder = r"C:\new1\\"
dst_folder = r"C:\new2\\"
file_name = 'testword.docx'

source_path = src_folder + file_name
target_path = dst_folder + file_name

if os.path.exists(target_path):
    # The name is already taken in the destination folder: insert 'Renamed'
    # between the stem and the extension.
    stem, suffix = os.path.splitext(file_name)
    target_path = os.path.join(dst_folder, stem + 'Renamed' + suffix)

# Otherwise the file keeps its original name.
shutil.move(source_path, target_path)
I was trying to write code that moves a file from one folder to another and renames it. The file is moved to the other folder, but I can't rename it. I am doing this in Python (Spyder). Can anyone help me with this?
I have a couple slides, each slide corresponds to a person. I need to name each file (.pptx) after the individual name it references. A lot of the examples I see on mass renaming have the renaming become sequential like:
file1
file2
file3
I need:
bob.pptx
sue.pptx
jack.pptx
I was able to change names using os found on this site https://www.marsja.se/rename-files-in-python-a-guide-with-examples-using-os-rename/:
import os, fnmatch
file_path = 'C:\\Users\\Documents\\Files_To_Rename\\Many_Files\\'
new_name = 'Datafile'

# Every .pptx entry of the folder, in the order os.listdir reports them.
files_to_rename = fnmatch.filter(os.listdir(file_path), '*.pptx')
print(files_to_rename)

# Each file gets the shared prefix followed by its zero-based position.
for i, file_name in enumerate(files_to_rename):
    new_file_name = '{}{}.pptx'.format(new_name, i)
    old_path = file_path + file_name
    new_path = file_path + new_file_name
    os.rename(old_path, new_path)
But again, this just names it:
Datafile1
Datafile2
etc
my example
import os
from pathlib import Path

folder = "c:\\tmp\\"

# Snapshot of the folder taken once, before anything is renamed.
files = os.listdir(folder)
for name in files:
    print(name)
    # Rename the entry to the same name with every dash removed.
    os.rename(folder + name, folder + name.replace("-", ""))
    # Create an empty "<original name>.ok" file next to it.
    Path(folder + name + '.ok').touch()  # if you need to add some extension
Here's how I ran your code (avoiding file paths I don't have!), getting it to print output not just rename
import os, fnmatch
file_path = '.\\'
new_name = 'Datafile'

# Collect the .pptx entries of the current folder and show them.
files_to_rename = fnmatch.filter(os.listdir(file_path), '*.pptx')
print(files_to_rename)

for i, file_name in enumerate(files_to_rename):
    new_file_name = new_name + str(i) + '.pptx'
    target = file_path + new_file_name
    # Show the new path before performing the rename.
    print(target)
    os.rename(file_path + file_name, target)
This gave me
.\Datafile0.pptx
.\Datafile1.pptx
...
and did give me the correct sequence of pptx files in that folder.
So I suspect the problem is that you are getting the file names you want, but you can't see them in Windows. Solution: show file types in Windows. Here's one of many available links as to how: https://www.thewindowsclub.com/show-file-extensions-in-windows
Thank you everyone for your suggestions, I think I found it with a friend's help:
import os, fnmatch
import pandas as pd
file_path = 'C:\\Users\\Documents\\FolderwithFiles\\'

# All .pptx entries in the folder; change the pattern to match another extension.
files_to_rename = fnmatch.filter(os.listdir(file_path), '*.pptx')

# Names.xlsx lists the new names in a column whose header is "Names".
df = pd.read_excel('Names.xlsx')
names = df['Names']

# Pair names and files in lockstep (zip instead of a nested loop).
for i, file_name in zip(names, files_to_rename):
    new_file_name = i + '.pptx'
    old_path = file_path + file_name
    os.rename(old_path, file_path + new_file_name)
    print(new_file_name)
I have tons of Word and Excel files. I want to convert the many Word files in a folder and its sub-folders to PDF, and I tried the following code.
The code does nothing (I mean no Word file is converted to PDF), although it raises no error.
What could be the problem? Is there another solution?
This is my code:
import os
from win32com import client
# Walk `path` and try to save every Word file whose name contains
# 'Addendum' as a PDF through Word's COM interface.
# NOTE(review): this is not a raw string, so the "\t" in "\test" is read as
# a tab character.
path = 'D:\programing\test'
word_file_names = []
word = client.DispatchEx("Word.Application")
for dirpath, dirnames, filenames in os.walk(path):
print (dirpath)
for f in filenames:
# NOTE(review): `re` is used here but no `import re` is visible in this snippet.
if f.lower().endswith(".docx") and re.search('Addendum', f):
# The extension test above is case-insensitive; this replace is case-sensitive.
new_name = f.replace(".docx", r".pdf")
# NOTE(review): list.append returns None, so in_file and new_file are both None.
in_file = word_file_names.append(dirpath + "\\" + f)
new_file = word_file_names.append(dirpath + "\\" + new_name)
doc = word.Documents.Open(in_file)
# 17 is the FileFormat value passed to SaveAs to request PDF output.
doc.SaveAs(new_file, FileFormat = 17)
doc.Close()
# Same steps as above for the older ".doc" extension.
if f.lower().endswith(".doc") and re.search('Addendum', f):
new_name = f.replace(".doc", r".pdf")
in_file = word_file_names.append(dirpath + "\\" + f)
new_file = word_file_names.append(dirpath + "\\" + new_name)
doc = word.Documents.Open(in_file)
doc.SaveAs(new_file, FileFormat = 17)
doc.Close()
word.Quit()
This is way easier:
from docx2pdf import convert
convert(word_path, pdf_path)
You can use comtypes,
from comtypes.client import CreateObject
import os
folder = "folder path"
wdFormatPDF = 17  # file-format code passed to SaveAs to request PDF output

wdToPDF = CreateObject("Word.Application")
try:
    for word_file in os.listdir(folder):
        if not word_file.endswith((".doc", ".docx")):
            continue
        word_path = os.path.join(folder, word_file)
        # The PDF sits next to the document, named "<document name>.pdf".
        pdf_path = word_path + ".pdf"
        # Remove a PDF left over from an earlier run before saving again.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        pdfCreate = wdToPDF.Documents.Open(word_path)
        try:
            pdfCreate.SaveAs(pdf_path, wdFormatPDF)
        finally:
            # Close the document even when saving fails.
            pdfCreate.Close()
finally:
    # Shut the Word application down instead of leaving it running.
    wdToPDF.Quit()
I solved this problem and fixed the code as follows:
import os
import win32com.client
import re
path = r'D:\programing\test'
word_extensions = ('.doc', '.docx')

word = win32com.client.Dispatch('Word.Application')
try:
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            # Split off the real extension so that "X.DOCX" is recognised and
            # renamed correctly; a case-sensitive str.replace would leave it
            # unchanged and the PDF would be saved under the document's own name.
            base, ext = os.path.splitext(f)
            if ext.lower() not in word_extensions:
                continue
            in_file = os.path.join(dirpath, f)
            new_file = os.path.join(dirpath, base + '.pdf')
            doc = word.Documents.Open(in_file)
            try:
                # 17 is the FileFormat value that requests PDF output.
                doc.SaveAs(new_file, FileFormat=17)
            finally:
                # Close the document even when saving fails.
                doc.Close()
finally:
    # Always shut Word down, even if a conversion raised.
    word.Quit()
I'm making a script that will encode files within a directory using b64/b16 and I'm using os.listdir to do so, but it also lists directories which causes problems since now it's trying to encode directories as if it were a file.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob
#C:\\Users\\Fedora\\Desktop\\Win 10
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)

# Copy of the listing, plus the total used for the progress display.
files = list(dirs)
filecount = len(files)
fileprogress = 0

# Encoders applied in order to every file's contents.
encoders = (
    base64.b16encode,
    base64.b64encode,
    base64.b16encode,
    base64.b64encode,
    base64.b16encode,
)

for fileprogress, x in enumerate(files, 1):
    os.system("cls")
    print("File " + str(fileprogress) + "/" + str(filecount))
    print("Encrypting " + x + "...")

    # Read the whole entry as bytes.
    with open(path + "\\" + x, "rb") as inputfile:
        data = inputfile.read()

    for encode in encoders:
        data = encode(data)

    # Write the result next to the original with a ".crypt" suffix.
    with open(path + "\\" + x + ".crypt", "wb") as outputfile:
        outputfile.write(data)
use filter
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(filepath)
# Keep only regular files. os.path.isfile needs the full path, not the bare
# entry name. list() makes the result reusable on Python 3, where filter()
# returns a one-shot iterator.
files = list(filter(lambda x: os.path.isfile(os.path.join(filepath, x)), dirs))
or list comprehension with os.path.isfile()
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(filepath)
# Keep only regular files. os.path.isfile needs the full path, not the bare
# entry name.
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]
You can use os.path.isdir function to check if the current file is a directory.
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.
Instead of using os.listdir(), you can use os.walk, which returns separate lists for files and directories.
python-oswalk-example
import os
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# os.walk yields one (directory, sub-directory names, file names) triple per
# directory below `path`. The loop variable has its own name so that `path`
# keeps pointing at the starting folder.
for (current_dir, dirs, files) in os.walk(path):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(current_dir)
    print(dirs)
    print(files)
pythoncentral os-walk
#Import the os module, for the os.walk function
import os
# Directory at which the walk begins
path = "C:\\Users\\Fedora\\Desktop\\Win 10"

# Report each directory, followed by one tab-indented line per file in it.
for current_dir, _subdirs, file_names in os.walk(path):
    print('Found directory: {}'.format(current_dir))
    for file_name in file_names:
        print('\t{}'.format(file_name))
I have the same problem as here but now I'm trying to do the same with python because it's more suited to the task.
I've started with this:
import os
import shutil
import random
import glob
# Skeleton that inspects every entry of root_dir and decides how many of its
# files should be copied to output_dir. The copy steps are not written yet.
# NOTE(review): uses Python 2 syntax (print statements, xrange).
root_dir = '/home/leonardo/Desktop/python_script/rfe'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
# Entries with more items than this are sampled instead of copied whole.
ref = 200
folders_root_dir = os.listdir(root_dir)
print folders_root_dir
count = len(folders_root_dir)
print count
for i in xrange(count):
# Path of the i-th entry of root_dir (presumably always a folder - TODO confirm).
folder_inside = root_dir + '/' + folders_root_dir[i]
print folder_inside
# Counts every entry in that folder, sub-directories included.
number_files_folder_inside = len(os.listdir(folder_inside))
print number_files_folder_inside
if number_files_folder_inside > ref:
# 20% of the entry count, rounded to the nearest whole number.
ref_copy = round(0.2*number_files_folder_inside)
print ref_copy
# here I have to copy 20% of the files in this folder to the output folder
else:
# NOTE(review): this branch holds only a comment, so it has no body yet.
# here I have to copy all files from the folder to the output_dir
I tried to use os.walk() but I'm new to python and selecting files while the function is working proved to be really tough.
You'll need to import these:
import os
import shutil
import random
You can get all the files in a directory like this:
files = [file for file in os.listdir(dir) if os.path.isfile(os.path.join(dir, file))]
Then use a conditional:
if len(files) < 200:
    # Small folder: take every file.
    chosen = files
else:
    # Amount of random files you'd like to select
    random_amount = 1000
    # sample() never returns the same file twice; cap the request at the
    # number of files that actually exist.
    chosen = random.sample(files, min(random_amount, len(files)))

# `dir` is the source folder and `dst` the destination folder, as in the
# surrounding snippets.
for file in chosen:
    # copyfile needs the full target file path, not just the folder.
    shutil.copyfile(os.path.join(dir, file), os.path.join(dst, file))
A more compact solution (also noticing that copyfile does not really do the job properly unless one specifies the target file name as well):
import os
import shutil
import random
def get_file_list(input_dir):
    """Return the names of the regular files directly inside *input_dir*.

    Sub-directories are left out; names are returned without the directory
    part, in the order os.listdir reports them.
    """
    names = []
    for entry in os.listdir(input_dir):
        if os.path.isfile(os.path.join(input_dir, entry)):
            names.append(entry)
    return names
def get_random_files(file_list, N):
    """Return up to *N* distinct entries of *file_list*, chosen at random.

    When *N* is larger than the list, every entry is returned (in random
    order) instead of raising ValueError. A negative *N* still raises
    ValueError, as random.sample does. *file_list* itself is not modified.
    """
    return random.sample(file_list, min(N, len(file_list)))
def copy_files(random_files, input_dir, output_dir):
    """Copy each name in *random_files* from *input_dir* into *output_dir*.

    *output_dir* is created when it does not exist yet. Without that,
    shutil.copy would treat the path as a file name and every copy would
    overwrite the previous one.
    """
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    for name in random_files:
        shutil.copy(os.path.join(input_dir, name), output_dir)
def main(input_dir, output_dir, N):
    """Copy *N* randomly selected files from *input_dir* to *output_dir*."""
    candidates = get_file_list(input_dir)
    selection = get_random_files(candidates, N)
    copy_files(selection, input_dir, output_dir)
import os
import shutil
import random
root_dir = '/home/leonardo/Desktop/python_script/qar'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
ref = 1

for root, dirs, files in os.walk(root_dir):
    # Count only the files of this folder; sub-directories are not files.
    number_of_files = len(files)
    if number_of_files > ref:
        # Big folder: copy a random 20% of its files. sample() picks each
        # file at most once, so exactly ref_copy distinct files are selected.
        ref_copy = int(round(0.2 * number_of_files))
        selected = random.sample(files, ref_copy)
    else:
        # Small folder: copy everything.
        selected = files
    for file_name in selected:
        file_to_copy = os.path.join(root, file_name)
        if os.path.isfile(file_to_copy):
            shutil.copy(file_to_copy, output_dir)
            print(file_to_copy)
print('Finished !')
This is what the final code looks like.
Thank you guys for the help!
Cheers!
I want this for splitting my dataset to train,test and validation.
here is my code :
import os
import shutil
import random
import numpy as np
dir = r'E:\down\imgs'
train_dir = r'E:/train_test_split/train'
test_dir = r'E:/train_test_split/test'
valid_dir = r'E:/train_test_split/validation'

# Regular files of the source folder (sub-directories are skipped).
files = [file for file in os.listdir(dir) if os.path.isfile(os.path.join(dir, file))]
total = len(files)

# 50% train, 30% test; validation takes whatever is left so that the three
# parts always add up to the total.
train_count = int(round(0.5 * total))
test_count = int(round(0.3 * total))
valid_count = total - train_count - test_count

# A random permutation of the file indices.
rndnums = random.sample(range(total), total)
print("len(files)", total)
print(rndnums)

# Consecutive, non-overlapping slices of the permutation. Each slice holds
# exactly the count computed above.
train_file_index = rndnums[:train_count]
test_file_index = rndnums[train_count:train_count + test_count]
valid_file_index = rndnums[train_count + test_count:]

train_file_name = [files[i] for i in train_file_index]
test_file_name = [files[i] for i in test_file_index]
valid_file_name = [files[i] for i in valid_file_index]

for target_dir, names in (
    (train_dir, train_file_name),
    (test_dir, test_file_name),
    (valid_dir, valid_file_name),
):
    # copyfile does not create folders, so make sure the target exists.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for file in names:
        shutil.copyfile(os.path.join(dir, file), os.path.join(target_dir, file))
maybe something like (untested)
import os
THRESHOLD = 200
# Raw strings: "\h" is not a valid escape sequence in a normal string literal.
root_dir = r"\home..."
output_dir = r"\home....."

for top, dirs, nondirs in os.walk(root_dir):
    # Handle at most THRESHOLD files from each directory.
    for name in nondirs[:THRESHOLD]:
        path = os.path.join(top, name)
        destination = os.path.join(output_dir, name)
        # os.rename moves the file to its new path; it does not copy it.
        os.rename(path, destination)
import random
import shutil
import os
rootdir = '/home/leonardo/Desktop/python_script/qar'
outdir = '/home/leonardo/Desktop/python_script/output_folder'
ref = 200

# Structure: {folder: [file1, file2], folder2: [file3, file4]}
dirsAndFiles = {}
for folder, _subdirs, names in os.walk(rootdir):
    dirsAndFiles[folder] = [f for f in names if os.path.isfile(os.path.join(folder, f))]

# items() and range-free code below run on both Python 2 and Python 3.
for folder, files in dirsAndFiles.items():
    if len(files) > ref:
        # Copy 20% of the files; sample() picks each file at most once.
        selected = random.sample(files, int(0.2 * len(files)))
    else:
        # Copy all files.
        selected = files
    for name in selected:
        shutil.copy(os.path.join(folder, name), outdir)