Compress all files in a folder with Python?

This code takes a bunch of files in a folder (selected by file name) and packs them into a bz2-compressed tar file. Is there a way I can modify this to only compress the files into bz2 (or gzip)? I do not want to deal with having them packaged into a tar; I just want to go through each file in a directory and compress it.
import os
from glob import glob
import tarfile

os.chdir(r'C:\Documents\FTP\\')
compression = "w:bz2"
extension = '.tar.bz2'
filename = 'survey_'
filetype = 'survey_report_*.csv'
tarname = saveloc + filename + extension  # saveloc is defined elsewhere in the script
files = glob(filetype)

tar = tarfile.open(tarname, compression)
for file in files:
    if file not in tarname:  # skip the archive itself
        print('Packaging file:', file)
        tar.add(file)
tar.close()
EDIT:
This code seems to work for some files, but for others it produces a 1 KB file, and when I open it there are just some random characters. Any suggestions?
import bz2
import os

location = r'C:\Users\Documents\FTP\\'
os.chdir(location)
filelist = os.listdir(location)
for file in filelist:
    data = open(file).read()
    try:
        output = bz2.BZ2File(file + '.bz2', 'wb')
        output.write(data)
    finally:
        output.close()
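For what it's worth, 1 KB outputs full of random characters usually point at reading the source in text mode: binary data gets decoded, mangled, or truncated before it ever reaches the compressor. A minimal sketch (using the same folder layout as above) that streams each file byte-for-byte into a .bz2, skipping files that are already compressed:

import bz2
import os
import shutil

location = r'C:\Users\Documents\FTP'
for name in os.listdir(location):
    path = os.path.join(location, name)
    if name.endswith('.bz2') or not os.path.isfile(path):
        continue  # skip directories and files we already compressed
    # open both ends in binary mode so nothing is decoded or truncated
    with open(path, 'rb') as source, bz2.open(path + '.bz2', 'wb') as target:
        shutil.copyfileobj(source, target)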

Related

How to extract a multi-part zip file in python?

Suppose I have some files that I downloaded from a server and they were zipped with 7zip in multiple parts; the format is something like myfile.zip.001, myfile.zip.002, ..., myfile.zip.00n. Basically, I need to extract their content into the same folder where they are stored.
I tried using zipfile, patoolib and pyunpack without success. Here is what I've done:
file_path = r"C:\Users\user\Documents\myfile.zip.001"  # I also tested with only .zip
extract_path = "C:\\Users\\user\\Documents\\"  # a raw string cannot end with a backslash

import zipfile
with zipfile.ZipFile(file_path, "r") as zip_ref:
    zip_ref.extractall(extract_path)  # myfile.zip.001 file isn't zip file

from pyunpack import Archive
Archive(file_path).extractall(extract_path)  # File is not a zip file

import patoolib
patoolib.extract_archive(file_path, outdir=extract_path)  # unknown archive format for file `myfile.zip.001'
Another way (that works, but it's very ugly) is this one:
import os
import subprocess
path_7zip = r"C:\Program Files (x86)\7-Zip\7z.exe"
cmd = [path_7zip, 'x', 'myfile.zip.001']
sp = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
But this requires the user to install 7-Zip on their computer, which isn't the kind of approach I'm looking for.
So, the question is: is there a way to extract/unzip multi-part files in the x.zip.001 format with Python?
You seem to be on the right track with zipfile, but you most likely have to concatenate the parts into a single zip file before using extractall.
import glob
import os
import zipfile

zip_prefix = "myfile.zip."

# N number of parts
parts = glob.glob(zip_prefix + '*')
n = len(parts)

# Concatenate the parts, in order, into one zip file
with open("myfile.zip", "wb") as outfile:
    for i in range(1, n + 1):
        filename = zip_prefix + str(i).zfill(3)
        with open(filename, "rb") as infile:
            outfile.write(infile.read())

# Extract the reassembled archive
with zipfile.ZipFile("myfile.zip", "r") as zip_ref:
    zip_ref.extractall(extract_path)
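If the parts are large, the same concatenation can be streamed with shutil.copyfileobj instead of slurping each part into memory. A sketch, assuming the zero-padded .001/.002 suffixes sort lexicographically:

import glob
import shutil
import zipfile

extract_path = r"C:\Users\user\Documents"
parts = sorted(glob.glob("myfile.zip.*"))  # .001, .002, ... sort correctly

with open("myfile.zip", "wb") as outfile:
    for part in parts:
        with open(part, "rb") as infile:
            shutil.copyfileobj(infile, outfile)  # stream in chunks

with zipfile.ZipFile("myfile.zip") as zip_ref:
    zip_ref.extractall(extract_path)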

Getting this error: RuntimeError: Proxy error(FileNotFoundException): Could not find file 'C:\Users\user\stuff\tests\pythonlearn.pdf'

import os
import aspose.words as aw

rootdir = 'C:/Users/user/stuff/tests'
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        a = os.path.join(subdir, file)
        doc = aw.Document(a)
        doc.save("utput.docx")
        doc = aw.Document("Output.docx")
        doc.save("output.pdf")
This is my program.
I am trying to run Python through a folder containing PDF files and decrypt them one by one by converting each to Word and then back to PDF. What am I doing wrong?
Don't use os.walk; use os.listdir(rootdir) instead. Also note that the file you save and the file you reopen must have the same name.
Example:
import os
import aspose.words as aw

root = "C:/Users/user/stuff/tests"
for item in os.listdir(root):
    path = os.path.join(root, item)
    if os.path.isfile(path):
        doc = aw.Document(path)  # open using the full path, not just the name
        doc.save("Output.docx")
        doc = aw.Document("Output.docx")
        doc.save("output.pdf")
[EDIT]
The code above can't find files in other folders, so I decided to use glob to find them all.
Here:
import os
import glob
import aspose.words as aw

# Set base directory
os.chdir("C:/Users/user/stuff/tests")
# Get all pdf files into a list
pdf_files = glob.glob("*.pdf")
for files in pdf_files:
    doc = aw.Document(files)
    doc.save("Output.docx")
    doc = aw.Document("Output.docx")
    doc.save("output.pdf")
[EDIT-2]
First, gather all the .pdf files into one list:
pdf_files = glob.glob("*.pdf")
other_pdf_files = glob.glob('*/*.pdf')
all_pdf_files = (*pdf_files, *other_pdf_files)
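As an aside, glob can also descend arbitrarily deep in one call via its standard recursive flag, which avoids keeping a separate pattern per directory level:

import glob

# "**" matches any number of nested directories when recursive=True
all_pdf_files = glob.glob("**/*.pdf", recursive=True)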
Secondly, you need to use PyPDF2 to get rid of the password.
Get unencrypted PDFs by passing each file through decrypt_pdf (don't forget to specify the password). For example (more detail here and here):
from PyPDF2 import PdfFileReader, PdfFileWriter

def decrypt_pdf(input_path, output_path, password):
    with open(input_path, 'rb') as input_file, \
            open(output_path, 'wb') as output_file:
        reader = PdfFileReader(input_file)
        reader.decrypt(password)

        writer = PdfFileWriter()
        for i in range(reader.getNumPages()):
            writer.addPage(reader.getPage(i))

        writer.write(output_file)
You can run the other parts in the same way:
for files in all_pdf_files:
    doc = aw.Document(files)
    ...
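Putting the pieces together, a rough sketch of the whole pipeline (assuming the password is known, the legacy PdfFileReader/PdfFileWriter API shown above, and a hypothetical "_decrypted" intermediate name for each input):

import os
import aspose.words as aw

for pdf in all_pdf_files:
    base = os.path.splitext(os.path.basename(pdf))[0]
    decrypted = base + "_decrypted.pdf"      # hypothetical intermediate file
    decrypt_pdf(pdf, decrypted, "password")  # password assumed to be known
    doc = aw.Document(decrypted)
    doc.save(base + ".docx")                 # one output per input, nothing overwritten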

Uncompress batch of "tar.gz" files in a folder

There is a folder which contains several "tar.gz" files. I want to uncompress these files, so I wrote the Python code below, but it reports the error: [Errno 2] No such file or directory: 'DT_20180322.tar.gz'. Why do I get this error, and how do I correct it? Thanks!
import os
import tarfile

files = [f for f in os.listdir('TE-xy/')]
for fname in files:
    if fname.endswith("tar.gz"):
        tar = tarfile.open(fname, "r:gz")
        tar.extractall()
        tar.close()
The problem is that you are using just the file name; you need to include the directory name as well.
import os
import tarfile

files = [f for f in os.listdir('TE-xy/')]
for fname in files:
    if fname.endswith("tar.gz"):
        fpath = os.path.join('TE-xy', fname)
        tar = tarfile.open(fpath, "r:gz")
        tar.extractall()
        tar.close()
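Equivalently, tarfile.open can be used as a context manager, which closes each archive even if extraction raises; a variant of the same loop:

import os
import tarfile

for fname in os.listdir('TE-xy/'):
    if fname.endswith("tar.gz"):
        fpath = os.path.join('TE-xy', fname)
        with tarfile.open(fpath, "r:gz") as tar:
            tar.extractall()  # the archive is closed automatically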

Walk directories and remove file extensions

I'm trying to remove all the Outlook .ost and .nst files from the user's folder on a network PC, and also to write the files that were removed into a CSV file.
I'm able to get it to find all the files in the directory and write them to a CSV file, but when I try to remove the files with os.remove it doesn't seem to run, so I've commented that line out for the time being.
I added the try and except to skip the files that are in use.
import os
import sys

sys.stdout = open("output_file.csv", "w")
try:
    for rootDir, subdir, files in os.walk("//network_pc_name/c$/Users"):
        for filenames in files:
            if filenames.endswith((".nst", ".ost")):
                foundfiles = os.path.join(rootDir, filenames)
                #os.remove(os.path.join(rootDir, filenames))
                print(foundfiles)
except:
    pass
sys.stdout.close()
I made some changes to the script as suggested and it appears to run a lot quicker; however, I can't seem to figure out how to ignore files which are in use.
I switched the file extensions to .xlsx and .txt to simulate an open .xlsx file hitting the permissions error, and to see whether the script would continue running and remove the .txt file.
I got the following error:
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: '//DESKTOP-HRLS19N/c$/globtest\Book1.xlsx'
import glob
import os

files = [i for i in glob.glob("//DESKTOP-HRLS19N/c$/globtest/**", recursive=True)
         if i.endswith((".xlsx", ".txt"))]
[os.remove(f) for f in files]

with open("output_file.csv", "w") as f:
    f.writelines("\n".join(files))
In my experience glob is much easier:
print([i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))])
Assuming that prints out the files you're expecting:
files = [i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True)
         if i.endswith((".nst", ".ost"))]
removed_files = []
for file in files:
    try:
        size = os.path.getsize(file)
        os.remove(file)
        removed_files.append(file + " Bytes: " + str(size))  # size must be cast to str
    except Exception as e:
        print("Could not remove file: " + file)

with open("output_file.csv", "w") as f:
    f.writelines("\n".join(removed_files))
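Since the output is meant to be a CSV, it may be cleaner to write it with the csv module, one row per removed file with separate path and size columns, rather than joining strings by hand. A sketch reusing the loop above:

import csv
import glob
import os

files = [i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True)
         if i.endswith((".nst", ".ost"))]

with open("output_file.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["path", "bytes"])  # header row
    for file in files:
        try:
            size = os.path.getsize(file)
            os.remove(file)
            writer.writerow([file, size])
        except OSError:  # in-use files raise PermissionError, an OSError subclass
            print("Could not remove file: " + file)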

How to Decompress all .Z files with one command

I'm using Debian. I have a lot of files in ".z" format. I can only decompress the files one by one with this command:
python z_unpack.py "example.z" "example"
This command runs with this script.
I want to decompress all the files in different directories with one command. Is that possible?
Sample layout of the .z files in the directory:
EXAMPLE/example1/example2.z
EXAMPLE/example1/example3.z
EXAMPLE/example2/example4.z
EXAMPLE/example3/example5.z
NOTE: These files are not gzip; they cannot be decompressed with gzip commands.
Can someone help?
You can use glob:
import glob, os

def z_unpack_dir(dir):
    os.chdir(dir)
    for file in glob.glob("*.z"):
        file_name = os.path.basename(file)  # file_name = "example.z"
        dest_name = file_name[:-2]          # dest_name = "example"
        z_unpack(file_name, dest_name)
or os.listdir:
import os

def z_unpack_dir(dir):
    for file_name in os.listdir(dir):
        if file_name.endswith(".z"):
            dest_name = file_name[:-2]  # dest_name = "example"
            # join with dir: listdir returns bare names, and we never chdir here
            z_unpack(os.path.join(dir, file_name), os.path.join(dir, dest_name))
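Since the sample layout has .z files spread across several subdirectories of EXAMPLE, a recursive variant (still assuming the z_unpack function from the linked script) could walk the whole tree in one call:

import glob
import os

def z_unpack_tree(root):
    # "**" with recursive=True descends into every subdirectory
    for path in glob.glob(os.path.join(root, "**", "*.z"), recursive=True):
        z_unpack(path, path[:-2])  # "example.z" -> "example"

z_unpack_tree("EXAMPLE")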
