How to extract only mp3 files from a ZIP archive - python

I have this code:
from zipfile import ZipFile
import os
import glob
# Ask the user which archive to unpack (`raw_input` is Python 2; use `input` on Python 3).
inp = raw_input("Specify a ZIP archive to extract:")
# The with-block closes the archive once extraction has finished.
with ZipFile(inp) as zf:
    # Called without arguments, extractall() unpacks every member into the current directory.
    zf.extractall()
It works fine and extracts all the files, but how do I extract only the .mp3 files from the archive that the user specifies?

To extract just the MP3 files from a ZIP archive, you could do the following:
from zipfile import ZipFile
import os
zip_file = r"c:\folder\myzip.zip"
target_folder = r"C:\Users\Fred\Desktop"
with ZipFile(zip_file, 'r') as my_zip:
    # Keep only members whose extension is .mp3; the comparison is
    # case-insensitive so names such as "SONG.MP3" are matched too.
    mp3_files = [
        name for name in my_zip.namelist()
        if os.path.splitext(name)[1].lower() == '.mp3'
    ]
    # Passing the filtered list as `members` restricts what gets extracted.
    my_zip.extractall(target_folder, mp3_files)
The list of files inside the ZIP file can be obtained using the namelist function. With this you can filter just those files ending with an mp3 extension. The extractall function lets you pass a list of all of the files you want to extract (it defaults to all files).

You could get a list of the names of the members in the archive, and only extract those ending with the suffix .mp3.

Related

How to read json files in subfolders?

I have a file path like this '/mnt/extract'. Now inside this extract folder, I have below 3 more subfolders -
subfolder1
subfolder2
subfolder3 (it has one .json file inside it)
The json in subfolder3 looks like this -
{
"x": "/mnt/extract/p",
"y": "/mnt/extract/r"
}
I want to extract the above json file from subfolder3 and concatenate the value - /mnt/extract/p for the key 'x' with one more string 'data' so that the final path will become '/mnt/extract/p/data' where I want to finally export some data. I tried the below approach but it's not working.
import os
# `path` is the directory to search; it is not defined in this snippet
# (presumably '/mnt/extract' from the description above).
for root, dirs, files in os.walk(path):
    for name in files:
        # Print the full path of every file found below `path`.
        print(os.path.join(root, name))
Using Python's built-in glob module, you can find files in folders and sub-folders.
Try this:
import glob
# '**' combined with recursive=True matches .json files at any depth below /mnt/extract.
# The pattern is absolute so the result does not depend on the current working directory.
files = glob.glob('/mnt/extract/**/*.json', recursive=True)
The files list will contain paths to all json files in the extract directory.
Try this:
import glob
import json
final_paths = []
# Absolute path, as given in the question, so the search does not depend on the working directory.
extract_path = '/mnt/extract'
files = glob.glob(extract_path + '/**/*.json', recursive=True)
for json_path in files:
    with open(json_path, 'r') as f:
        json_file = json.load(f)
    # Append the 'data' sub-directory to the base path stored under key 'x'.
    output_path = json_file['x'] + '/' + 'data'
    final_paths.append(output_path)
The final_paths list will contain the output path built from every JSON file in the folder structure.
import glob
import json
extract_path = '/mnt/extract'
files = glob.glob(extract_path + '/**/*.json', recursive=True)
# glob returns a list of matching paths; only the first match is read,
# and nothing is opened when there is no match at all.
if files:
    with open(files[0], 'r') as f:
        config = json.load(f)
    # Append the 'data' sub-directory to the base path stored under key 'x'.
    final_output_path = config['x'] + '/' + 'data'
In the above code, glob returns a list whose only element is the path of the JSON file. To make sure that a single path, and not the list, is passed to open, I took files[0], which picks that path from the list so the file can then be parsed easily. If anyone has another suggestion for handling the list returned by glob in a cleaner way, feel free to answer.

How can I extract all .zip extension in a folder without retaining directory using python?

Here is my code. I don't know how to loop over every .zip file in a folder; please help me. I want the contents of all 5 zip files to be extracted into one folder, without their directory names.
import os
import shutil
import zipfile
my_dir = r"C:\\Users\\Guest\\Desktop\\OJT\\scanner\\samples_raw"
my_zip = r"C:\\Users\\Guest\\Desktop\\OJT\\samples\\001-100.zip"
with zipfile.ZipFile(my_zip) as zip_file:
    zip_file.setpassword(b"virus")
    for member in zip_file.namelist():
        # Drop the folder part so every file lands directly in my_dir.
        filename = os.path.basename(member)
        # skip directories
        if not filename:
            continue
        # copy file (taken from zipfile's extract)
        # `open` replaces the Python 2-only `file`; opening both streams in the
        # `with` header closes the source even if the target cannot be opened.
        with zip_file.open(member) as source, \
                open(os.path.join(my_dir, filename), "wb") as target:
            shutil.copyfileobj(source, target)
This is a duplicate question; please refer to the link below.
How to extract zip file recursively in Python
What you are looking for is glob. Which can be used like this:
import glob
import os
import shutil
import zipfile
my_dir = r"C:\\Users\\Guest\\Desktop\\OJT\\scanner\\samples_raw"
#assuming all your zip files are in the directory below.
for my_zip in glob.glob(r"C:\\Users\\Guest\\Desktop\\OJT\\samples\\*.zip"):
    with zipfile.ZipFile(my_zip) as zip_file:
        zip_file.setpassword(b"virus")
        for member in zip_file.namelist():
            # Drop the folder part so every file lands directly in my_dir.
            filename = os.path.basename(member)
            # skip directories
            if not filename:
                continue
            # Copy the member's bytes into my_dir; both streams are closed by the with-block.
            with zip_file.open(member) as source, \
                    open(os.path.join(my_dir, filename), "wb") as target:
                shutil.copyfileobj(source, target)

Zip single file

I am trying to zip a single file in python. For whatever reason, I'm having a hard time getting down the syntax. What I am trying to do is keep the original file and create a new zipped file of the original (like what a Mac or Windows would do if you archive a file).
Here is what I have so far:
import zipfile
# Build the source path, then derive the archive path by swapping the extension.
myfilepath = '/tmp/%s' % self.file_name
myzippath = myfilepath.replace('.xml', '.zip')
# NOTE: ZipFile.write() expects the *name* of a file on disk, but here it is
# handed the file's contents, which is why this line does not work.
zipfile.ZipFile(myzippath, 'w').write(open(myfilepath).read()) # does not zip the file properly
The correct way to zip file is:
import zipfile
# assume your xxx.py under the same dir with hello.csv
# The with-block closes the archive; closing is what writes the final ZIP records to disk.
with zipfile.ZipFile('hello.zip', mode='w') as archive:
    archive.write("hello.csv")
The python official doc says:
ZipFile.write(filename, arcname=None, compress_type=None)
Write the file named filename to the archive, giving it the archive name arcname
You passed open(filename).read() to write(). That expression is a single string holding the entire contents of the file, so write() treats it as a file name and fails (typically with FileNotFoundError) because no file with that name exists.
If the file to be zipped (filename) is in a different directory called pathname, you should use the arcname parameter. Otherwise, it will recreate the full folder hierarchy to the file folder.
from zipfile import ZipFile
import os
# zip_file, pathname and filename are placeholders to be supplied by the reader.
with ZipFile(zip_file, 'w') as zipf:
    # arcname stores the entry under its bare file name instead of the full folder path.
    zipf.write(os.path.join(pathname, filename), arcname=filename)
Try calling zipfile.close() afterwards?
from zipfile import ZipFile, ZIP_DEFLATED
# ZIP_DEFLATED is imported by name: the bare module name `zipfile` is not in scope here.
zipf = ZipFile("main.zip", "w", ZIP_DEFLATED)
try:
    zipf.write("main.json")
finally:
    # Closing finalises the archive; `finally` makes sure it also happens if the write fails.
    zipf.close()
Since you also want to specify the directory try using os.chdir:
#!/usr/bin/python
from zipfile import ZipFile
import os
# The relative file names below are resolved against this directory.
os.chdir('/path/of/target/and/destination')
# The with-block closes the archive so that it is completely written to disk.
with ZipFile('archive.zip', 'w') as archive:
    archive.write('original_file.txt')
Python zipfile : Work with Zip archives
Python Miscellaneous operating system interfaces

Extracting the extracted with python

I have a zip file containing thousands of mixed .xml and .csv files. I used the following to extract the zip file:
import zipfile
# `archive` avoids shadowing the builtin `zip`; the with-block closes the file afterwards.
with zipfile.ZipFile(r'c:\my.zip') as archive:
    archive.extractall(r'c:\output')
Now I need to extract the thousands of individual zip files contained in the 'c:\output' folder. I am planning on concatenating just the .csv files into one file. Thank you for the help!
Try this code :
import os
import zipfile
output_dir = r'c:/output'
with zipfile.ZipFile(r'c:/my.zip') as outer:
    outer.extractall(output_dir)
    # Only nested .zip members need a second pass; folders and other files are skipped.
    inner_names = [n for n in outer.namelist() if n.lower().endswith('.zip')]
for inner_name in inner_names:
    inner_path = os.path.join(output_dir, inner_name)
    with zipfile.ZipFile(inner_path) as inner:
        # Match on the real extension: a substring test would also accept names like 'a.csv.bak'.
        csv_members = [m for m in inner.namelist() if m.lower().endswith('.csv')]
        inner.extractall(output_dir, csv_members)
    # The nested archive is no longer needed once its CSV files have been extracted.
    os.remove(inner_path)

How to unzip specific folder from a .zip with Python

I am looking to unzip a particular folder from a .zip in Python:
e.g. archive.zip contains the folders foo and bar, and I want to unzip foo to a specific location, retaining its folder structure.
Check zipfile module.
For your case:
import zipfile
archive = zipfile.ZipFile('archive.zip')
for file in archive.namelist():
    # The trailing slash limits the match to members inside the foo folder.
    if file.startswith('foo/'):
        # extract() recreates the member's folder path below the destination.
        archive.extract(file, 'destination_path')
You should close your zips....
import zipfile
archive = zipfile.ZipFile('archive.zip')
try:
    for file in archive.namelist():
        if file.startswith('foo/'):
            archive.extract(file, 'destination_path')
finally:
    # Close explicitly, even when an extraction raises.
    archive.close()
Or just use a safer method: a with statement will close your zip automatically.
import zipfile
# Leaving the with-block closes the archive automatically.
with zipfile.ZipFile('archive.zip') as archive:
    for file in archive.namelist():
        if file.startswith('foo/'):
            archive.extract(file, 'destination_path')
I like to reduce the list of names first so that the for loop doesn't parse through all the files in the zip archive:
import zipfile
with zipfile.ZipFile('archive.zip') as archive:
    # 'foo/' (with the slash) avoids also matching sibling entries such as 'foobar/'.
    names_foo = [i for i in archive.namelist() if i.startswith('foo/')]
    for file in names_foo:
        # No destination is given, so members are extracted below the current directory.
        archive.extract(file)
Using the zipfile library can be very slow.
Calling the external unzip tool is an alternative:
import subprocess
# An argument list (no shell) keeps the password and path from being re-parsed by a shell;
# check=True raises CalledProcessError if unzip exits with an error instead of ignoring it.
subprocess.run(['unzip', '-P', 'your-password', 'path/to/file.zip'], check=True)

Categories

Resources