Python error extracting some zip files

Python error extracting some zip files - python

I wrote a small app that, given a link, downloads a zip file (one with a different extension) and extracts it to a renamed folder.
For some reason it works for some of my zip files, but not for all of them.
I get a :
Traceback (most recent call last):
File "download_unzip.py", line 48, in <module>
main()
File "download_unzip.py", line 42, in main
shutil.move(unzip_file(temp_kmz),'temp_extracted/')
File "download_unzip.py", line 26, in unzip_file
fd = open(name, 'w')
IOError: [Errno 2] No such file or directory: 'models/model.dae'
My code is :
import sys , urllib , zipfile , os.path , argparse , shutil
parser = argparse.ArgumentParser(description="Download and Unzip")
parser.add_argument('url', help='The action to take (e.g. install, remove, etc.)')
args = parser.parse_args()
print args.url
url = args.url
temp_kmz="temp_kmz"
def unzip_file(path):
zfile = zipfile.ZipFile(path)
extracted_filename = zfile.infolist()[0].filename[:-1]
for name in zfile.namelist():
(dirname, filename) = os.path.split(name)
#print "Decompressing " + filename + " on " + dirname
if filename == '':
# directory
if not os.path.exists(dirname):
os.mkdir(dirname)
else:
# file
fd = open(name, 'w')
fd.write(zfile.read(name))
fd.close()
zfile.close()
return extracted_filename
def download_file():
urllib.urlretrieve (url, temp_kmz)
return True
def main():
if (download_file()):
print "Now deleting temp..."
shutil.rmtree('temp_extracted/')
print "unzipping.. and renaming folder"
shutil.move(unzip_file(temp_kmz),'temp_extracted/')
print "Finished!!"
else:
print "Error downloading file"
main()
my working downloaded file:
python download_unzip.py "http://dl.dropbox.com/u/2971439/dae.kmz"
The one that is not working:
python download_unzip.py
"http://dl.dropbox.com/u/2971439/rally_car_youbeq.kmz"
Please note that both files extract properly with my OS (Ubuntu)

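I fixed my problem with some heavy code changes (judging by the traceback, the failing archive had no directory entry for 'models/' ahead of the file, so the folder did not exist yet when open() tried to write 'models/model.dae'):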
import urllib2 ,argparse, shutil, urlparse , os , zipfile, os.path
from zipfile import ZipFile as zip
parser = argparse.ArgumentParser(description="Download and Unzip")
parser.add_argument('url', help='The action to take (e.g. install, remove, etc.)')
args = parser.parse_args()
print args.url
url = args.url
temp_kmz="temp_kmz"
def extractAll(zipName):
    """Extract every member of the archive *zipName* into the current directory.

    ZipFile.extractall() creates directory entries and any missing parent
    directories itself, and tolerates directories that already exist, so the
    function can be re-run over a previous extraction.

    Returns None. Raises zipfile.BadZipfile if *zipName* is not a zip archive.
    """
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zipName) as archive:
        archive.extractall()
def download(url, fileName=None):
    """Download *url* and save the response body to a local file.

    The file is written to *fileName* when one is given. Otherwise the name is
    taken from the response's Content-Disposition header, falling back to the
    last path segment of the response URL.

    Returns None. Raises IOError when no file name can be derived or the file
    cannot be written; the response is closed in every case.
    """
    def getFileName(url, openUrl):
        if 'Content-Disposition' in openUrl.info():
            # If the response has Content-Disposition, try to get filename from it
            cd = {}
            for part in openUrl.info()['Content-Disposition'].split(';'):
                # Split on the first '=' only: a value may itself contain '='.
                key, _, value = part.strip().partition('=')
                cd[key.strip()] = value.strip()
            suggested = cd.get('filename', '').strip("\"'")
            # Keep only the final component so a name supplied by the server
            # cannot place the file outside the current directory.
            filename = os.path.basename(suggested.replace('\\', '/'))
            if filename:
                return filename
        # if no filename was found above, parse it out of the final URL.
        return os.path.basename(urlparse.urlsplit(openUrl.url)[2])

    r = urllib2.urlopen(urllib2.Request(url))
    try:
        fileName = fileName or getFileName(url, r)
        if not fileName:
            # e.g. a URL ending in '/' with no Content-Disposition header
            raise IOError("cannot derive a file name from %r" % url)
        with open(fileName, 'wb') as f:
            shutil.copyfileobj(r, f)
    finally:
        r.close()
def main():
download(url,temp_kmz)
extractAll(temp_kmz)
main()

Related

How do i upload a folder containing metadata to pinata using a script in python-brownie?

I've been trying for the past 24 hours but can't find a solution.
This is the code:
import os
from pathlib import Path
import requests
PINATA_BASE_URL = "https://api.pinata.cloud/"
endpoint = "pinning/pinFileToIPFS"
# Change this filepath
filepath = "C:/Users/acer/Desktop/Ciao"
filename = os.listdir(filepath)
print(filename)
headers = {
"pinata_api_key": os.getenv("PINATA_API_KEY"),
"pinata_secret_api_key": os.getenv("PINATA_API_SECRET"),
}
def main():
with Path(filepath).open("rb") as fp:
image_binary = filepath.read()
print(image_binary)
response = requests.post(
PINATA_BASE_URL + endpoint,
files={"file": (filename, image_binary)},
headers=headers,
)
print(response.json())
if __name__ == "__main__":
main()
I tried to open the folder where the metadata is stored and then send the request with the list of files in the folder.
This is the error:
['no.txt', 'yeah.txt']
Traceback (most recent call last):
File "C:\Users\acer\Desktop\SOLIDITY_PYTHON\nft-bored-ape\scripts\upload_to_pinata.py", line 30, in <module>
main()
File "C:\Users\acer\Desktop\SOLIDITY_PYTHON\nft-bored-ape\scripts\upload_to_pinata.py", line 18, in main
with Path(filepath).open("rb") as fp:
File "C:\Users\acer\AppData\Local\Programs\Python\Python310\lib\pathlib.py", line 1119, in open
return self._accessor.open(self, mode, buffering, encoding, errors,
PermissionError: [Errno 13] Permission denied: 'C:\\Users\\acer\\Desktop\\Ciao'

Nevermind...
After some research i found the answer to my own question.
Here is the code:
# Tulli's script :-)
from brownie import config
import requests, os, typing as tp
PINATA_BASE_URL = "https://api.pinata.cloud/"
endpoint = "pinning/pinFileToIPFS"
# Here you could use os.getenv("VARIABLE_NAME"),
# i used config from my .yaml file. Your choice!
headers = {
"pinata_api_key": config["pinata"]["api-keys"],
"pinata_secret_api_key": config["pinata"]["api-private"],
}
def get_all_files(directory: str) -> tp.List[str]:
    """Return the absolute path of every file under *directory*, recursively.

    Paths are listed directory by directory, top-down, with both the
    sub-directories and the files of each directory visited in sorted order,
    so the result is the same on every run. An empty directory yields an
    empty list.
    """
    paths: tp.List[str] = []
    for root, dirs, files_ in os.walk(os.path.abspath(directory)):
        # Sorting in place steers os.walk's descent order as well.
        dirs.sort()
        paths.extend(os.path.join(root, name) for name in sorted(files_))
    return paths
def upload_folder_to_pinata(filepath):
    """Upload every file under the folder *filepath* to Pinata in one request.

    Each file is sent as a multipart "file" part whose name is its path
    relative to the parent of *filepath*, so the folder's own name is the
    first component (for ".../Desktop/Ciao" the names are "Ciao/no.txt",
    "Ciao/yeah.txt", ...). No path needs to be hard-coded.

    Returns the string "ipfs.io/ipfs/" followed by the "IpfsHash" field of the
    JSON response. Raises requests.HTTPError when the API answers with an
    error status.
    """
    from contextlib import ExitStack

    folder = os.path.abspath(filepath)
    parent = os.path.dirname(folder)
    all_files: tp.List[str] = get_all_files(folder)
    # ExitStack closes every opened file once the request has been sent,
    # including when opening a later file or the request itself fails.
    with ExitStack() as stack:
        files = [
            (
                "file",
                (
                    # Forward slashes only: per the original author's note,
                    # Pinata does not recognise the backslash.
                    os.path.relpath(file, parent).replace(os.sep, "/"),
                    stack.enter_context(open(file, "rb")),
                ),
            )
            for file in all_files
        ]
        response: requests.Response = requests.post(
            PINATA_BASE_URL + endpoint,
            files=files,
            headers=headers,
        )
    # Fail with the HTTP error rather than a KeyError on "IpfsHash" below.
    response.raise_for_status()
    # If you want to see all the stats, inspect response.json() instead.
    return "ipfs.io/ipfs/" + response.json()["IpfsHash"]
def main():
upload_folder_to_pinata("Put your full filepath here")
if __name__ == "__main__":
main()

python script searching for a string in files in directory and subdirectories

I have a python script that searches for a string in files in a directory and its subdirectories.
import os
from sys import argv
print(argv)
searchStr = argv[1]
def searchDir(dirCurrent):
try:
main_directory = os.listdir(dirCurrent)
for item in main_directory:
item_path = os.path.join(dirCurrent, item)
if os.path.isdir(item_path) == True:
searchDir(item_path)
else:
f = open(item_path, 'r')
file_contents = f.read()
if searchStr in file_contents:
print("found in file " + item_path)
except:
print("Unable to access the directory " + dirCurrent)
searchDir("C:\\Users\\myname-adm\\Documents")
It runs, but when it encounters folders without read permissions, the script stops. How can I modify it so it can keep on searching while skipping the folders without read access?
Thank you for your help.

This should do the trick:
import os
from sys import argv


def searchDir(dirCurrent, needle=None):
    """Recursively print the path of every file under *dirCurrent* containing *needle*.

    *needle* defaults to the module-level ``searchStr`` taken from the command
    line. A directory that cannot be listed, or a file that cannot be read as
    text, is reported and skipped so that the search continues.
    Returns None.
    """
    if needle is None:
        needle = searchStr
    try:
        main_directory = os.listdir(dirCurrent)
    except OSError:
        # Covers permission errors and directories that vanished meanwhile.
        print("Unable to access the directory " + dirCurrent)
        return
    for item in main_directory:
        item_path = os.path.join(dirCurrent, item)
        if os.path.isdir(item_path):
            searchDir(item_path, needle)
            continue
        try:
            with open(item_path, 'r') as f:
                file_contents = f.read()
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-text file: name the file itself, not its parent.
            print("Unable to read the file " + item_path)
            continue
        if needle in file_contents:
            print("found in file " + item_path)


if __name__ == "__main__":
    print(argv)
    searchStr = argv[1]
    searchDir("C:\\Users\\myname-adm\\Documents")

How to save data from python into a csv file

I've got a program that prints a "match" at the end. I want to save the data in this "match" to a CSV file; how can I do that? I wrote some code to save this variable, but it doesn't write anything.
Here's my code:
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
file_list = os.listdir(r"C:\\PROJECT\\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\\PROJECT\\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
writer.writerows('%s' %match)
The part where I tried to save it into a csv file is the last 3 lines of code.
Here's a print of the "match" format: https://gyazo.com/930f9dad12109bc50825c91b51fb31f3

The way your code is structured, you iterate over the matches in your for loop and then, once the loop has finished, save only the last match to your CSV. You probably want to write each match to the CSV inside the for loop instead.
Try replacing the last lines of your code (starting at the last for loop) with:
# Open the CSV once, before walking the tree, so the rows from every HTML
# file accumulate in it instead of only the last file's result being saved.
with open('score.csv', 'wt') as f:
    writer = csv.writer(f)
    for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
        for file_name in file_name_list:
            if not file_name.endswith('.html'):
                continue
            # os.walk yields bare file names; join them with dir_path, or
            # files outside the current working directory are not found.
            with open(os.path.join(dir_path, file_name)) as markup:
                soup = BeautifulSoup(markup.read())
            text = soup.get_text()
            # The pattern has two groups, so findall returns a list of
            # (first, second) tuples. The raw string keeps \S and \s intact.
            match = re.findall(r"PA/(\S*)\s*(\S*)", text)
            print(match)
            # writerows emits one CSV row per tuple; writerow would cram the
            # whole list into a single row.
            writer.writerows(match)

Assuming you already have your "match", you can use the CSV module in Python. The writer should get your job done.
It would be more helpful if you could elaborate on the format of your data.

Run file inside a zipfile?

Is it possible to run a .html or .exe for example, that is inside a zipfile? I'm using the Zipfile module.
Here's my sample code:
import zipfile
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = ""
g = ""
for filename in z.namelist():
#print filename
y = len(filename)
x = str(filename)[y - 5:]
if x == ".html":
g = filename
f = z.open(g)
After f = z.open(g), I don't know what to do next. I tried using .read(), but it only reads what's inside the HTML file; what I need is for it to run or execute.
Or are there any other similar ways to do this?

The best approach will be to extract the required file to the Windows temp directory and execute it. I have modified your original code to create a temp file and execute it:
import os
import shutil
import tempfile
import webbrowser
import zipfile

# Copy the first .html member of the archive into the system temp directory
# and open it with the program associated with it.
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
try:
    g = None
    for filename in z.namelist():
        print(filename)
        if filename.endswith(".html"):
            g = filename
            break  # found what was needed, no need to keep looping
    if g is None:
        raise SystemExit("no .html file found in the archive")
    # Use only the member's base name so that the archive cannot choose
    # where inside (or outside) the temp directory the copy lands.
    basename = os.path.basename(g)
    print(basename)
    # gettempdir() works on every platform, unlike os.environ['temp'].
    temp = os.path.join(tempfile.gettempdir(), basename)
    f = z.open(g)
    try:
        out = open(temp, "wb")
        try:
            shutil.copyfileobj(f, out)  # copy the unzipped file to the temp folder
        finally:
            out.close()
    finally:
        f.close()
finally:
    z.close()

# Launch the file without building a shell command line: a path containing
# spaces or shell metacharacters would break (or abuse) os.system().
if hasattr(os, "startfile"):
    os.startfile(temp)  # Windows: open with the associated application
else:
    webbrowser.open(temp)

Run the first .html file in a zip archive specified at the command line:
#!/usr/bin/env python
"""Open the first .html file found in the zip archive named on the command line."""
import os
import shutil
import sys
import tempfile
import webbrowser
import zipfile
from subprocess import check_call
from threading import Timer

with zipfile.ZipFile(sys.argv[1], 'r') as z:
    # find the first html file in the archive; None when there is none,
    # instead of letting next() raise a bare StopIteration
    member = next((m for m in z.infolist() if m.filename.endswith('.html')), None)
    if member is None:
        sys.exit("no .html file found in " + sys.argv[1])
    # create temporary directory to extract the file to
    tmpdir = tempfile.mkdtemp()
    # remove tmpdir in 5 minutes
    t = Timer(300, shutil.rmtree, args=[tmpdir], kwargs=dict(ignore_errors=True))
    t.start()
    # extract the file; extract() returns the path it actually created, which
    # can differ from tmpdir + member.filename once the name is sanitised
    filename = z.extract(member, path=tmpdir)

# run the file
# NOTE: the search above only matches '.html', so the '.exe' branch is
# reached only if that filter is widened.
if filename.endswith('.exe'):
    check_call([filename])  # run as a program; wait for it to complete
else:  # open document using default browser
    webbrowser.open_new_tab(filename)  # NOTE: returns immediately
Example
T:\> open-from-zip.py file.zip
As an alternative to webbrowser you could use os.startfile(os.path.normpath(filename)) on Windows.

How to unzip a file with Python 2.4?

I'm having a hard time figuring out how to unzip a zip file with 2.4. extract() is not included in 2.4. I'm restricted to using 2.4.4 on my server.
Can someone please provide a simple code example?

You have to use namelist() and extract(). Sample considering directories
import zipfile
import os.path
import os

# ZipFile.extract()/extractall() only exist from Python 2.6 on, so on 2.4
# every member is written out by hand from ZipFile.read().
zfile = zipfile.ZipFile("test.zip")
try:
    for name in zfile.namelist():
        (dirname, filename) = os.path.split(name)
        print("Decompressing " + filename + " on " + dirname)
        # dirname is '' for top-level members, and os.makedirs('') would raise.
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        # Directory entries end in '/' and have no file part to write.
        if filename:
            # Binary mode: text mode would corrupt non-text members on Windows.
            fd = open(name, "wb")
            try:
                fd.write(zfile.read(name))
            finally:
                fd.close()
finally:
    zfile.close()

There's some problem with Vinko's answer (at least when I run it). I got:
IOError: [Errno 13] Permission denied: '01org-webapps-countingbeads-422c4e1/'
Here's how to solve it:
# unzip a file
def unzip(path):
    """Extract every member of the zip archive *path* into the current directory.

    Works without ZipFile.extract(), so it also runs on Python 2.4. Parent
    directories are created as needed, including for archives that list
    files without separate directory entries. Member names are used as they
    appear in the archive, so only use this on archives you trust.
    Returns None.
    """
    zfile = zipfile.ZipFile(path)
    try:
        for name in zfile.namelist():
            (dirname, filename) = os.path.split(name)
            # makedirs handles nested directories; dirname is '' for
            # top-level members, which need no directory at all.
            if dirname and not os.path.exists(dirname):
                os.makedirs(dirname)
            if filename != '':
                # file (names ending in '/' are directory entries)
                fd = open(name, 'wb')  # binary: members are raw bytes
                try:
                    fd.write(zfile.read(name))
                finally:
                    fd.close()
    finally:
        zfile.close()

Modifying Ovilia's answer so that you can specify the destination directory as well:
def unzip(zipFilePath, destDir):
    """Extract every member of the zip archive *zipFilePath* beneath *destDir*.

    Works without ZipFile.extract(), so it also runs on Python 2.4. *destDir*
    and any missing parent directories are created as needed, whether or not
    the archive carries explicit directory entries.

    Returns None. Raises ValueError, before writing that member, if a member
    name (for example '../x' or an absolute path) would land outside *destDir*.
    """
    destRoot = os.path.abspath(destDir)
    # destRoot with exactly one trailing separator, for the containment test.
    prefix = os.path.join(destRoot, '')
    zfile = zipfile.ZipFile(zipFilePath)
    try:
        for name in zfile.namelist():
            target = os.path.abspath(os.path.join(destRoot, name))
            if not (target + os.sep).startswith(prefix):
                raise ValueError("unsafe path in archive: %r" % name)
            (dirName, fileName) = os.path.split(name)
            if fileName == '':
                # directory
                newDir = target
            else:
                newDir = os.path.dirname(target)
            if not os.path.isdir(newDir):
                os.makedirs(newDir)
            if fileName != '':
                # file
                fd = open(target, 'wb')
                try:
                    fd.write(zfile.read(name))
                finally:
                    fd.close()
    finally:
        zfile.close()

Not fully tested, but it should be okay:
import os
from zipfile import ZipFile, ZipInfo
class ZipCompat(ZipFile):
    """ZipFile with ``extract``/``extractall`` for Pythons whose zipfile lacks them.

    Members are written from ``ZipFile.read()``. The ``pwd`` arguments are
    accepted for signature compatibility only and are not used.
    """

    def __init__(self, *args, **kwargs):
        ZipFile.__init__(self, *args, **kwargs)

    def extract(self, member, path=None, pwd=None):
        """Extract *member* (a name or ZipInfo) under *path* (default: cwd).

        Returns the normalised path that was created.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)
        if path is None:
            path = os.getcwd()
        return self._extract_member(member, path)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract *members* (default: every member) under *path*."""
        if members is None:
            members = self.namelist()
        for zipinfo in members:
            self.extract(zipinfo, path)

    def _extract_member(self, member, targetpath):
        """Write the ZipInfo *member* below *targetpath* and return its path."""
        # Drop a trailing separator, but keep a bare root such as '/'.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
                and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]
        # Keep only real path components: leading '/', '.', and '..' are
        # dropped so that a member can never be written outside targetpath.
        parts = [part for part in member.filename.split('/')
                 if part not in ('', os.path.curdir, os.path.pardir)]
        if not parts:
            # Nothing left to create (e.g. the name was just '../').
            return os.path.normpath(targetpath)
        targetpath = os.path.normpath(os.path.join(targetpath, *parts))
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)
        if member.filename.endswith('/'):
            # directory entry
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath
        # open() rather than the Python-2-only file() builtin
        target = open(targetpath, "wb")
        try:
            target.write(self.read(member.filename))
        finally:
            target.close()
        return targetpath

I am testing in Python 2.7.3rc2, and ZipFile.namelist() does not return a separate entry for the subdirectory (which the code above relies on to create it), only a list of file names that include the subdirectory, as follows:
['20130923104558/control.json', '20130923104558/test.csv']
Thus the check
if fileName == '':
does not evaluate to True at all.
So I modified the code to check if the dirName exists inside destDir and to create dirName if it does not exist. File is extracted only if fileName part is not empty. So this should take care of the condition where a directory name can appear in ZipFile.namelist()
def unzip(zipFilePath, destDir):
    """Extract the zip archive *zipFilePath* into *destDir*.

    Does not depend on the archive containing directory entries: the
    directory part of every member name is created on demand, including
    nested directories and *destDir* itself.

    Returns None. Raises ValueError if a member's directory would fall
    outside *destDir* (for example a name starting with '../' or '/').
    """
    destRoot = os.path.abspath(destDir)
    zfile = zipfile.ZipFile(zipFilePath)
    try:
        for name in zfile.namelist():
            (dirName, fileName) = os.path.split(name)
            newDir = os.path.abspath(os.path.join(destRoot, dirName))
            # Refuse to create or write anything outside the destination.
            if not (newDir + os.sep).startswith(os.path.join(destRoot, '')):
                raise ValueError("unsafe path in archive: %r" % name)
            # Check if the directory exists; makedirs also creates any
            # missing parents, which a single os.mkdir cannot do.
            if not os.path.isdir(newDir):
                os.makedirs(newDir)
            if not fileName == '':
                # file
                fd = open(os.path.join(newDir, fileName), 'wb')
                try:
                    fd.write(zfile.read(name))
                finally:
                    fd.close()
    finally:
        zfile.close()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python error extracting some zip files - python

Related

How do i upload a folder containing metadata to pinata using a script in python-brownie?

python script searching for a string in files in directory and subdirectories

How to save data from python into a csv file

Run file inside a zipfile?

How to unzip a file with Python 2.4?

Categories

Resources