Is it possible to run a .html or .exe for example, that is inside a zipfile? I'm using the Zipfile module.
Here's my sample code:
import zipfile
# Open the archive and remember the last member whose name ends in ".html".
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = g = ""
for filename in z.namelist():
    # print filename
    y = len(filename)
    x = str(filename)[y - 5:]  # trailing five characters of the member name
    g = filename if x == ".html" else g
# Open the selected member as a file-like object.
f = z.open(g)
After f = z.open(g), I don't know what to do next. I tried using .read(), but it only reads what's inside the HTML file; what I need is for the file to be run or executed.
Or are there any other similar ways to do this?
The best approach will be to extract the required file to the Windows temp directory and execute it. I have modified your original code to create a temp file and execute it:
import zipfile
import shutil
import os
# Locate the first ".html" member of the archive, copy it into the Windows
# temp folder and open it with the application associated with it.
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
g = ""
basename = ""
for filename in z.namelist():
    print(filename)
    if filename.endswith(".html"):
        # get the file name and extension, dropping any folder part
        basename = os.path.basename(filename)
        g = filename
        print(basename)
        break  # found what was needed, no need to run the loop again
if not g:
    z.close()
    raise SystemExit("no .html file found in the archive")
temp = os.path.join(os.environ['temp'], basename)  # create temp file name
try:
    # copy the unzipped member to the Windows 'temp' folder; both handles are
    # closed even if the copy fails
    with z.open(g) as f, open(temp, "wb") as tempfile:
        shutil.copyfileobj(f, tempfile)
finally:
    z.close()
# os.startfile hands the path straight to the Windows shell association, so a
# temp path that contains spaces is not split the way os.system would split it
os.startfile(temp)  # run the file
Run the first .html file in a zip archive specified at the command line:
#!/usr/bin/env python
import os
import shutil
import sys
import tempfile
import webbrowser
import zipfile
from subprocess import check_call
from threading import Timer
# Usage: open-from-zip.py ARCHIVE.zip
if len(sys.argv) != 2:
    sys.exit('usage: %s ARCHIVE.zip' % sys.argv[0])

with zipfile.ZipFile(sys.argv[1], 'r') as z:
    # find the first html file in the archive (None when there is none, so
    # the user gets a message instead of a bare StopIteration traceback)
    member = next((m for m in z.infolist() if m.filename.endswith('.html')),
                  None)
    if member is None:
        sys.exit('no .html file found in %s' % sys.argv[1])
    # create temporary directory to extract the file to
    tmpdir = tempfile.mkdtemp()
    # remove tmpdir in 5 minutes
    t = Timer(300, shutil.rmtree, args=[tmpdir],
              kwargs=dict(ignore_errors=True))
    t.start()
    # extract the file; extract() returns the path it actually created,
    # which can differ from tmpdir + member.filename after name sanitising
    filename = z.extract(member, path=tmpdir)

# run the file
if filename.endswith('.exe'):
    check_call([filename])  # run as a program; wait for it to complete
else:  # open document using default browser
    webbrowser.open_new_tab(filename)  # NOTE: returns immediately
Example
T:\> open-from-zip.py file.zip
As an alternative to webbrowser you could use os.startfile(os.path.normpath(filename)) on Windows.
Related
My code currently unzips one zip file, finds the file called file.txt, and extracts it. Now I need to unzip multiple files that have the extension .zip. I have tried to adapt similar code, but the problem is that I now have to find a file called file.txt in each of those .zip files and extract only that file. I also need to store each file.txt in a separate folder that has the same name as the zip file it came from. Thank you in advance for your time.
import re
import os
from zipfile import ZipFile
def pain(input_dir="input_files"):
    """Unzip ``input_files.zip`` and write a per-line report to ``output2.txt``.

    The archive is extracted into the current directory and every ``.txt``
    member is additionally extracted under ``storage``. Each file in
    *input_dir* whose name contains ``file.txt`` is then passed to
    ``output_contents``, which appends its rows to the report.

    Args:
        input_dir: Directory scanned for input files. The same directory is
            used for listing and for opening, so the two can no longer
            disagree. The previous hard-coded ``'C:\\Users\\...'`` literal was
            not a raw string and is rejected by Python 3 as a bad ``\\U``
            escape.
    """
    print("\t\t\tinput_files.zip has been unzipped")
    with ZipFile('input_files.zip', 'r') as zipObj:
        zipObj.extractall()
        for fileName in zipObj.namelist():
            if fileName.endswith('.txt'):
                zipObj.extract(fileName, 'storage')

    outfile = "output2.txt"  # this will be the filename that the code will write to
    file_name1 = "file.txt"
    # Master column header, shown in the shell and written to the output file.
    header = 'Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n'
    print(header)
    # The with-block closes the report even if an input file fails to parse.
    with open(outfile, "wt") as baconFile:
        baconFile.write(header)
        for filename in os.listdir(input_dir):
            # Check the name first so unrelated entries (including
            # sub-directories) are never opened.
            if file_name1 not in filename:
                continue
            with open(os.path.join(input_dir, filename), 'r') as f:
                output_contents(filename, f, baconFile)
def output_contents(filename, f, baconFile):
    """Print and record one statistics row for every line read from *f*.

    Each row holds: the 1-based line number, the number of comma-separated
    fields, the lengths of the digit runs captured after an optional decimal
    point (comma-joined), and whether the line contains a space, a tab and a
    newline. Every row is printed and also written to *baconFile*, preceded
    by a newline.

    Args:
        filename: Label placed at the start of each row.
        f: Iterable of text lines (an open file).
        baconFile: Writable text stream that receives the rows.
    """
    decimals = re.compile(r'\d+\.?(\d+)?')
    layout = "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
    for index, line in enumerate(f, start=1):
        # the field count is taken from a plain split on commas
        content = line.split(',')
        whitespace_found = " " in line
        tab_found = "\t" in line
        # only the final line of a file can lack a newline
        carriage_found = True if '\n' in line else "False (end of file)"
        # one length per regex match: digits captured after the decimal point
        sigfigs = ','.join(str(len(g)) for g in decimals.findall(line))
        row = filename + layout.format(
            index, len(content), sigfigs,
            str(whitespace_found), str(tab_found), str(carriage_found))
        print(row)
        baconFile.write('\n')
        baconFile.write(row)


if __name__ == '__main__':
    pain()
#THIS WORKS
import glob
import os
from zipfile import ZipFile
def main():
    """Extract ``file.txt`` from every zip archive in the current directory.

    For each ``*.zip`` that contains a member named ``file.txt``, a directory
    named after the archive (without its extension) is created if needed and
    that single member is extracted into it. Archives without ``file.txt``
    are skipped.
    """
    for fname in glob.glob("*.zip"):  # get all the zip files
        with ZipFile(fname) as archive:
            # if there's no file.txt, ignore and go on to the next zip file
            if 'file.txt' not in archive.namelist():
                continue
            # make a new directory named after the zip file; a directory left
            # over from an earlier run is reused instead of raising
            dirname = os.path.splitext(fname)[0]
            if not os.path.isdir(dirname):
                os.mkdir(dirname)
            # extract file.txt into the directory you just created
            archive.extract('file.txt', path=dirname)
I have the following code:
os.listdir("staging")  # the returned listing is not used here

# Separate filename from extension
sep = os.sep


def _print_staging_listing():
    """Print a divider line followed by every entry currently in staging."""
    print ('\n--------------------------------\n')
    for entry in os.listdir("staging"):
        print (entry)


# Change the casing: lower-case the stem of every regular file
for n in os.listdir("staging"):
    print(n)
    current = "staging" + sep + n
    if not os.path.isfile(current):
        continue
    stem, extension = os.path.splitext(n)
    os.rename(current, "staging" + sep + stem.lower() + extension)

# Show the new file names
_print_staging_listing()

# Remove the blanks, -, %, and /
for n in os.listdir("staging"):
    print (n)
    current = "staging" + sep + n
    if not os.path.isfile(current):
        continue
    stem, extension = os.path.splitext(n)
    # substitutions are applied one after another, in this order
    for old, new in ((' ', '_'), ('-', '_'), ('%', 'pct'), ('/', '_')):
        stem = stem.replace(old, new)
    os.rename(current, "staging" + sep + stem + extension)

# Show the new file names
_print_staging_listing()
"""
In order to fix all of the column headers and to solve the encoding issues and remove nulls,
first read in all of the CSV's to python as dataframes, then make changes and rewrite the old files
"""
import os
import glob
import pandas as pd
# Every CSV path matched inside the staging folder
pattern = os.path.join("staging" + "/*.csv")
files = glob.glob(pattern)
print(files)

# Map each CSV path to its DataFrame: all columns read as strings, cp1252
# decoded, missing values replaced by empty strings
dict_ = {
    csv_path: pd.read_csv(csv_path, header=0, dtype=str, encoding='cp1252').fillna('')
    for csv_path in files
}
In the dictionary, the dataframes are keyed as "folder/name(csv)". What I would like to do is remove the prefix "staging/" from the keys in the dictionary.
How can I do this?
If all you want to do is truncate the file paths to just the filename, you can use os.path.basename:
# Store each DataFrame under the bare file name rather than the full path.
for csv_path in files:
    key = os.path.basename(csv_path)
    frame = pd.read_csv(csv_path, header=0, dtype=str, encoding='cp1252')
    dict_[key] = frame.fillna('')
Example:
# os.path.basename returns the final component of a path:
os.path.basename('Desktop/test.txt')
# -> 'test.txt'
import os
# Split a path into its directory part (head) and its last component (tail).
# NOTE: os.path.split only treats backslashes as separators on Windows.
pathname ='c:\\hello\\dickins\\myfile.py'
head, tail = os.path.split(pathname)
# print() with a single argument works on both Python 2 and Python 3
print(head)
print(tail)
This article here worked out just fine for me
import os
inputFilepath = 'path/to/file/foobar.txt'
# basename drops the directory part; splitext then separates the extension
filename_w_ext = os.path.basename(inputFilepath)
filename, file_extension = os.path.splitext(filename_w_ext)
# filename = foobar
# file_extension = .txt
# split needs the path as a string; the bare path/to/file/foobar.txt
# expression that was here is not a string literal and raises NameError
path, filename = os.path.split(inputFilepath)
# path = path/to/file
# filename = foobar.txt
Hope it helps someone searching for this answer
In the same spirit as truncating the file paths, use pathlib from the Python standard library. It turns the path into an easy-to-use class.
from pathlib import Path
path = Path('Desktop/folder/test.txt')
path.name                # test.txt
path.stem                # test
path.suffix              # .txt
path.parent.name         # folder
# .name is a plain str, so climb with .parent (not .name) to go up a level
path.parent.parent.name  # Desktop
As ColdSpeed said, you can use "os.path.basename" to truncate a file path to its name, but I think what you are referring to is the ability to pycache the data?
For Example here is my Directory:
You see the pycache folder? that initializes it as a module.
Then, you can import a file from that module (for example the staging.txt file and operate on it.)
I use the IpConfig.txt File from the assets folder level (or should be) and take a line of information out of it.
import pygame as pyg
import sys
import os
import math
import ssl
import socket as sock
import ipaddress as ipad
import threading
import random
# Printed once the import statements above have completed.
print("Modules Installed!")
# NOTE(review): this snippet's indentation was lost, so which statements sit
# inside the class or inside each function cannot be confirmed from here.
class two:
# Find out how to refer to class super construct
# NOTE(review): no statements are shown for main's body, only the comment below.
def main(Display, SecSock, ipadd, clock):
# I have code here that has nothing to do with the question...
# NOTE(review): __init__ is declared without a `self` parameter.
def __init__():
print("Initializing[2]...")
# Initialization of Pygame and SSL Socket goes here
searchQuery = open("IpConfig.txt", 'r') # Opens IpConfig.txt by relative path (expected at the top level of the game files); no close() call is shown
step2 = searchQuery.readlines()# read every line of the file into a list
ipadd = step2[6] # Seventh line of the file: reference whichever line you want to copy or manipulate.
# NOTE(review): gameDisplay, SSLSock and clock are not defined in the lines shown.
main(gameDisplay, SSLSock, ipadd, clock)# Author reports this call fails with "main() is not defined"
print(ipadd)
print("Server Certificate Configuration Enabled...")
__init__() # Start up the procedure
Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds a filename
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my textfile (ant1.aiq,ant2.aiq and so on) with filenames which are present at some specific place(R:\Sample) and extract matching files into some other place (R:\sample\wsa).
I have an idea that I need to use functions like os.walk() and fnmatch.fnmatch(), shutil.copy() but I am not able to implement them
My code:
import os
import shutil
import fnmatch
# Read the list of wanted file names, one per line.
with open("aiq_hits.txt","r") as in_file:
    for line in in_file:
        pass  # I am stuck here
import os
import shutil
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
infilepath = "aiq_hits.txt"

# Names of the regular files that sit directly inside sourceDir
existingFiles = {
    entry
    for entry in os.listdir(sourceDir)
    if os.path.isfile(os.path.join(sourceDir, entry))
}

# Move every listed name that exists in sourceDir over to targetDir
with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname in existingFiles:
            shutil.move(os.path.join(sourceDir, fname),
                        os.path.join(targetDir, fname))
I hope this will suffice:
import os
def match_files(url, file_read, dest):
    """Move every file named in *file_read* from *url* into *dest*.

    Args:
        url: Source directory whose entries are matched against the list.
        file_read: Path of a text file with one file name per line.
        dest: Destination directory for the matching files.

    The directory listing, the current working directory and the list of
    moved names are printed. The working directory is left untouched, so
    relative *url* / *dest* paths keep their meaning.
    """
    file_list = os.listdir(url)
    print(file_list)
    print("Current working directory is " + os.getcwd())
    available = set(file_list)
    match = []
    # Text mode: names read here are str, like the entries from os.listdir
    # (binary mode yields bytes, which never compare equal on Python 3).
    with open(file_read, 'r') as f:
        for line in f:
            file_name = line.strip()
            if file_name in available:
                match.append(file_name)
                os.rename(os.path.join(url, file_name),
                          os.path.join(dest, file_name))
                # a name listed twice must not be moved a second time
                available.discard(file_name)
    print(match)
Here, url is the source directory (folder) whose files you want to match, file_read is the name (with path) of the file that contains the list of file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. these files won't remain in url after running the code.
Alternatively, you could use the glob module, which lets you enter an expression for the file name/extension and returns a list that you can loop over.
I'd use this module if the source directory can contain files with the same extension that you want to exclude from the loop.
Also, I'm assuming that the list of file names is not large, so storing it in a list won't be an issue.
E.g. (I haven't tested the code below):
from glob import glob
import os
import shutil
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"

# Candidate files in the source folder
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))

# Wanted names, one per line. Each line is stripped because readlines()
# keeps the trailing newline, which would stop any name from matching.
with open(in_file_list) as reader:
    data = {line.strip() for line in reader if line.strip()}

for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file
        # shutil.move(row, os.path.join(dst, file_name))
I have some txt files in a directory and I need to get the last 15 lines from all of them. How could I do it using python?
I chose this code:
from os import listdir
from os.path import isfile, join
dir_path= './'
# Regular files directly inside dir_path
files = [ f for f in listdir(dir_path) if isfile(join(dir_path,f)) ]
# One entry per file: the list of that file's last 15 lines.
# NOTE: each element of out is itself a list of lines, not a string.
out = [open(join(dir_path, file), "r").readlines()[-15:] for file in files]
f = open(r'./fin.txt','w')
f.writelines(out)
f.close()
but I get the error "TypeError: writelines() argument must be a sequence of strings". I think it's because of Russian letters in the lines.
import os
from collections import deque
root = '/some/path'
for filename in os.listdir(root):
    # os.listdir returns bare names, so join them to the root path before
    # opening; otherwise they are resolved against the current directory
    full_path = os.path.join(root, filename)
    # skip anything that isn't actually a file (sub-directories etc.)
    if not os.path.isfile(full_path):
        continue
    # might still want to sanity check it's text of a usable kind
    with open(full_path) as fin:
        last_15 = deque(fin, 15)
deque will automatically discard the oldest entry and cap the size at 15, so it's an efficient way of keeping just the "last" 'n' items.
Try this:
from os import listdir
from os.path import isfile, join

folder = "/path/to/folder"


def _last_lines(path, count=15):
    """Return the last *count* lines of the text file at *path*."""
    with open(path) as handle:
        return handle.readlines()[-count:]


# listdir yields bare names, so each one is joined to the folder before it
# is tested or opened
for name in listdir(folder):
    filepath = join(folder, name)
    if isfile(filepath):  # if need
        last_fifteen_lines = _last_lines(filepath)

# or, one line:
x = [_last_lines(join(folder, f)) for f in listdir(folder) if isfile(join(folder, f))]
Updated:
# Flat list holding the last 15 lines of every file, in file order
lastlines = []
for file in files:
    # the with-block closes each input file as soon as it has been read
    with open(join(dir_path, file), "r") as src:
        tail = src.readlines()[-15:]
    # a file whose final line has no newline must not run into the next file
    if tail and not tail[-1].endswith('\n'):
        tail[-1] += '\n'
    lastlines += tail
with open('./fin.txt', 'w') as f:
    f.writelines(lastlines)
from os import listdir
from os.path import isfile, join
dir_path= '/usr/lib/something'
# Regular files directly inside dir_path
files = [ f for f in listdir(dir_path) if isfile(join(dir_path,f)) ]
for file in files:
    # the with-block closes the handle instead of leaking one per file
    with open(join(dir_path, file), "r") as handle:
        filedata = handle.readlines()[-15:]
    #do something with the filedata
Hope this helps:
import os
current_dir = os.getcwd()
dir_objects = os.listdir(current_dir)
# Maps each file name in the current directory to its last 15 lines (bytes)
dict_of_last_15 = {}
for file in dir_objects:
    # sub-directories cannot be opened as files, so skip them
    if not os.path.isfile(file):
        continue
    # the with-block closes the handle; the old close() call referred to an
    # undefined name (file_to_check) and raised NameError
    with open(file, 'rb') as file_obj:
        dict_of_last_15[file] = file_obj.readlines()[-15:]
    print("#############: %s" % file)
    print(dict_of_last_15[file])
My program runs smoothly, but I want the files I download from FTP to be zipped on my local drive.
The problem is that only 1 file ends up in the zip after calling my main() function.
Here's my code:
import os
import upload
import download
import zipfile
import ConfigParser
import ftputil
def main():
    r"""Download the FTP download folder into D:\Temp, then zip those files.

    Connection settings are read from the ``main`` section of ``config.ini``.
    Every entry of the remote download folder is fetched into ``D:\Temp`` and
    each local file is then written to ``D:\Temp\project2.zip`` under the
    ``Project2b`` folder.
    """
    # create a folder Temp on d drive for later use
    path = r'D:\Temp'
    os.mkdir(path)

    # parse all the values at config.ini file
    config = ConfigParser.ConfigParser()
    config.readfp(open('config.ini'))
    server, username, password, uploads, downloads = [
        config.get('main', option)
        for option in ('Server', 'Username', 'Password',
                       'Upload folder', 'Download folder')
    ]

    # connect to ftp and fetch every remote entry into the temp folder
    ftp = ftputil.FTPHost(server, username, password)
    for remote_name in ftp.listdir(downloads):
        ftp.chdir(downloads)
        ftp.download(remote_name, os.path.join(path, remote_name))

    # zipping files
    # NOTE: the archive is reopened in 'w' mode on every pass of this loop
    absolute_path = r'D:\Temp'
    local_names = os.listdir(absolute_path)
    filepath = r'D:\Temp\project2.zip'
    for local_name in local_names:
        get_file = os.path.join(absolute_path, local_name)
        zip_name = zipfile.ZipFile(filepath, 'w')
        zip_name.write(get_file, 'Project2b\\' + local_name)


if __name__ == '__main__':
    print("cannot be")
When you do this :
# Every pass opens the archive again in 'w' mode before writing one file.
for entry in dirlist:
    get_file = os.path.join(absolute_path, entry)
    zip_name = zipfile.ZipFile(filepath, 'w')
    zip_name.write(get_file, 'Project2b\\' + entry)
you recreate the ZipFile for each file you want to zip; the "w" mode means the archive is recreated from scratch each time.
Try this (create the zip file before the loop) :
# Open the archive once, before the loop; the with-block closes it at the
# end so the zip's central directory is written out.
with zipfile.ZipFile(filepath, 'w') as zip_name:
    for entry in dirlist:
        get_file = os.path.join(absolute_path, entry)
        zip_name.write(get_file, 'Project2b\\' + entry)
Or this, it will open the zipfile in append mode:
for entry in dirlist:
    get_file = os.path.join(absolute_path, entry)
    # append mode keeps the members already stored; the with-block closes
    # the archive after each file instead of leaving a handle open per pass
    with zipfile.ZipFile(filepath, 'a') as zip_name:
        zip_name.write(get_file, 'Project2b\\' + entry)
Have a look at the shutil module. There is an example using shutil.make_archive():
http://docs.python.org/library/shutil.html
If you have a lot of files you can zip them in parallel:
import zipfile
from pathlib import Path, WindowsPath
from typing import List, Text
import logging
from time import time
from concurrent.futures import ThreadPoolExecutor
# Emit timestamped log records at DEBUG level and above.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt="%H:%M:%S",
    format="%(asctime)s - %(message)s",
)

# Directory that is searched for CSV files and that receives the archives.
PATH = r"\\some_directory\subdirectory\zipped"
def file_names() -> List[WindowsPath]:
    """Return the ``*.csv`` paths found directly under ``PATH``.

    The number of files found is logged at INFO level.
    """
    csv_files = [*Path(PATH).glob("./*.csv")]
    logging.info("There are %d files", len(csv_files))
    return csv_files
def zip_file(file: Path, out_dir=None) -> None:
    """Compress *file* into its own ``<stem>.zip`` archive.

    Args:
        file: Path of the file to compress. It is stored in the archive
            under its bare file name, using DEFLATE compression.
        out_dir: Directory that receives the archive. Defaults to the
            module-level ``PATH`` when omitted, so existing one-argument
            callers behave as before.
    """
    target_dir = PATH if out_dir is None else out_dir
    zip_file_name = Path(target_dir, f"{file.stem}.zip")
    # `archive` rather than `zip`, so the builtin is not shadowed
    with zipfile.ZipFile(zip_file_name, "w") as archive:
        archive.write(file, arcname=file.name, compress_type=zipfile.ZIP_DEFLATED)
def main(files: List[Path]) -> None:
    """Zip every path in *files* on a thread pool, logging progress.

    Args:
        files: Paths to compress; each one is handed to ``zip_file``.

    A progress record is logged every 100 completed files and a summary is
    logged at the end. Elapsed time is shown as minutes:seconds.
    """
    t0 = time()
    number_of_files = len(files)
    with ThreadPoolExecutor() as executor:
        for counter, _ in enumerate(executor.map(zip_file, files), start=1):
            # update progress every 100 files
            if counter % 100 == 0:
                logging.info(
                    # %02d pads the seconds so 65 s reads "1:05", not "1:5"
                    "Processed %d/%d. TT: %d:%02d",
                    counter,
                    number_of_files,
                    *divmod(int(time() - t0), 60),
                )
    logging.info(
        "Finished zipping %d files. Total time: %d:%02d",
        number_of_files,
        *divmod(int(time() - t0), 60),
    )


if __name__ == "__main__":
    files = file_names()
    main(files)
The best way to do this is to put debug statements in your for loops. There are two possibilities:
one is that the first for loop only downloads one file from the FTP folder;
two is that the first loop downloads all the files but the second loop zips only one of them.
Use print statements to see which files are downloaded/zipped in the loops. Good luck!