Python - Creating bad zip files with ZipFile - python

My script is downloading files from URLs located in a text file, saving them temporarily to a given location, and then adding them to an already existing zip file in the same directory. The files are being downloaded successfully, and no errors are raised when adding to the zip files, but for some reason, most of the resulting zip files are un-openable by the OS, and when I z.printdir() on them, they do not contain all the expected files.
relevant code:
# Walk the downloads tree bottom-up (topdown=False) and, for every directory whose
# path contains "artifacts", download the listed files and add them to that directory's zip.
# NOTE(review): indentation and the line break after "in" were lost in this listing,
# so the nesting is not visible here.
for root, dirs, files in
os.walk(os.path.join(downloadsdir,dir_dictionary['content']), False):
if "artifacts" in root:
# The second-to-last '/'-separated component of root is used as the zip file's base name.
solution_name = root.split('/')[-2]
with open(os.path.join(root,'non-local-files.txt')) as file:
for line in file:
if "string" in line:
print('\tDownloading ' + urllib.unquote(urllib.unquote(line.rstrip())))
# The last path component of the doubly-unquoted URL becomes the local file name.
file_name = urllib.unquote(urllib.unquote(line.rstrip())).split('/')[-1]
r = requests.get(urllib.unquote(urllib.unquote(line.rstrip())))
with open(os.path.join(root,file_name), 'wb') as temp_file:
temp_file.write(r.content)
# Opens the archive in append mode; no z.close() appears anywhere in this snippet.
z = zipfile.ZipFile(os.path.join(root, solution_name + '.zip'), 'a')
z.write(os.path.join(root,file_name), os.path.join('Dropoff', file_name))
I guess my question is: am I doing something inherently wrong in the code, or do I have to look at the actual files being added to the zip files? The files are all OS-readable and appear normal as far as I can tell. Kind of at a loss as to how to proceed.

# Walk the downloads tree bottom-up and, for every directory whose path
# contains "artifacts", download the listed files into that directory's zip.
for root, dirs, files in os.walk(
        os.path.join(downloadsdir, dir_dictionary['content']), False):
    if "artifacts" not in root:
        continue
    # The second-to-last '/'-separated component of root names the archive.
    solution_name = root.split('/')[-2]
    archive_path = os.path.join(root, solution_name + '.zip')
    with open(os.path.join(root, 'non-local-files.txt')) as url_list:
        for line in url_list:
            if "string" not in line:
                continue
            # unquote is applied twice, as before; decode once and reuse it.
            url = urllib.unquote(urllib.unquote(line.rstrip()))
            print('\tDownloading ' + url)
            file_name = url.split('/')[-1]
            r = requests.get(url)
            local_path = os.path.join(root, file_name)
            with open(local_path, 'wb') as temp_file:
                temp_file.write(r.content)
            # The archive must be closed after each append or its essential
            # records are never written; the with block guarantees that
            # even if write() raises.
            with zipfile.ZipFile(archive_path, 'a') as z:
                z.write(local_path, os.path.join('Dropoff', file_name))
PS:
https://docs.python.org/2/library/zipfile.html
Note
Archive names should be relative to the archive root, that is, they should not start with a path separator.
There is no official file name encoding for ZIP files. If you have unicode file names, you must convert them to byte strings in your desired encoding before passing them to write(). WinZip interprets all file names as encoded in CP437, also known as DOS Latin.

Related

Modify all PHP files within directory and subdirectories with Python

I'm having issues scanning through a root directory and modifying all .php files that contain a certain reference. Essentially, we're looking to move our entire database. I have to find all records of certain tables and rename them appropriately. Here's the code I have so far:
import os
import re
# Rewrite every .php file under `directory`, replacing the old database name with the new one.
directory = 'C:/Users/me/Desktop/wsphp'
for root, dirs, files in os.walk(directory):
for filename in files:
if filename.endswith('.php'):
print(filename)
# filename is the bare name yielded by os.walk; it is not joined with root here.
open_file = open(filename, 'r')
read_file = open_file.read()
regex = re.compile('OLD DATABASE NAME')
read_file = regex.sub('NEW DATABASE NAME', read_file)
# The same bare filename is reopened for writing; neither handle is closed in this snippet.
write_file = open(filename, 'w')
write_file.write(read_file)
My code breaks when it attempts to open the file. The problem seems to be that 'filename' refers to JUST the filename without the entire directory ('index.php' rather than 'C:/Users/me/Desktop/wsphp/Subfolder/Subfolder2/index.php'). The root directory contains a few .php files as well as a bunch of subdirectories. Is there an easier way to go about this?
As you suspected, filename is just the filename. The path to the file is stored in root, so you need to do
# Join with root (the directory os.walk is currently visiting) to get a path that can be opened.
open_file = open(os.path.join(root, filename), 'r')

Python change strings line by line in all files within a directory

So I have some files located in directory.
Some of the files contain paths like this and some are empty: C:\d\folder\project\folder\Folder1\Folder2\Folder3\Module.c
What would be the best way to cut each path by counting backslashes from the end? In this case we need to keep only what comes after the 4th backslash when counting backward:
Folder1\Folder2\Folder3\Module.c
I need some function that will go through all files and do this on each line of a file.
The current code, which does not work for some reason, is:
# Print the last four backslash-separated components of every line of every listed file.
# //path_to_dir// is the asker's placeholder for the directory path, not Python syntax.
directory = os.listdir(//path_to_dir//)
for file in directory:
# os.listdir yields bare names, so this open() resolves against the current working directory.
with open (file) as f:
for s in f:
# Only prints the shortened path; nothing is written back to the file.
print('\\'.join(s.split('\\')[-4:]))
I would try something like this:
from pathlib import Path
def change(s, keep=4):
    """Return the last *keep* backslash-separated components of *s*.

    Strings with fewer than *keep* components (including the empty
    string) are returned unchanged.

    Args:
        s: A single line, typically a Windows-style path.
        keep: How many trailing components to retain; must be positive.

    Raises:
        ValueError: If *keep* is less than 1 (a slice of ``[-0:]`` would
            silently return the whole path instead of nothing).
    """
    if keep < 1:
        raise ValueError("keep must be a positive integer")
    return '\\'.join(s.split('\\')[-keep:])
# Shorten every line of every file directly inside `folder`, in place.
folder = Path.cwd() / "folder"  # here is your folder with files
files = folder.glob("*")
for f in files:
    # glob("*") also yields sub-directories, which cannot be opened as text.
    if not f.is_file():
        continue
    with open(f, "r") as src:
        content = src.read()
    # Splitting and re-joining on '\n' keeps a trailing newline intact.
    new_lines = [change(line) for line in content.split('\n')]
    with open(f, "w") as dst:
        dst.write("\n".join(new_lines))
It looks for all files in the subfolder named folder, applies the replacement to every line of every file, and saves the files.

How to open files in a directory starting from a specific file

In order to open all the files in a specific directory (path). I use the following code:
for filename in os.listdir(path):  # For each file inside path
    # os.path.join works whether or not path ends with a separator.
    with open(os.path.join(path, filename), 'r') as xml_file:
        pass  # Do some stuff
However, I want to read the files in the directory starting from a specific position. For instance, if the directory contains the files f1.xml, f2.xml, f3.xml, ... ,f10.xml in this order, how can I read all the files starting from f3.xml (and ignore f1.xml and f2.xml) ?
Straightforward way
import itertools
import os
import re


def _natural_key(name):
    """Return a sort key that orders embedded numbers numerically.

    With this key 'f10.xml' sorts after 'f2.xml' rather than between
    'f1.xml' and 'f2.xml' as plain string sorting would place it.
    """
    # Splitting on a capturing group alternates text and digit chunks, so
    # odd positions are always digit runs and keys never compare str to int.
    parts = re.split(r'(\d+)', name)
    parts[1::2] = [int(digits) for digits in parts[1::2]]
    return parts


def files_from(names, first):
    """Return *names* in natural order, starting at *first*.

    Args:
        names: Iterable of file names, e.g. the result of os.listdir().
        first: Name of the first file to keep.

    Returns:
        A list beginning with *first* and containing everything that sorts
        after it; an empty list when *first* is not among *names*.
    """
    ordered = sorted(names, key=_natural_key)
    return list(itertools.dropwhile(lambda name: name != first, ordered))


if __name__ == '__main__':
    first = 'f3.xml'
    # os.listdir returns names in arbitrary order, so they are sorted before
    # the files preceding `first` are skipped.
    for filename in files_from(os.listdir(path), first):  # For each file inside path
        with open(os.path.join(path, filename), 'r') as xml_file:
            pass  # Do some stuff

python iterating over files and appending to txt

I'm trying to iterate over files in a folder, but the code is always executed on only one file (there are two files at the moment). The program should open the txt file, convert it to a Python list, and write the list to a new created file (each separately). I can't find the bug alone.
import io
import os
# Read every text file in the TEMP folder next to this script and pickle its stripped lines.
# NOTE(review): Python 2 syntax (print statements, generator .next()); the indentation
# of this listing was lost, so the original nesting is not visible here.
'''printing program directory'''
ANVCONDA_directory = os.path.dirname(os.path.realpath(__file__)) + os.sep
print ANVCONDA_directory
inputdir = ANVCONDA_directory + "TEMP"
print os.listdir(inputdir)
'''counting files in folder'''
# The first os.walk() result describes inputdir itself, so `files` holds only its direct files.
path, dirs, files = os.walk(inputdir).next()
file_count = len(files)
print file_count
for filename in os.listdir(inputdir):
#opening txt file to extract data
with io.open(inputdir + "\\" + filename, "r", encoding="cp1250") as file:
ocr_results = [line.strip() for line in file]
#printing each list entry in brackets
for line in ocr_results:
print("[" + line + "]")
#writing list to file
# NOTE(review): the reply that follows reports this write step sat outside the for loop
# in the original post — that cannot be confirmed from this flattened listing.
import pickle
with open('outfile' + filename, 'wb') as fp:
pickle.dump(ocr_results, fp)
The indentation for the block of code where you are writing the output seems to be wrong. It should be within the for loop. Since it is currently outside the loop it is only executed after the loop above has ended.
Another point that I would like to mention is that os.listdir includes everything in the folder, including subdirectories. Maybe you should look at os.path.isdir if you want to skip directories.

Python script for moving directories/files while ignoring certain directories/files using shutil?

I'm looking to build a python script that moves files/directories from one directory to another while referencing a list that notes the files to be copied over.
Here is what I have thus far:
import os, shutil
# Read origin & destination from secrets.py; readlines() keeps each line's
# trailing '\n' in the returned list.
with open('secrets.py', 'r') as f:
    paths = f.readlines()
# Strip out those '\n'
srcPath = paths[0].rstrip('\n')
destPath = paths[1].rstrip('\n')
# Empty destPath. Walking bottom-up removes a directory's contents before
# os.rmdir is called on the directory itself.
for root, dirs, files in os.walk(destPath, topdown=False):
    for name in files:
        os.remove(os.path.join(root, name))
    for name in dirs:
        os.rmdir(os.path.join(root, name))
# Copy files into the destination path, mirroring the source tree
for srcDir, dirs, files in os.walk(srcPath):
    # Replace only the leading srcPath so that the same text appearing
    # deeper in the path is left untouched.
    destDir = srcDir.replace(srcPath, destPath, 1)
    if not os.path.exists(destDir):
        os.mkdir(destDir)
    for file in files:
        srcFile = os.path.join(srcDir, file)
        destFile = os.path.join(destDir, file)
        if os.path.exists(destFile):
            os.remove(destFile)
        shutil.copy(srcFile, destDir)
The secrets.py files contains the src/dest paths.
Currently this transfers all files/directories over. I'd like to read in another file that allows you to specify which files to transfer (rather than making a "ignore" list).
You should read the file list
# Read the source/destination paths from secrets.py.
with open('secrets.py', 'r') as f:
    paths = f.readlines()
# Load the names of the files that may be copied, one per line. A real list
# (not a lazy map object) is built so it can be searched once per file.
with open("filelist.txt", "r") as f_list:
    file_list = [line.rstrip('\n') for line in f_list]
....
....
and check before copying
# Only files whose names appear in file_list are processed.
for file in files:
    if file in file_list:  # <--- this is the condition you need to add to your code
        srcFile = os.path.join(srcDir, file)
....
If your file list contains patterns of file names to be copied, try using Python's "re" module to match your file names.

Categories

Resources