I'm new to Python and trying to learn. I have a Python script that unzips compressed folders from a source location and extracts everything to a destination folder. In addition, I want to delete the source contents once they have been extracted. How would I achieve this? Thanks for the help in advance!
Basically, inside the path "L:\Python\Source Zipped files" I have multiple zipped folders. My script unzips each one and extracts it to the final destination. I'm looking for an approach where, once the first folder has been unzipped and extracted, it is deleted from the source folder. I've included a snippet of how the source folder looks.
[screenshot of the source folder contents]
Here is my script:
import os
import time
import logging
import zipfile
import shutil
import json

data_dir = r'L:\Python\Source Zipped files'
temp_dir = r'L:\Python\temp1'
new_dir = r'L:\Python\temp2'
final_dir = r'L:\Python\Destination Unzipped files'

big_list = os.listdir(data_dir)
archive_count = 0
file_count = 152865

basename1 = os.path.join(final_dir, 'GENERIC_ROUGHDRAFT')
basename2 = os.path.join(final_dir, 'XACTDOC')

my_time()  # my_time() is a timing helper defined elsewhere in the script

archive_count = len(big_list)
logging.info('Unzipping {} archives...'.format(archive_count))

for folder in big_list:
    prior_count = file_count
    logging.info('Starting: {}'.format(folder))
    # recreate the scratch directory for this archive
    try:
        shutil.rmtree(temp_dir)
    except FileNotFoundError:
        pass
    os.mkdir(temp_dir)
    with zipfile.ZipFile(os.path.join(data_dir, folder), mode='r') as a_zip:
        a_zip.extractall(path=temp_dir)
        archive_count += 1
        logging.info('Cumulative total of {} archive(s) unzipped'.format(archive_count))
        bigger_list = os.listdir(temp_dir)
        logging.info('Current archive contains {} subfolders'.format(len(bigger_list)))
        for sub_folder in bigger_list:
            with zipfile.ZipFile(os.path.join(temp_dir, sub_folder), mode='r') as b_zip:
                b_zip.extractall(path=new_dir)
            file1 = "%s (%d).%s" % (basename1, file_count, 'xml')
            file2 = "%s (%d).%s" % (basename2, file_count, 'xml')
            shutil.copy(os.path.join(new_dir, 'GENERIC_ROUGHDRAFT.xml'), file1)
            shutil.copy(os.path.join(new_dir, 'XACTDOC.xml'), file2)
            file_count += 1
        logging.info('{} subfolders unzipped'.format(file_count - prior_count))
    my_time()

logging.info('Total of {0} files -- {1} pairs -- should be in {2}'.format(2 * (file_count - 1), file_count - 1, final_dir))
time.sleep(1)
my_time()
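One minimal sketch of the deletion step, assuming each source archive should only be removed after it has extracted without raising an error, is to call os.remove on the zip at the end of the loop body:

import os
import zipfile

data_dir = r'L:\Python\Source Zipped files'
final_dir = r'L:\Python\Destination Unzipped files'

for name in os.listdir(data_dir):
    zip_path = os.path.join(data_dir, name)
    with zipfile.ZipFile(zip_path, mode='r') as src:
        src.extractall(path=final_dir)
    # extractall() returned without raising, so the source archive can be deleted
    os.remove(zip_path)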
Currently I have code that passes information to Report Portal from an XML file; each XML file sits in its own folder, and there are many such folders. At the moment the parser only passes on the data from the last XML file held in memory, even though it recognizes all the other files.
This is my code for now:
import os
import xml.etree.ElementTree as ET

# service, launch_name, rp_attributes, launch_doc, timestamp() and
# suite_item_id are set up earlier in the script (ReportPortal client code)
launch = service.start_launch(name=launch_name,
                              attributes=rp_attributes,
                              start_time=timestamp(),
                              description=launch_doc)

r_path = '\\\\esw-fs01\\esw_niagara_no_bck\\BuildResults\\master\\0.1.52.68_390534\\installation_area\\autotestlogs_top'

root = os.listdir(r_path)
for entry in root:
    subdir_path = os.path.join(r_path, entry)  # create the absolute path of the subdir
    if os.path.isdir(subdir_path):  # check if it is a folder
        subdir_entries = os.listdir(subdir_path)  # get the content of the subdir
        for subentry in subdir_entries:
            if subentry.endswith('.xml'):
                subentry_path = os.path.join(subdir_path, subentry)
                tree = ET.parse(subentry_path)
                root = tree.getroot()  # overwrites 'root' on every file

for subentry_path in root.iter('entry'):
    if subentry_path.get('type') == "TM":
        if suite_item_id != None:
            service.finish_test_item(item_id=suite_item_id, end_time=timestamp(), status=tm_verdict)
        suite = subentry_path.find('name').text
        description = subentry_path.find('messages').text
        verdict = subentry_path.find('verdict').text
        if verdict == "True":
            tm_verdict = "PASSED"
        elif verdict == "False":
            tm_verdict = "FAILED"
        suite_item_id = service.start_test_item(name=suite,
                                                description=description,
                                                attributes=rp_attributes,
                                                start_time=timestamp(),
                                                item_type="SUITE")
    if subentry_path.get('type') == "TR":
        name = subentry_path.find('name').text
        verdict = subentry_path.find('verdict').text
        link = subentry_path.find('link').text
        duration = 10
        description = subentry_path.find('messages').text
        if verdict == "True":
            verdict = "PASSED"
        elif verdict == "False":
            verdict = "FAILED"
        start_time = timestamp()
        item_id = service.start_test_item(name=name,
                                          description=description,
                                          start_time=start_time,
                                          parent_item_id=suite_item_id,
                                          item_type="STEP",
                                          parameters={"key1": "val1",
                                                      "key2": "val2"})
The goal is to make it read all the files. Any help will be appreciated.
You could first build a list of paths, then parse the files in a second loop.
# don't use 'root' here
src = os.listdir(r_path)
files = list()

for entry in src:
    subdir_path = os.path.join(r_path, entry)
    if os.path.isdir(subdir_path):
        subdir_entries = os.listdir(subdir_path)
        for subentry in subdir_entries:
            if subentry.endswith('.xml'):
                subentry_path = os.path.join(subdir_path, subentry)
                files.append(subentry_path)

for f in files:
    tree = ET.parse(f)
    root = tree.getroot()
    for subentry_path in root.iter('entry'):
        ...
As a side note, it's advisable to use something more canonical to get all the files, like glob:
import glob
filelist = glob.glob(os.path.join(r_path, "**/*.xml"), recursive=True)
filelist is now a list of paths to all the XML files under the source directory. This saves you a couple of lines and indentation levels.
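Putting the two together, a minimal sketch (assuming r_path is the top-level results folder from your question) could look like this:

import glob
import os
import xml.etree.ElementTree as ET

for xml_file in glob.glob(os.path.join(r_path, "**", "*.xml"), recursive=True):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for entry in root.iter('entry'):
        ...  # handle the "TM" / "TR" entries as in your original loop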
So I'm dealing with a script that needs to zip all files that share the same base name into a single archive. For example, the folder structure looks like this...
001.flt
001.hdr
001.prj
002.flt
002.hdr
002.prj
...
700.flt
700.hdr
700.prj
To get the files zipped, I have a script that can handle a single file type, but it does not recognize ["*.flt", "*.hdr", "*.prj"] together.
Is there a workaround to make the script recognize the files by their base names and group them accordingly? I would like each individual zip file to contain the matching files, zipped as
001.zip, 002.zip....
meaning each zip file contains the different file extensions:
001.zip(
001.hdr,
001.prj,
001.flt
)
import zipfile, sys, os, glob

inDir = r"\test\DEM"
outDir = r"\test\DEM_out"
filetype = "*.flt"

def zipfiletypeInDir(inDir, outDir):
    # Check that input directory exists
    if not os.path.exists(inDir):
        print ("Input directory %s does not exist!" % inDir)
        return False
    print ("Zipping filetype(s) in folder %s to output folder %s" % (inDir, outDir))
    # Loop through "filetype" in input directory; glob will match pathnames
    for inShp in glob.glob(os.path.join(inDir, filetype)):
        # Build the filename of the output zip file
        outZip = os.path.join(outDir, os.path.splitext(os.path.basename(inShp))[0] + ".zip")
        # Zip the "filetype"
        zipfiletype(inShp, outZip)
    return True

def zipfiletype(infiletype, newZipFN):
    print ('Starting to Zip ' + (infiletype) + ' to ' + (newZipFN))
    # Delete output zipfile if it already exists
    if (os.path.exists(newZipFN)):
        print ('Deleting ' + newZipFN)
        os.remove(newZipFN)
    # Output zipfile still exists, exit
    if (os.path.exists(newZipFN)):
        print ('Unable to Delete ' + newZipFN)
        return False
    # Open zip file object
    zipobj = zipfile.ZipFile(newZipFN, 'w')
    # Loop through "filetype" components
    for infile in glob.glob(infiletype.lower().replace(filetype, "*.flt")):
        # Skip .zip file extension
        if os.path.splitext(infile)[1].lower() != ".zip":
            print ("Zipping %s" % (infile))
            # Zip the "filetype" component
            zipobj.write(infile, os.path.basename(infile), zipfile.ZIP_DEFLATED)
    zipobj.close()
    return True

if __name__ == "__main__":
    zipfiletypeInDir(inDir, outDir)
    print ("done!")
If the possible duplicate I provided doesn't answer your question....
One way would be to iterate over all the file names and make a dictionary grouping all the files with the same name.
In [54]: import collections, os, zipfile

In [55]: zips = collections.defaultdict(list)

In [56]: for f in os.listdir():
    ...:     name, ext = os.path.splitext(f)
    ...:     zips[name].append(f)
Then iterate over the dictionary, creating a new zip file for each key and adding that key's files to it.
In [57]: outdir = r'zips'

In [58]: for k, v in zips.items():
    ...:     zname = k + '.zip'
    ...:     fpath = os.path.join(outdir, zname)
    ...:     # print(fpath)
    ...:     with zipfile.ZipFile(fpath, 'w') as z:
    ...:         for name in v:
    ...:             z.write(name)
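One practical note, assuming you run this from the directory that holds the .flt/.hdr/.prj files: the 'zips' output folder has to exist before ZipFile can create archives inside it, so something like this (Python 3) is worth running first:

import os
os.makedirs(outdir, exist_ok=True)  # create the 'zips' output folder if it is missing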
I found what I was looking for. This script identifies the file names and groups them accordingly using an iterator.
# group files into separate zip archives in a single directory, based on
# the individual file names
import fnmatch, os, glob, zipfile

# edit data folders for in and out variables
path = r"D:\Users\in_path"
out_path = r"C:\Users\out_path"

# create variables used in iterator
obj = os.listdir(path)
my_iterator = obj.__iter__()

# iterate each file name as '%s.*'
for obj in my_iterator:
    # define name of file for rest of iterator to perform
    name = os.path.splitext(obj)[0]
    print(name)
    # create a zip archive to store the data that is being compressed
    zip_path = os.path.join(out_path, name + '.zip')
    # 'zip_file' directs the data into the compressed archive
    zip_file = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED)
    os.chdir(path)
    # files are written to the archive with glob.glob
    for files in glob.glob('%s.*' % name):
        zip_file.write(os.path.join(path, files), files)
    # print each iteration of files being written
    print('All files written to %s' % zip_path)
    zip_file.close()
I have a folder (Molecules) with many sdf files (M00001.sdf, M00002.sdf and so on) representing different molecules. I also have a csv file where each row is a molecule name (M00001, M00002, etc.).
I'm writing code to pick files from the Molecules folder when their name appears as a row in the csv file.
First attempt
import os

path_to_files = '/path_to_folder/Molecules'  # path to Molecules folder

for files in os.listdir(path_to_files):
    names = os.path.splitext(files)[0]  # get the basename (molecule name)
    with open('molecules.csv') as ligs:  # open the csv file of molecule names
        for hits in ligs:
            if names == hits:
                print names, hits
            else:
                print 'File is not here'
However this returns nothing on the command line (literally nothing). What is wrong with this code?
I am not sure this is the best way (I only know that the following code works for my data), but if your molecules.csv has the standard csv format, i.e. "molecule1,molecule2,molecule3 ...", you can try to rearrange your code this way:
import os
import csv

path_to_files = '/path_to_folder/Molecules'  # path to Molecules folder

for files in os.listdir(path_to_files):
    names = os.path.basename(files)
    names = names.replace(".sdf", "")
    with open('molecules.csv', 'r') as ligs:
        content = csv.reader(ligs)
        for elem in content:
            for hits in elem:
                if names == hits:
                    print names, hits
                else:
                    print 'File is not here'
See csv File Reading and Writing for the csv module documentation.
I solved the problem with a rather brute-force approach:
import os
import csv
import shutil

path_to_files = None  # path to Molecules folder
new_path = None       # new folder to save files

os.mkdir(new_path)  # create the folder to store the molecules

hits = open('molecules.csv', 'r')
ligands = []
for line in hits:
    lig = line.rstrip('\n')
    ligands.append(lig)

for files in os.listdir(path_to_files):
    molecule_name = os.path.splitext(files)[0]
    full_name = '/' + molecule_name + '.sdf'
    old_file = path_to_files + full_name
    new_file = new_path + full_name
    if molecule_name in ligands:
        shutil.copy(old_file, new_file)
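For what it's worth, a slightly tidier sketch of the same idea (hypothetical paths; it assumes molecules.csv holds one molecule name per line) uses a set for the membership test:

import os
import shutil

path_to_files = '/path_to_folder/Molecules'   # folder with the .sdf files
new_path = '/path_to_folder/Selected'         # folder to copy matches into

if not os.path.isdir(new_path):
    os.mkdir(new_path)

# strip the trailing newline from each row before comparing
with open('molecules.csv') as hits:
    ligands = {line.strip() for line in hits}

for filename in os.listdir(path_to_files):
    if os.path.splitext(filename)[0] in ligands:
        shutil.copy(os.path.join(path_to_files, filename),
                    os.path.join(new_path, filename))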
I'm trying to put the path of each file under the Data folder into a files[] list. My current code only goes one level deep: it can read the files in a direct subfolder of Data, but it does not reach the subfolders of those subfolders without writing another loop. What I want is a loop that descends to any depth in the Data folder so I can collect all the file paths.
For example this is what I get:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt']
This is what I want (and it should keep going into deeper folders):
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt', 'Data/fge/Folder/dummy.png', 'Data/fge/Folder/AnotherFolder/data.dat']
This is my current code; what would I need to add or change?
import os
from os import walk

files = []
folders = []

for (dirname, dirpath, filename) in walk('Data'):
    folders.extend(dirpath)
    files.extend(filename)
    break

filecount = 0
for i in files:
    i = 'Data/' + i
    files[filecount] = i
    filecount += 1

foldercount = 0
for i in folders:
    i = 'Data/' + i
    folders[foldercount] = i
    foldercount += 1

subfolders = []
subf_files = []
for i in folders:
    for (dirname, dirpath, filename) in walk(i):
        subfolders.extend(dirpath)
        subf_files.extend(filename)
        break
    subf_files_count = 0
    for a in subf_files:
        a = i + '/' + a
        files = files
        files.append(a)
    print files
    subf_files = []

print files
print folders
Thanks a lot!
I don't understand what you are trying to do, especially why you break out of the walk after the first element:
import os

files = []
folders = []

for (path, dirnames, filenames) in os.walk('Data'):
    folders.extend(os.path.join(path, name) for name in dirnames)
    files.extend(os.path.join(path, name) for name in filenames)

print files
print folders
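If all you need is the flat list of file paths, glob with recursive=True (Python 3.5+) is another way to do it; a minimal sketch:

import glob
import os

# every file at any depth under Data/
files = [p for p in glob.glob(os.path.join('Data', '**', '*'), recursive=True)
         if os.path.isfile(p)]
print(files)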
I want to create a simple Python script that looks into folders and subfolders and creates a playlist named after the folder containing the mp3s. So far I have only come across Python modules that work on Linux, or that I could not figure out how to install (pymad).
It's just for my Android phone, so I figured the m3u format should do it. I don't care about any metadata other than the names of the mp3 files themselves.
I just looked at http://en.wikipedia.org/wiki/M3U and saw that it is quite easy to write m3u files; it should be doable with a simple Python write to a text file.
Here is my solution
import os
import glob

dir = os.getcwd()

for (path, subdirs, files) in os.walk(dir):
    os.chdir(path)
    if glob.glob("*.mp3") != []:
        _m3u = open(os.path.split(path)[1] + ".m3u", "w")
        for song in glob.glob("*.mp3"):
            _m3u.write(song + "\n")
        _m3u.close()
    os.chdir(dir)  # not really needed
I wrote up some code which will return a list of all nested playlist candidates based on your criteria:
import os

# Input: A path to a folder
# Output: List containing paths to all of the nested folders of path
def getNestedFolderList(path):
    rv = [path]
    ls = os.listdir(path)
    if not ls:
        return rv
    for item in ls:
        itemPath = os.path.join(path, item)
        if os.path.isdir(itemPath):
            rv = rv + getNestedFolderList(itemPath)
    return rv

# Input: A path to a folder
# Output: (folderName, path, mp3s) if the folder contains mp3s, else None
def getFolderPlaylist(path):
    mp3s = []
    ls = os.listdir(path)
    for item in ls:
        if item.count('mp3'):  # treats any name containing 'mp3' as a track
            mp3s.append(item)
    if len(mp3s) > 0:
        folderName = os.path.basename(path)
        return (folderName, path, mp3s)
    else:
        return None

# Input: A path to a folder
# Output: List of all candidate playlists
def getFolderPlaylists(path):
    rv = []
    nestedFolderList = getNestedFolderList(path)
    for folderPath in nestedFolderList:
        folderPlaylist = getFolderPlaylist(folderPath)
        if folderPlaylist:
            rv.append(folderPlaylist)
    return rv

print getFolderPlaylists('.')
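To turn those candidate tuples into actual playlist files, a short follow-up sketch (assuming each .m3u should sit inside the folder it describes) could be:

for folderName, path, mp3s in getFolderPlaylists('.'):
    with open(os.path.join(path, folderName + ".m3u"), "w") as m3u:
        for song in mp3s:
            m3u.write(song + "\n")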