Passing a relative path in a function - python

Can someone tell me if the following function declaration is the correct way to pass a relative path to a function? The call is only taking one variable. When I include a second variable (absolute path), my function does not work.
def extract(tar_url, extract_path='.'):
The call that does not work:
extract(chosen, path)
This works, but does not extract:
extract(chosen)
Full Code:
def do_fileExtract(self, line):
defaultFolder = "Extracted"
if not defaultFolder.endswith(':') and not os.path.exists('c:\\Extracted'):
os.mkdir('c:\\Extracted')
raw_input("PLACE .tgz FILES in c:\Extracted AT THIS TIME!!! PRESS ENTER WHEN FINISHED!")
else:
pass
def extract(tar_url, extract_path='.'):
print tar_url
tar = tarfile.open(tar_url, 'r')
for item in tar:
tar.extract(item, extract_path)
if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
extract(item.name, "./" + item.name[:item.name.rfind('/')])
userpath = "Extracted"
directory = os.path.join("c:\\", userpath)
os.chdir(directory)
path=os.getcwd() #Set log path here
dirlist=os.listdir(path)
files = [fname for fname in os.listdir(path)
if fname.endswith(('.tgz','.tar'))]
for item in enumerate(files):
print "%d- %s" % item
try:
idx = int(raw_input("\nEnter the file's number:\n"))
except ValueError:
print "You fail at typing numbers."
try:
chosen = files[idx]
except IndexError:
print "Try a number in range next time."
newDir = raw_input('\nEnter a name to create a folder a the c: root directory:\n')
selectDir = os.path.join("c:\\", newDir)
path=os.path.abspath(selectDir)
if not newDir.endswith(':') and not os.path.exists(selectDir):
os.mkdir(selectDir)
try:
extract(chosen, path)
print 'Done'
except:
name = os.path.basename(sys.argv[0])
print chosen

It looks like you missed an escape character in "PLACE .tgz FILES in c:\Extracted AT THIS TIME!!! PRESS ENTER WHEN FINISHED!"
I don't think raw_input sees the prompt string as a raw string, just the user input.
But this shouldn't affect the functionality of your program.
Are you on Unix or Windows? I was under the impression that on Unix you use / (forward slash) instead of \\ (backslash) as the path separator.
I tested some code on this file:
http://simkin.asu.edu/geowall/mars/merpano0.tar.gz
The following code:
>>> from os import chdir
>>> import tarfile
>>> chdir(r'C:\Users\Acer\Downloads')
>>> tar_url = 'merpano0.tar.gz'
>>> print tar_url
merpano0.tar.gz
>>> tar = tarfile.open(tar_url, 'r')
>>> extract_path = 'C:\\Users\\Acer\\Downloads\\test\\'
>>> for item in tar:
tar.extract(item, extract_path)
executed cleanly with no problems on my end. In the test directory I got a single folder with some files, exactly as in the original tar file. Can you explain what you're doing differently in your code that might be bugging up?

Related

Python: simple batch rename files in windows folder

Trying to create a simple code, to batch rename a folder in windows.
Musts:
change every number , like "file02.txt", turn 02 into 0002
maybe work for every file format, like jpg, png, txt, docx and so on (because I'm not sure what will be in the folder, this code might be used for image sequences...)
Is this possible?
I did test versions, combination of the little knowledge I have, but it gets me confused.
my code so far:
import os
import sys
folder_path = os.listdir(raw_input("Insert folder path: "))
print "Files in folder: %s" % folder_path
# a split tool
def mysplit(s):
head = s.rstrip('0123456789')
tail = s[len(head):]
return head, tail
# a function to make a new name with needed 0000
def new_filename(filename):
file_name_part, ext = os.path.splitext(filename) # file01 and .ext
original_name, number = mysplit(file_name_part) # 01 and file
add_zero = number.rjust(4, "0") # add 0001
new_name = original_name + add_zero + ext
print new_name
# new_name comes like this ['file0001.txt'] but seperate, not in a list? Why?
for current_file_n in folder_path:
new = new_filename(current_file_n)
print list([new]) # trying to make the str into a list....
re_name = os.renames(current_file_n, new)
print re_name
print "Renamed files: %s" % folder_path
The desired outcome is the same as the beginning list, but collated with zeros,like this: ['file0001.txt', 'file0002.txt', 'file0003.txt'......'file0015.txt']
I've got errors like windows error: can't find file, and another error; can't connect str and list?
I need an explanation of what I'm doing wrong as simple as possible, or is there another method that I can use that will give me the desired outcome?
As martineau said your indentation is messed up.
Here's the working code:
import os
import sys
# a split tool
def mysplit(s):
    """Split *s* into its leading part and its trailing run of digits.

    Returns a ``(head, tail)`` tuple: ``tail`` is the (possibly empty) run
    of ASCII digits at the very end of ``s`` and ``head`` is everything
    that precedes it.
    """
    digits = '0123456789'
    cut = len(s)
    # Walk backwards until the first character that is not a digit.
    while cut > 0 and s[cut - 1] in digits:
        cut -= 1
    return s[:cut], s[cut:]
# a function to make a new name with needed 0000
def new_filename(filename):
    """Return *filename* with its trailing number left-padded to four digits.

    The extension is split off first, so ``file01.txt`` becomes
    ``file0001.txt``.  A stem that has no trailing digits gains ``0000``,
    and a number that is already four or more digits wide is kept as is.
    """
    stem, suffix = os.path.splitext(filename)  # 'file01', '.txt'
    prefix, digits = mysplit(stem)             # 'file', '01'
    padded = "0" * (4 - len(digits)) + digits  # '0001'
    return "".join([prefix, padded, suffix])
# new_name comes like this ['file0001.txt'] but seperate, not in a list? Why?
if __name__ == '__main__':
folder_path = os.listdir(raw_input("Insert folder path: "))
print "Files in folder: %s" % folder_path
renamed_files = []
for current_file_n in folder_path:
new = new_filename(current_file_n)
renamed_files.append(new) # Add renamed file's name to a list
try:
os.renames(current_file_n, new) #It doesn't return anything
print new
except:
print "Unexpected error while renaming %s:%s"%(new, sys.exc_info()[0])
print "Renamed files: %s" % renamed_files
Hope this helps
Your code can be simplified a lot by using regular expression substitution. re.sub() can take a replacement function. In this case adding leading zeroes to the first number found in the filename.
import os, re
def renumber_files(directory, zeroes=4):
    """Zero-pad the first number in the name of every entry in *directory*.

    Args:
        directory: Folder whose entries are renamed in place. May be an
            absolute or a relative path.
        zeroes: Minimum width of the padded number.

    Entries whose name contains no digits, or whose first number is already
    at least ``zeroes`` characters wide, are left untouched.
    """
    for filename in os.listdir(directory):
        new_name = re.sub(r'\d+', lambda m: m.group().zfill(zeroes),
                          filename, count=1)
        if new_name == filename:
            continue  # nothing to pad
        # Build full paths instead of chdir()-ing into the folder: listing a
        # relative ``directory`` after changing into it would look for the
        # folder inside itself, and the changed working directory would
        # leak to the caller.
        os.rename(os.path.join(directory, filename),
                  os.path.join(directory, new_name))
renumber_files(raw_input("Insert folder path: "))
This works because re.sub() can take a callable as the replacement argument.
Signature: re.sub(pattern, repl, string, count=0, flags=0)
Return the string obtained by replacing the leftmost non-overlapping
occurrences of the pattern in string by the replacement repl. repl
can be either a string or a callable; if a string, backslash escapes
in it are processed. If it is a callable, it's passed the match
object and must return a replacement string to be used.
In the lambda m.group() returns a string matching the pattern \d+. For instance "1", "564645" or "005".
The next step, str.zfill(4), turns those into "0001", "564645", or "0005".

XLRD/Python: Encrypted Excel or make exception

I need to go through the subdirectories of a given directory, search for excel files and then read their sheet names. A problem occurs when the loop finds an encrypted file. I tried to read files with xlrd and pandas. But I get an error:
xlrd.XLRDError Workbook is encrypted
I made an exception with pass method, but if I do so, the loop breaks at this point and the program stops. What do I have to do to pass that error and move on to the next file? Is there any method to check if the file is encrypted before the method xlrd.open_workbook which collapses the program? Or is it possible to make an exception where I could pass the error and move on?
import codecs
import xlrd
import os
import Tkinter
import tkFileDialog
def pyrexcel(path,file,list):
print 'do now: ' + path + '/' + file
workbook = xlrd.open_workbook(path + '/' + file, encoding_override='windows-1250')
sheet_names = workbook.sheet_names()
for sheet in sheet_names:
list.append(path+"/"+file+"/"+sheet)
def finder(destin,listka,list):
for pliczek in os.listdir(destin):
if os.path.isdir(destin +"/" +pliczek):
finder(destin +"/" +pliczek,listka,list)
elif pliczek[-4:] == ".xls":
pyrexcel(destin,pliczek,list)
elif pliczek[-4:] == ".rar" or pliczek[-4:] == ".zip":
listka.append(destin+pliczek)
root = Tkinter.Tk()
root.withdraw()
listaExcel = []
listaZip = []
dirname = tkFileDialog.askdirectory(parent=root, initialdir="/", title='choose dir to iterate')
print "you choose: " + dirname
try:
finder(dirname,listaZip,listaExcel)
except xlrd.XLRDError as e:
print e.message
pass
plik = codecs.open((dirname + "/listaZIP.txt"), 'w', "utf-8")
for item in listaZip:
plik.write("%s\n" % item)
plik.close()
plik = codecs.open((dirname + "/listaExcel.txt"), 'w', "utf-8")
for item in listaExcel:
plik.write("%s\n" % item)
plik.close()
continue should accomplish what you want as listed here:
"pass simply does nothing, while continue goes on with the next loop iteration. In your example, the difference would become apparent if you added another statement after the if: After executing pass, this further statement would be executed. After continue, it wouldn't."
Also I found this answer while trying to look for a way to find out if an Excel file is encrypted via python so thank you! :D
Source: Is there a difference between `continue` and `pass` in a for loop in python?
Python Docs: https://docs.python.org/2/reference/simple_stmts.html#continue

Python: detecting existing file : os.file.exists

I'm obviously doing something very wrong. I'd like to find files that are in one directory but not in a second directory (for instance, xxx.phn in one directory and xxx.wav in the second directory).
It seems that I cannot detect when a file is NOT present in the second directory (it always behaves as if all files were there)... I don't get any files displayed, although such files exist...
import shutil, random, os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
basename = os.path.splitext(fname)[0]
if (not os.path.exists(dest_folder+'/'+basename + '.' + dest_ext) ):
print str(i)+': No duplicate for: '+fname
i=i+1
print str(i)+' files found'
Can I suggest that you build the filename you're checking and print it before testing whether it exists:
dest_fname = dest_folder+'/'+basename + '.' + dest_ext
print "dest exists? %s" % dest_fname
os.path.exists(dest_fname)
Also as an aside please join paths using the join() method. (If you really want the basename without the leading path elements there's a basename() function).
I tried your program out and it worked for two simple flat directories. Here are the directory contents:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
b\a.csv
b\c.csv
And result of your script with a txt b csv as parameters. If your result was different, maybe you used different parameters?
0: No duplicate for: b.txt
1 files found
But when I added subdirectories:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
a\c\d.txt
a\c\e.txt # Missing from b\c directory
b\a.csv
b\c.csv
b\c\d.csv
Your script gives:
0: No duplicate for: b.txt
1: No duplicate for: d.txt # Error here
2: No duplicate for: e.txt
3 files found
To work with sub-directories you need to compute the path relative to the source directory, and then add it to the destination directory. Here's the result with a few other minor cleanups and prints to see what is going on. Note that fname is always just the file name and needs to be joined with d to get the whole path:
#!python2
import os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
relpath = os.path.relpath(os.path.join(d,fname),folder)
relbase = os.path.splitext(relpath)[0]
path_to_check = os.path.join(dest_folder,relbase+'.'+dest_ext)
if not os.path.exists(path_to_check):
print '{}: No duplicate for: {}, {} not found.'.format(i,os.path.join(folder,relpath),path_to_check)
i += 1
print i,'files found'
Output:
0: No duplicate for: a\b.txt, b\b.csv not found.
1: No duplicate for: a\c\e.txt, b\c\e.csv not found.
2 files found
What you're looking for are matching files, not duplicate ones. One problem is that you're not using the source_ext argument when searching. Another is that I think the command-line argument handling is messed up. Here's a corrected version that accomplishes what you're trying to do:
import os
import sys
if len(sys.argv) != 5:
    print("usage: python "
          "del_orphans_dir1_dir2.py "  # argv[0] (script name)
          "source_folder "             # argv[1]
          "source_ext "                # argv[2]
          "dest_folder "               # argv[3]
          "dest_ext")                  # argv[4]
    sys.exit(2)  # command line error

source_folder, source_ext, dest_folder, dest_ext = sys.argv[1:5]
# os.path.splitext() returns extensions with their leading dot, so both
# user-supplied extensions are normalised to that form.  Without this a
# source_ext given as "txt" would never equal the ".txt" from splitext().
if not source_ext.startswith('.'):
    source_ext = '.' + source_ext
if not dest_ext.startswith('.'):
    dest_ext = '.' + dest_ext

found = 0  # source files that have a counterpart in dest_folder
for d, ds, fs in os.walk(source_folder):
    # i is the 1-based position of fname within its own directory listing.
    for i, fname in enumerate(fs, start=1):
        basename, ext = os.path.splitext(fname)
        if ext != source_ext:
            continue  # only files of the requested source type are checked
        if os.path.exists(os.path.join(dest_folder, basename + dest_ext)):
            found += 1
        else:
            print('{}: No matching file found for: {}'.format(i, fname))
print('{} matches found'.format(found))
sys.exit(0)

Recursively renaming directory/file structures on a local file system

I'm trying to define arg1 outside of rename() and it does not work since dirs is not defined. If I use rename("dirs", False), the function does not work.
Any idea?
# Defining the function that renames the target
def rename(arg1, arg2):
for root, dirs, files in os.walk( # Listing
path, topdown=arg2):
for i, name in enumerate(arg1):
output = name.replace(pattern, "") # Taking out pattern
if output != name:
os.rename( # Renaming
os.path.join(root, name),
os.path.join(root, output))
else:
pass
# Run
rename(dirs, False)
Here's the whole program:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# This program batch renames files or folders by taking out a certain pattern
import os
import subprocess
import re
# Defining the function that renames the target
def rename(arg1, arg2):
for root, dirs, files in os.walk( # Listing
path, topdown=arg2):
for i, name in enumerate(arg1):
output = name.replace(pattern, "") # Taking out pattern
if output != name:
os.rename( # Renaming
os.path.join(root, name),
os.path.join(root, output))
else:
pass
# User chooses between file and folder
print "What do you want to rename?"
print "1 - Folders\n2 - Files\n"
valid = False
while not valid:
try:
choice = int(raw_input("Enter number here: "))
if choice > 2:
print "Please enter a valid number\n"
valid = False
else:
valid = True
except ValueError:
print "Please enter a valid number\n"
valid = False
choice = 3 # To have a correct value of choice
# Asking for path & pattern
if choice == 1:
kind = "folders"
elif choice == 2:
kind = "files"
else:
pass
path = raw_input("What is the path to the %s?\n " % (kind))
pattern = raw_input("What is the pattern to remove?\n ")
# CHOICE = 1
# Renaming folders
if choice == 1:
rename(dirs, False)
# CHOICE = 2
# Renaming files
if choice == 2:
rename(files,True)
# Success message
kind = kind.replace("f", "F")
print "%s renamed" % (kind)
Here is a corrected, cleaner version of your code:
#!/usr/bin/env python
import os
import sys
# the command like this: python rename dirs /your/path/name/ tst
def rename_entries(mode, pathname, pattern):
    """Strip *pattern* from directory or file names below *pathname*.

    Args:
        mode: ``'dirs'`` to rename directories, ``'files'`` to rename files.
        pathname: Root of the tree to walk.
        pattern: Substring removed from every name that contains it.

    Returns:
        The number of entries that were renamed.

    Raises:
        KeyError: If ``mode`` is neither ``'dirs'`` nor ``'files'``.
    """
    # Directories are walked bottom-up so that a parent is renamed only
    # after everything beneath it has been handled.
    topdown = {'dirs': False, 'files': True}
    renamed = 0
    for root, dirs, files in os.walk(pathname, topdown[mode]):
        names = dirs if mode == 'dirs' else files
        # Iterate the names directly; enumerate() would yield (index, name)
        # tuples, which have no replace() method.
        for name in names:
            newname = name.replace(pattern, '')
            if not newname or newname == name:
                continue  # nothing to strip, or the name would become empty
            os.rename(
                os.path.join(root, name),
                os.path.join(root, newname))
            renamed += 1
    return renamed


if __name__ == '__main__':
    # argv: mode ('dirs' or 'files'), path to walk, pattern to remove
    rename_entries(sys.argv[1], sys.argv[2], sys.argv[3])
This is better achieved as a command-line tool using the py library:
import sys
from py.path import local # import local path object/class
def rename_files(root, pattern):
"""
Iterate over all paths starting at root using ``~py.path.local.visit()``
check if it is a file using ``~py.path.local.check(file=True)`` and
rename it with a new basename with ``pattern`` stripped out.
"""
for path in root.visit(rec=True):
if path.check(file=True):
path.rename(path.new(basename=path.basename.replace(pattern, "")))
def rename_dirs(root, pattern):
"""
Iterate over all paths starting at root using ``~py.path.local.visit()``
check if it is a directory using ``~py.path.local.check(dir=True)`` and
rename it with a new basename with ``pattern`` stripped out.
"""
for path in root.visit(rec=True):
if path.check(dir=True):
path.rename(path.new(basename=path.basename.replace(pattern, "")))
def main():
"""Define our main top-level entry point"""
root = local(sys.argv[1]) # 1 to skip the program name
pattern = sys.argv[2]
if local(sys.argv[0]).purebasename == "renamefiles":
rename_files(root, pattern)
else:
rename_dirs(root, pattern)
if __name__ == "__main__":
"""
Python sets ``__name__`` (a global variable) to ``__main__`` when being called
as a script/application. e.g: Python renamefiles or ./renamefiles
"""
main() # Call our main function
Usage:
renamefiles /path/to/dir pattern
or:
renamedirs /path/to/dir pattern
Save this as renamefiles or renamedirs.
A common approach in UNIX is to name the script/tool renamefiles and symlink renamefiles to renamedirs.
Improvement Notes:
Use optparse or argparse to provide command-line options and a --help message.
Make rename_files() and rename_dirs() generic and move it into a single function.
Write documentation (docstrings)
Write unit tests.

Why the extract function stopped extracting?

Can someone explain and help me resolve why my function stopped extracting .tgz files when I added a counter to create folders with different names to keep the extracted folder from overwriting the previous one when I extracted another .tgz file in the same directory? What am I doing wrong? Thanks! Below are the two functions ... the first function extracts files properly; the second function extracts a numbered folder and quits.
Works:
def extract(tar_url, extract_path='.'):
print tar_url
tar = tarfile.open(tar_url, 'r')
for item in tar:
tar.extract(item, extract_path)
if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
extract(item.name, "./" + item.name[:item.name.rfind('/')])
Does not work:
global counter
counter=1
def extract(tar_url, extract_path='.'):
global counter
print tar_url
tar = tarfile.open(tar_url, 'wb')# changed from r to wb 6/12
for item in tar:
tar.extract(item, extract_path+"_%d"%counter)
counter+=1
if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
extract(item.name, "./" + item.name[:item.name.rfind('/')])
Here is how I call it in main (I'm using easygui):
direct = diropenbox(msg="Choose path to place extracted files!", title='SQA Extractor', default='c:\\Extracted')
msg = "Are you sure you want to extract?"
title = "Confirm"
os.chdir(direct)
try:
for root, dirname, files in os.walk(directory):
for file1 in files:
if file1.endswith(".tgz") or file1.endswith(".tar"):
extract(os.path.join(root, file1))
Perhaps it was this change that broke your code:
tar = tarfile.open(tar_url, 'r')
Changed to:
tar = tarfile.open(tar_url, 'wb')# changed from r to wb 6/12
Does the extract path with the counter exist?
for item in tar:
os.mkdir(extract_path + "_%d" % counter)
tar.extract(item, extract_path+"_%d" % counter)
counter+=1
if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
extract(item.name, "./" + item.name[:item.name.rfind('/')])
The original version relies on the created folder names matching the relative paths specified in the archive. In the new version, the recursive call tries to put the files into a folder without a 'tag' number, after extracting the other files at that level into one that does.
Try adding the tag to the path name used for the recursive call as well.
BTW, the Python-idiomatic spelling of item.name.find(".tar") != -1 is '.tar' in item.name.

Categories

Resources