Python: How to remove string from string - python

I have this snippet of code that looks like this:
server_directory = "/Users/storm/server"


def get_directory(self, username):
    """Return the display form of the current working directory.

    The server directory is shown as "/", the user's home directory
    (server directory + "/" + username) as "~", and any other directory
    as its unmodified path.
    """
    cwd = os.getcwd()
    aliases = {
        server_directory: "/",
        server_directory + "/" + username: "~",
    }
    # Anything that is not one of the two aliased directories is returned as is.
    return aliases.get(cwd, cwd)
Every time I change to a directory other than the server directory or the user's home directory, the result looks like /Users/storm/server/svr_user. How do I make it /svr_user2 instead of /Users/storm/server/svr_user, since I would like to emulate a home directory and a virtual "root" directory?

Although you can do a lot with string manipulation, a better way would be using os.path:
import os

src = '/Users/storm/server/svr_user'
dst = '/svr_user2'
a = '/Users/storm/server/svr_user/x/y/z'
# relpath() gives the part of `a` below `src`; join() re-roots it under `dst`.
os.path.join(dst, os.path.relpath(a, start=src))
returns
'/svr_user2/x/y/z'

The not so politically correct alternative of eumiro's answer would be:
import re
src = '/Users/storm/server/svr_user'
dst = '/svr_user2'
a = '/Users/storm/server/svr_user/x/y/z'
# re.escape() makes `src` match literally; without it a '.' or '+' in a
# directory name would be interpreted as a regex operator.
# count=1 replaces only the first occurrence.
re.sub(re.escape(src), dst, a, count=1)
Which yields:
'/svr_user2/x/y/z'
Notice the 1 which means replace once.

Related

Avoid existing folders and bring only folders that don't exist

I have the code below which is bringing attachments into parent_directory using api connection.
Problem: The code works great, but it gets stuck when a folder already exists.
Solution: How can I make this code bypass the existing folders? If the folder exists, it should do nothing and move on to the next iteration.
import pandas as pd
import os
import zipfile
parent_directory = "folderpath"
csv_file_dir = "myfilepath.csv"
user = "API_username"
key = "API_password"

os.chdir(parent_directory)
bdr_data = pd.read_csv(csv_file_dir)
api_first = "… " + user + ":" + key + "…"

# One folder per CSV row: create it, enter it, run the download command
# there, then return to the parent directory.
for _, row in bdr_data.iterrows():
    target_dir = parent_directory + row['Ref_Num']
    os.makedirs(target_dir)
    os.chdir(target_dir)
    os.system(api_first + row['url_attachment'] + " -o attachments.zip")
    os.chdir(parent_directory)
You can do it like this.
for _, row in bdr_data.iterrows():
    child_dir = parent_directory + row['Ref_Num']
    if os.path.exists(child_dir):
        # Report what is skipped, then fall through to the next row.
        print(f'{child_dir} already exist')
    else:
        # Folder not found: create it and do what you need.
        os.makedirs(child_dir)

How to select a specific mailbox from IMAP server?

I have the following mailboxes on my IMAP server (refer to the attached screenshot).
I want to only select the mailbox Folder1 and check if there are any sub-directories. I already tried the following code:
# Connect to the IMAP server over SSL and log in.
svr = imaplib.IMAP4_SSL(imap_address)
svr.login(user, pwd)
svr.select('inbox') <<<<<<<<<<<<<<<<<
# Search the selected mailbox for all messages.
rv, data = svr.search(None, "ALL")
# NOTE(review): list() is called with its own directory ('""') and pattern ('*')
# arguments; it presumably does not depend on the mailbox chosen by select() — confirm.
test, folders = svr.list('""', '*')
print(folders)
I thought changing 'inbox' to 'folder1' (statement indicated with arrows) would select Folder1 and then I can retrieve the sub-directories. But nothing happened and still it shows the same result as 'inbox'.
Can somebody help me understand what I am doing wrong here.
Since I would not know the folder name in advance, I tried a different approach. I first collect all the folders in the root directory and then check them one by one to see whether any sub-directory exists.
# Names of the top-level mailboxes, filled in further down.
root_folders = []
# Connect over SSL, log in and select the inbox.
svr = imaplib.IMAP4_SSL(imap_address)
svr.login(user, pwd)
svr.select('inbox')
# Ask the server for every mailbox ('*' pattern, empty reference name).
response, folders = svr.list('""', '*')
def parse_mailbox(data):
    r"""Split one decoded IMAP LIST response line into its three fields.

    Args:
        data: A line such as ``(\HasNoChildren) "/" "INBOX"``.

    Returns:
        A ``(flags, separator, name)`` tuple. Double quotes are removed from
        the separator and from the name; the flags keep their parentheses.
    """
    # The flag list is parenthesised and may contain several space-separated
    # flags, so cut at the closing parenthesis instead of at the first space.
    flags_end = data.find(')')
    if data.startswith('(') and flags_end != -1:
        flags = data[:flags_end + 1]
        remainder = data[flags_end + 1:].lstrip(' ')
    else:
        # No parenthesised flag list: fall back to splitting at the first space.
        flags, _, remainder = data.partition(' ')
    separator, _, name = remainder.partition(' ')
    return flags, separator.replace('"', ''), name.replace('"', '')
def subdirectory(folder):
    """Print a message when ``folder`` has at least one sub-mailbox.

    Args:
        folder: Mailbox name without surrounding double quotes.
    """
    # For directories 'Deleted Items', 'Sent Items', etc. with whitespaces,
    # the name of the directory needs to be passed with double quotes.
    # The query uses this function's own argument and the module-level `svr`
    # connection (the earlier `obj` / `name` were not defined in this scope).
    test, folders = svr.list('""', '"' + folder + '/*"')
    # imaplib hands back a list containing a single None when nothing matches,
    # so testing only `folders is not None` would always succeed.
    if folders and any(entry is not None for entry in folders):
        print('Subdirectory exists')  # you can also call parse_mailbox to find the name of sub-directory
# Keep only mailboxes whose name has no '/' in it, i.e. top-level folders.
for raw_entry in folders:
    flags, separator, name = parse_mailbox(raw_entry.decode())
    fmt = '{0} : [Flags = {1}; Separator = {2}'
    if len(name.split('/')) <= 1:
        root_folders.append(name)

# Check every top-level folder for sub-directories.
for folder in root_folders:
    subdirectory(folder)
Although this is tailored code from my script, it should solve the problem described in the question.

Python relative path problems

I am trying to create some helper functions that will give me a folder based on the relative paths:
def get_BASE_dir_path():
    """Return the absolute path two levels above the directory of this file."""
    this_file = os.path.abspath(__file__)
    two_levels_up = os.path.join(os.path.dirname(this_file), "..", "..")
    # abspath() also collapses the ".." components.
    return os.path.abspath(two_levels_up)
def get_src_dir_path():
    """Return the absolute path of the "src" directory below the base directory."""
    return os.path.abspath(os.path.join(get_BASE_dir_path(), "src"))
def get_lib_dir_path():
    """Return the directory part of ``<base>/src/lib``."""
    candidate = os.path.join(get_BASE_dir_path(), "src", "lib")
    # Splitting off the last component leaves <base>/src, not <base>/src/lib.
    parent, _ = os.path.split(candidate)
    return parent
def get_ffmpeg_dir_path():
    """Return the directory part of ``<base>/src/lib/ffmpeg``."""
    candidate = os.path.join(get_BASE_dir_path(), "src", "lib", "ffmpeg")
    # Splitting off the last component leaves <base>/src/lib.
    parent, _ = os.path.split(candidate)
    return parent
But, somehow, I am not getting the right results when I print the functions:
Output:
C:\dev\project
C:\dev\project\src
C:\dev\project\src
C:\dev\project\src\lib
What did I miss?
The problem is here, in function get_lib_dir_path()
lib_dir = os.path.dirname(os.path.join(BASE,"src","lib"))
It should be:
lib_dir = os.path.join(BASE,"src","lib")
The same thing happens in get_ffmpeg_dir_path(). By calling dirname(), you chop off the last directory.
I guess it is because you are returning dirname instead of abspath for the last two values.

Python: why is my method using os.walk does not return all available paths?

Question as in title and here's the method:
# Collects paths found by os.walk() below sBasePath into a list and returns it.
# blFolders / blFiles switch the appending of directory paths and file paths on or off.
# NOTE(review): the indentation of this snippet has been lost, so the nesting of
# the two inner loops cannot be determined from the text — confirm against the original.
def walkThroughPath(self , sBasePath, blFolders = True, blFiles = True ):
aPaths = []
for sRootDir, aSubFolders, aFiles in os.walk( sBasePath ):
for sFolder in aSubFolders:
if blFolders == True:
aPaths.append( sRootDir )  # appends the directory being walked, not sFolder
for sFileName in aFiles:
if blFiles == True:
aPaths.append( sRootDir + "/" + sFileName )  # file path built with a literal "/"
return aPaths
The method returns a large number of subfolders and files, but definitely not all of the ones I have found.
What's wrong with my method (or is it a wrong usage of os.walk)?
For those who are interested in the Background:
http://www.playonlinux.com/en/topic-10962-centralized_wineprefix_as_preparation_for_debpackages.html
Here are two possibilities:
You don't have permission to read a certain directory.
By default, os.walk does not follow symbolic links. Use the
followlinks=True keyword to follow symbolic links:
os.walk( sBasePath, followlinks=True )
Having skimmed the link you provided, it looks like followlinks=True may be the solution.
both of your hints brought the final solution that looks like that now:
def walkThroughPath(self, sBasePath, blFolders=True, blFiles=True, blFollowSymlinks=True):
    """Collect the directories and files found below ``sBasePath``.

    Args:
        sBasePath: Directory at which the walk starts.
        blFolders: Include every walked directory (``sBasePath`` included).
        blFiles: Include every file, formatted as ``<directory>/<file name>``.
        blFollowSymlinks: Descend into directories reached through symlinks.

    Returns:
        List of unique path strings in the order they were encountered.
    """
    aPaths = []
    # A set gives an O(1) duplicate check; it replaces list.index() wrapped
    # in a bare ``except``.
    seen = set()

    def _remember(sPath):
        """Append sPath unless it was recorded before; return True if added."""
        if sPath in seen:
            return False
        seen.add(sPath)
        aPaths.append(sPath)
        return True

    # followlinks has to be passed by keyword: the second positional
    # parameter of os.walk() is ``topdown``.
    for sRootDir, aSubFolders, aFiles in os.walk(
            sBasePath, followlinks=blFollowSymlinks):
        self.logDebug("Current root dir: " + sRootDir)
        # Record each directory when it is visited, so directories without
        # any entries are kept as well.
        if blFolders and _remember(sRootDir):
            self.logDebug("Append: " + sRootDir)
        if blFiles:
            for sFileName in aFiles:
                _remember(sRootDir + "/" + sFileName)

    self.logDebug("Folders: " + str(blFolders))
    self.logDebug("Files : " + str(blFiles))
    self.logDebug("Paths found in " + sBasePath + " : \n" + pprint.pformat(aPaths))
    return aPaths
First I indented incorrectly as Steven said.
os.walk does not seem to handle the lists the way I expected. Folders that contain files do not necessarily appear in the folders list. Because of this I left out many folders whose paths only showed up through the files list. Additionally, I only checked for files within this limited folders list.
Next I added the follow symlinks flag optional as unutbu suggested. Maybe in my case they could be needed as well eventually.
The method above is surely a candidate for improvement, but at least it works :-)
Best,
André
I know that you have already solved the problem, but for future reference: if you want to walk through directories as a 32-bit application running on 64-bit Windows, make sure that you check for the redirected directories.
The %windir%\System32 directory is reserved for 64-bit applications. Most DLL file names were not changed when 64-bit versions of the DLLs were created, so 32-bit versions of the DLLs are stored in a different directory. WOW64 hides this difference by using a file system redirector.
File System Redirector on MSDN

Pythonic way to retrieve case sensitive path?

I was wondering if there was a faster way to implement a function that returns a case-sensitive path in python. One of the solutions I came up with works with both linux and windows, but requires that I iterate os.listdir, which can be slow.
This solution works fine for an application and context that does not need plenty of speed:
def correctPath(start, path):
    """Returns a unix-type case-sensitive path, works in windows and linux.

    ``path`` is split on backslashes and each part is matched
    case-insensitively against the directory listing, starting in ``start``.
    Returns ``(corrected path, number of corrections)``, or ``None`` as soon
    as one part has no match.
    """
    start = unicode(start)
    path = unicode(path)
    if path[-1] == '/':
        path = path[:-1]

    fixed_path = ''
    fix_count = 0
    current_dir = start
    for part in path.split('\\'):
        wanted = part.lower()
        # First directory entry that equals the part when case is ignored.
        match = next(
            (entry for entry in os.listdir(current_dir) if entry.lower() == wanted),
            None)
        if match is None:
            return None
        if match != part:
            fix_count += 1
        current_dir = os.path.join(current_dir, match)
        fixed_path = os.path.join(fixed_path, match)
    return fixed_path, fix_count  # (corrected path, number of corrections)
>>> correctPath('C:\\Windows', 'SYSTEM32\\CmD.EXe')
(u'System32\\cmd.exe', 2)
This however, will not be as fast when the context is gathering filenames from a large 50,000+ entry database.
One method would be to create a dict tree for each directory. Match the dict tree with the directory parts of the path, and if a key-miss occurs, perform an os.listdir to find and create a dict entry for the new directory and remove the unused parts or keep a variable counter as a way to assign a "lifetime" to each directory.
The following is a slight re-write of your own code with three modifications: checking if the filename is already correct before matching, processing the listing to lowercase before testing, using index to find the relevant 'true case' file.
def corrected_path(start, path):
    """Returns a unix-type case-sensitive path, works in windows and linux.

    Each backslash-separated part of ``path`` that does not exist as given is
    looked up case-insensitively in the directory listing. Returns
    ``(corrected path, number of corrections)``, or ``False`` when a part
    cannot be found.
    """
    start = unicode(start)
    path = unicode(path)
    if path[-1] == '/':
        path = path[:-1]

    result = ''
    fixes = 0
    current_dir = start
    for part in path.split('\\'):
        real_name = part
        if not os.path.exists(os.path.join(current_dir, part)):  # not correct already
            entries = os.listdir(current_dir)
            lowered = [entry.lower() for entry in entries]
            try:
                # Position in the lower-cased listing gives the real name.
                real_name = entries[lowered.index(part.lower())]
            except ValueError:
                return False  # Error, this path element isn't found
            fixes += 1
        current_dir = os.path.join(current_dir, real_name)
        result = os.path.join(result, real_name)
    return result, fixes
I'm not sure if this will be much faster, though there is a little less testing going on, plus the 'already-correct' catch at the beginning may help.
An extended version with case-insensitive caching to pull out the corrected path:
import os,re
def corrected_paths(start, pathlist):
    """Correct the case of several paths below ``start``, sharing a cache.

    Args:
        start: Directory the paths are relative to.
        pathlist: Paths to correct; '/' and '\\' are both accepted as
            separators and one trailing '/' is ignored.

    Returns:
        ``(list of corrected paths, number of corrections)``, or ``False`` as
        soon as one path element cannot be found in its directory.
    """
    start = unicode(start)
    # endswith() instead of path[-1] so that an empty path does not raise.
    pathlist = [unicode(path[:-1]) if path.endswith('/') else unicode(path)
                for path in pathlist]
    # Use a dict as a cache, storing oldpath > newpath for first-pass replacement
    # with path keys from incorrect to corrected paths
    cache = dict()
    corrected_path_list = []
    corrections_count = 0
    # Split on runs of '/' or '\'. No capturing group: with one, split()
    # would also return the separators themselves as path parts.
    path_split = re.compile(r'[/\\]+')
    for path in pathlist:
        cd = start
        # Drop the empty strings produced by leading or trailing separators.
        parts = [part for part in path_split.split(path) if part]
        # Pre-process against the cache
        for n, _ in enumerate(parts):
            # We pass *parts to send through the contents of the list as a series of strings
            uncorrected_path = os.path.join(cd, *parts[0:len(parts) - n]).lower()  # Walk backwards
            if uncorrected_path in cache:
                # Move up the basepath to the latest matched position
                cd = os.path.join(cd, cache[uncorrected_path])
                parts = parts[len(parts) - n:]  # Retrieve the unmatched segment
                break  # First hit, we exit since we're going backwards
        # Fallback to walking, from the base path cd point
        for p in parts:
            if not os.path.exists(os.path.join(cd, p)):  # Check it's not correct already
                # Alternative for Mac OS, where the test above succeeds
                # regardless of case: if p not in os.listdir(cd):
                listing = os.listdir(cd)
                cip = p.lower()
                cilisting = [entry.lower() for entry in listing]
                if cip in cilisting:
                    real_name = listing[cilisting.index(cip)]  # Get our real folder name
                    # Store the path correction in the cache for next iteration
                    cache[os.path.join(cd, p).lower()] = os.path.join(cd, real_name)
                    cd = os.path.join(cd, real_name)
                    corrections_count += 1
                else:
                    # Parenthesised single argument prints the same text under
                    # the print statement and the print function.
                    print("Error %s not in folder %s" % (cip, cilisting))
                    return False  # Error, this path element isn't found
            else:
                cd = os.path.join(cd, p)
        corrected_path_list.append(cd)
    return corrected_path_list, corrections_count
On an example run for a set of paths, this reduces the number of listdirs considerably (this is obviously dependent on how alike your paths are):
corrected_paths('/Users/', ['mxF793/ScRiPtS/meTApaTH','mxF793/ScRiPtS/meTApaTH/metapAth/html','mxF793/ScRiPtS/meTApaTH/metapAth/html/css','mxF793/ScRiPts/PuBfig'])
([u'/Users/mxf793/Scripts/metapath', u'/Users/mxf793/Scripts/metapath/metapath/html', u'/Users/mxf793/Scripts/metapath/metapath/html/css', u'/Users/mxf793/Scripts/pubfig'], 14)
([u'/Users/mxf793/Scripts/metapath', u'/Users/mxf793/Scripts/metapath/metapath/html', u'/Users/mxf793/Scripts/metapath/metapath/html/css', u'/Users/mxf793/Scripts/pubfig'], 5)
While working on this I realised that on Mac OS X Python reports path matches as if they were case-insensitive, so the test for existence always succeeds. In that case the listdir check can be moved up to replace it.

Categories

Resources