How to extract the file name from a file path? - python

I have the following code:
os.listdir("staging")
# Seperate filename from extension
sep = os.sep
# Change the casing
for n in os.listdir("staging"):
print(n)
if os.path.isfile("staging" + sep + n):
filename_one, extension = os.path.splitext(n)
os.rename("staging" + sep + n, "staging" + sep + filename_one.lower() + extension)
# Show the new file names
print ('\n--------------------------------\n')
for n in os.listdir("staging"):
print (n)
# Remove the blanks, -, %, and /
for n in os.listdir("staging"):
print (n)
if os.path.isfile("staging" + sep + n):
filename_zero, extension = os.path.splitext(n)
os.rename("staging" + sep + n , "staging" + sep + filename_zero.replace(' ','_').replace('-','_').replace('%','pct').replace('/','_') + extension)
# Show the new file names
print ('\n--------------------------------\n')
for n in os.listdir("staging"):
print (n)
"""
In order to fix all of the column headers and to solve the encoding issues and remove nulls,
first read in all of the CSV's to python as dataframes, then make changes and rewrite the old files
"""
import os
import glob
import pandas as pd
files = glob.glob(os.path.join("staging" + "/*.csv"))
print(files)
# Create an empty dictionary to hold the dataframes from csvs
dict_ = {}
# Write the files into the dictionary
for file in files:
dict_[file] = pd.read_csv(file, header = 0, dtype = str, encoding = 'cp1252').fillna('')
In the dictionary, the dataframes are named as "folder/name(csv)" what I would like to do is remove the prefix "staging/" from the keys in the dictionary.
How can I do this?

If all you want to do is truncate the file paths to just the filename, you can use os.path.basename:
for file in files:
fname = os.path.basename(file)
dict_[fname] = (pd.read_csv(file, header=0, dtype=str, encoding='cp1252')
.fillna(''))
Example:
os.path.basename('Desktop/test.txt')
# 'test.txt'

import os
pathname ='c:\\hello\\dickins\\myfile.py'
head, tail = os.path.split(pathname)
print head
print tail

This article here worked out just fine for me
import os
inputFilepath = 'path/to/file/foobar.txt'
filename_w_ext = os.path.basename(inputFilepath)
filename, file_extension = os.path.splitext(filename_w_ext)
#filename = foobar
#file_extension = .txt
path, filename = os.path.split(path/to/file/foobar.txt)
# path = path/to/file
# filename = foobar.txt
Hope it helps someone searching for this answer

In the same spirt as truncate the file paths, use pathlib in python standard library. It will turn the path into an easy to use class.
from pathlib import Path
path = Path('Desktop/folder/test.txt')
path.name # test.txt
path.stem # test
path.suffix # .txt
path.parent.name # folder
path.parent.name.name # Desktop

As ColdSpeed said, you can use "os.path.basename" to truncate a file to its name, but I think what you are refering to is the ability to pycache the data?
For Example here is my Directory:
You see the pycache folder? that initializes it as a module.
Then, you can import a file from that module (for example the staging.txt file and operate on it.)
I use the IpConfig.txt File from the assets folder level (or should be) and take a line of information out of it.
import pygame as pyg
import sys
import os
import math
import ssl
import socket as sock
import ipaddress as ipad
import threading
import random
print("Modules Installed!")
class two:
# Find out how to refer to class super construct
def main(Display, SecSock, ipadd, clock):
# I have code here that has nothing to do with the question...
def __init__():
print("Initializing[2]...")
# Initialization of Pygame and SSL Socket goes here
searchQuery = open("IpConfig.txt", 'r') #Opening the File IpConfig(Which now should open on the top level of the game files)
step2 = searchQuery.readlines()# read the file
ipadd = step2[6] # This is what you should have or something similar where you reference the line you want to copy or manipulate.
main(gameDisplay, SSLSock, ipadd, clock)# Im having issues here myself - (main() is not defined it says)
print(ipadd)
print("Server Certificate Configuration Enabled...")
__init__() # Start up the procedure

Related

Renaming multiple csv files within a folder in Python

I have a folder with 50 .csv files. The .csv files are auto-generated and a results/ output from a process-based model (long and automatically named). For example, sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv, and it continues till sandbox_username_vetch_scaleup_IA_50.csv.
I am trying to shorten the file names in a way so that the files are names are IA_1, IA_2 ...up to IA_50 and subsequently the new .csv file name gets added as a column to the data frame. Here is what I have tried so far
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # giving you the number in file name and .csv
only_id = parts[0].split("_") # if there is a bracket included
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
Here is the code for adding the csv file name as a column
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
all_files = glob.glob(os.path.join(path, "*.csv"))
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
file_df['file_name'] = file_
df = df.append(file_df)
#However, this adds the old csv file name and not the renamed one
In order to rename and move these files, all you need is:
import glob
import os
import shutil
import sys
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'
for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
idx = file.index('_IA_')
filename = file[idx+1:]
target = os.path.join(TARGET, filename)
if os.path.exists(target):
print(f'Target file {target} already exists', file=sys.stderr)
else:
shutil.copy(file, target)
As there's nothing in the OP's question that tries to handle modification of the CSV files, that is left as an exercise for the OP.
Source and target directories should be different otherwise this can lead to ambiguous results

Find files within a changed subdirectory in Python

I have a text-file full of filenames. Like:
C:\Folder\Subfolder_01\file_1001.csv
C:\Folder\Subfolder_02\file_3030.xls
...
I want to check whether the files still exists (which is easy) or if the name of the subfolder has changed. The name of some subfolders changed by adding some string in front of it (starting with a 4 digit number e.g C:\Folder\Subfolder_02\file_3030.xls has changed to C:\Folder\2019 - Subfolder_02\file_3030.xls).
I tried to solve this with pathlib.glob(). It's possible to do this for one specific file 'by hand' like
list(file.parent.parent.glob('* - Subfolder_02\file_3030.xls'))
which returns a list with the new file-name. But i failed to do this in a loop surrounding the glob with parameters.
This is what I got so far, but my attempt to concatenate the glob with other variables (using +) failes for obvious reasons:
import pathlib
file = pathlib.Path(file_names.txt)
lines=[]
with open(file,'r') as f:
# reading the txt-file line by line
for line in f:
line = line.replace("\r", "").replace("\n", "")
lines.append(line)
for file in lines:
file = pathlib.Path(file)
# check if file exists ...
if file.exists():
print('OK - ' + file.name)
# ... if not, find new location
else:
new_files = list(file.parent.parent.glob('* - ') + file.name)
print(files_files)
I would set your top directory as a path and use that to glob the files under the directory if you can't find the file in its original location. Using ** in the glob will search all folders.
# Set top level directory as desired.
parent_dir = Path('.')
# you can use splitlines() to parse the file into a list
with Path('file_names.txt').open() as f:
files = f.read().splitlines()
for f in files:
orig = Path(f)
# Still in location, no need to look further
if orig.exists():
print(f"{orig.absolute()} is still in place.")
continue
# See if we can find it under parent_dir
matches = [*parent_dir.glob(f"**/{orig.name}")]
if len(matches) > 1:
print("Multiple Matches Found")
for match in matches:
print(f"{orig.absolute()} might be in {match.absolute()}")
Try watchdog
For example:
import os
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
RESOURCES_PATH = "C:\Folder"
class dirs_watcher(FileSystemEventHandler):
def __init__(self):
self.observe()
self.cur_dirs = os.listdir(RESOURCES_PATH)
def observe(self):
self.observer = Observer()
self.my_watch = self.observer.schedule(self, path=RESOURCES_PATH, recursive=True)
self.observer.start()
def on_modified(self, event=None):
# A folder was modified:
self.new_dirs = os.listdir(RESOURCES_PATH)
old = set(self.cur_dirs) - set(self.new_dirs)
new = set(self.new_dirs) - set(self.cur_dirs)
print("{} changed to {}".format(old, new))
self.cur_dirs = self.new_dirs # update cur_dirs
on_modified will be triggered when a sub directory changes and you can extract the changed folders names by keeping a sub directories list

how to sort out files according to excel

I have an Excel file that contains long product tag name like(for now, just working on 3 of them):
4049047000037
4049047000044
4049047118954
and i have a folder on my desktop called "1" containing .jpg files with tag names like:
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
i want to write a code, if tag name in my excel, i want to copy that .jpg file to an other folder.
import os
import pandas as pd
movdir = ["C:\Users\muhammedcan\Desktop\1"]
basedir = "C:\Users\muhammedcan\Desktop\2"
i=0
#to see what i have in my folder
print os.listdir("C:/Users/muhammedcan/Desktop/1/")
df= pd.read_excel("deneme.xls", sheetname= "sayfa4")
df1= df.columns[1]
listSepalWidth = df[df1]
print listSepalWidth
#to make file name and product tag name same
for i in listSepalWidth:
i=str(i)+(".jpg")
print i
can you help me with copying file into an other file if it is exist in my excel?
this is my result so far:
['4049047000037.jpg', '4049047000044.jpg', '4049047000068.jpg',
'4049047000075.jpg', '4049047000082.jpg', '4049047000105.jpg',
'4049047118947.jpg', '4049047118954.jpg']
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
4049047000068.jpg
4049047000075.jpg
4049047000082.jpg
4049047118947.jpg
4049047000105.jpg
I used following code, and I am recieving error.
from shutil import copyfile
copyfile("C:\Users\muhammedcan\Desktop\1", "C:\Users\muhammedcan\Desktop\2")
Error is:
C:\Python27\python.exe "C:/Users/muhammedcan/Desktop/summer
courses/programing/MP4/solution/my_work.py"
Traceback (most recent call last):
File "C:/Users/muhammedcan/Desktop/summer courses/programing/MP4/solution/my_work.py", line 3, in <module>
copyfile("C:\Users\muhammedcan\Desktop\1",
Process finished with exit code 1
The following should do what you are looking for:
import os
import glob
import pandas as pd
import shutil
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"
available_filenames = [os.path.basename(fn) for fn in glob.glob(os.path.join(source_folder, '*.jpg'))]
df = pd.read_excel("deneme.xls", sheetname="sayfa4")
for tag_name in df.iloc[:,1]:
filename = "{}.jpg".format(tag_name)
if filename in available_filenames:
print "{} - found".format(filename)
shutil.copyfile(os.path.join(source_folder, filename), os.path.join(destination_folder, filename))
else:
print "{} - not found".format(filename)
If first creates a list of .jpg filenames found in the source_folder. It then loads the Excel file into pandas and iterates over the second column. If the tag name is found in the list of available_filenames the shutil.copyfile() function is used to copy the file from 1 to 2. Note os.path.join() is used to safely join parts of a file together.
To make it into a function to let you also do 'pdf' you could do:
import os
import glob
import pandas as pd
import shutil
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"
df = pd.read_excel("deneme.xls", sheetname="sayfa4")
def copy_files(source_folder, destination_folder, extension):
available_filenames = [os.path.basename(fn) for fn in glob.glob(os.path.join(source_folder, '*.{}'.format(extension)))]
for tag_name in df.iloc[:,1]:
filename = "{}.{}".format(tag_name, extension)
if filename in available_filenames:
print "{} - found".format(filename)
shutil.copyfile(os.path.join(source_folder, filename), os.path.join(destination_folder, filename))
else:
print "{} - not found".format(filename)
copy_files(source_folder, destination_folder, 'jpg')
copy_files(source_folder, destination_folder, 'pdf')
This assumes the same deneme.xls is used for both. If not it could be passed as another argument to the function.

Run file inside a zipfile?

Is it possible to run a .html or .exe for example, that is inside a zipfile? I'm using the Zipfile module.
Here's my sample code:
import zipfile
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = ""
g = ""
for filename in z.namelist():
#print filename
y = len(filename)
x = str(filename)[y - 5:]
if x == ".html":
g = filename
f = z.open(g)
After f = z.open(g), I don't know what to do next. I tried using the .read() but it only reads whats inside of the html, what I need is for it to run or execute.
Or is there any othere similar ways to do this?
The best approach will be to extract the required file to the Windows temp directory and execute it. I have modified your original code to create a temp file and execute it:
import zipfile
import shutil
import os
z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = ""
g = ""
basename = ""
for filename in z.namelist():
print filename
y = len(filename)
x = str(filename)[y - 5:]
if x == ".html":
basename = os.path.basename(filename) #get the file name and extension from the return path
g = filename
print basename
break #found what was needed, no need to run the loop again
f = z.open(g)
temp = os.path.join(os.environ['temp'], basename) #create temp file name
tempfile = open(temp, "wb")
shutil.copyfileobj(f, tempfile) #copy unzipped file to Windows 'temp' folder
tempfile.close()
f.close()
os.system(temp) #run the file
Run the first .html file in a zip archive specified at the command line:
#!/usr/bin/env python
import os
import shutil
import sys
import tempfile
import webbrowser
import zipfile
from subprocess import check_call
from threading import Timer
with zipfile.ZipFile(sys.argv[1], 'r') as z:
# find the first html file in the archive
member = next(m for m in z.infolist() if m.filename.endswith('.html'))
# create temporary directory to extract the file to
tmpdir = tempfile.mkdtemp()
# remove tmpdir in 5 minutes
t = Timer(300, shutil.rmtree, args=[tmpdir], kwargs=dict(ignore_errors=True))
t.start()
# extract the file
z.extract(member, path=tmpdir)
filename = os.path.join(tmpdir, member.filename)
# run the file
if filename.endswith('.exe'):
check_call([filename]) # run as a program; wait it to complete
else: # open document using default browser
webbrowser.open_new_tab(filename) #NOTE: returns immediately
Example
T:\> open-from-zip.py file.zip
As an alternative to webbrowser you could use os.startfile(os.path.normpath(filename)) on Windows.

python os.rename(...) won't work !

I am writing a Python function to change the extension of a list of files into another extension, like txt into rar, that's just an idle example. But I'm getting an error. The code is:
import os
def dTask():
#Get a file name list
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
#Change the extensions
for file_name in file_list:
entry_pos = 0;
#Filter the file name first for '.'
for position in range(0, len(file_name)):
if file_name[position] == '.':
break
new_file_name = file_name[0:position]
#Filtering done !
#Using the name filtered, add extension to that name
new_file_name = new_file_name + '.rar'
#rename the entry in the file list, using new file name
print 'Expected change from: ', file_list[entry_pos]
print 'into File name: ', new_file_name
os.rename(file_list[entry_pos], new_file_name)
++entry_pos
Error:
>>> dTask()
Expected change from: New Text Document (2).txt
into File name: New Text Document (2).rar
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
dTask()
File "C:\Users\B\Desktop\dTask.py", line 19, in dTask
os.rename(file_list[entry_pos], new_file_name)
WindowsError: [Error 2] The system cannot find the file specified
I can succeed in getting the file name with another extension in variable level as you can see in the print-out, but not in reality because I can not end this process in OS level. The error is coming from os.rename(...). Any idea how to fix this ?
As the others have already stated, you either need to provide the path to those files or switch the current working directory so the os can find the files.
++entry_pos doesn't do anything. There is no increment operator in Python. Prefix + is just there fore symmetry with prefix -. Prefixing something with two + is just two no-ops. So you're not actually doing anything (and after you change it to entry_pos += 1, you're still resetting it to zero in each iteration.
Also, your code is very inelegant - for example, you are using a separate index to file_list and fail to keep that in synch with the iteration variable file_name, even though you could just use that one! To show how this can be done better.
-
def rename_by_ext(to_ext, path):
if to_ext[0] != '.':
to_ext = '.'+to_ext
print "Renaming files in", path
for file_name in os.listdir(path):
root, ext = os.path.splitext(file_name)
print "Renaming", file_name, "to", root+ext
os.rename(os.path.join(path, file_name), os.path.join(path, root+to_ext))
rename_by_ext('.rar', '...')
os.rename really doesn't like variables. Use shutil. Example taken from How to copy and move files with Shutil.
import shutil
import os
source = os.listdir("/tmp/")
destination = "/tmp/newfolder/"
for files in source:
if files.endswith(".txt"):
shutil.move(files,destination)
In your case:
import shutil
shutil.move(file_list[entry_pos], new_file_name)
You also want to double backslashes to escape them in Python strings, so instead of
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
you want
file_list = os.listdir('C:\\Users\\B\\Desktop\\sil\\sil2')
Or use forward slashes - Python magically treats them as path separators on Windows.
You must use the full path for the rename.
import os
def dTask():
#Get a file name list
dir = 'C:\Users\B\Desktop\sil\sil2'
file_list = os.listdir(dir)
#Change the extensions
for file_name in file_list:
entry_pos = 0;
#Filter the file name first for '.'
for position in range(0, len(file_name)):
if file_name[position] == '.':
break
new_file_name = file_name[0:position]
#Filtering done !
#Using the name filtered, add extension to that name
new_file_name = new_file_name + '.rar'
#rename the entry in the file list, using new file name
print 'Expected change from: ', file_list[entry_pos]
print 'into File name: ', new_file_name
os.rename( os.path.join(dir, file_list[entry_pos]), os.path.join(dir,new_file_name))
++entry_pos
If you aren't in the directory C:\Users\B\Desktop\sil\sil2, then Python certainly won't be able to find those files.
import os
def extChange(path,newExt,oldExt=""):
if path.endswith != "\\" and path.endswith != "/":
myPath = path + "\\"
directory = os.listdir(myPath)
for i in directory:
x = myPath + i[:-4] + "." + newExt
y = myPath + i
if oldExt == "":
os.rename(y,x)
else:
if i[-4:] == "." + oldExt:
os.rename(y,x)
now call it:
extChange("C:/testfolder/","txt","lua") #this will change all .txt files in C:/testfolder to .lua files
extChange("C:/testfolder/","txt") #leaving the last parameter out will change all files in C:/testfolder to .txt

Categories

Resources