get Path from xml-files (Python) - python

I have 300 XML files. Each file contains a path (see the sample below), and I want to build a list (.csv) of these paths with Python.
<da:AdminData>
<da:Datax />
<da:DataID>223</da:DataID>
<da:Date>2013-08-19</da:Date>
<da:Time>13:27:25</da:Time>
<da:Modification>2013-08-19</da:Modification>
<da:ModificationTime>13:27:25</da:ModificationTime>
**<da:Path>D:\08\06\xxx-aaa_20130806_111339.dat</da:Path>**
<da:ID>xxx-5225-fff</da:ID>
I wrote the following code, but it does not work for subdirectories:
import os, glob, re, time, shutil
# Collect the <da:Path> value of every XML file in `xmlpath` and write the
# values to `outfilename`, one per line.
xmlpath = r'D:'
outfilename = "result.csv"

# Compiled once; group 1 is the text between <da:Path> and </da:Path>.
path_pattern = re.compile("<da:Path>(.*)</da:Path>")

# NOTE: this glob only sees XML files directly inside `xmlpath`; it does not
# descend into subdirectories (the limitation this question is about).
xml_files = glob.glob(os.path.join(xmlpath, '*.xml'))

found_paths = []
for xml_file in xml_files:
    # `with` closes each input file as soon as it has been read.
    with open(xml_file) as fh:
        match = path_pattern.search(fh.read())
    if match:
        found_paths.append(match.group(1))

# Same layout as the original string concatenation: every path is preceded by
# a newline. The original wrote to the undefined name `outfile`.
with open(outfilename, "w") as logfile:
    logfile.write("".join('\n' + found for found in found_paths))

To glob recursively, it is best to use a combination of os.walk and fnmatch.fnmatch. Example:
import os
import fnmatch
def recursive_glob(rootdir, pattern):
    """Return the paths of all files below *rootdir* whose name matches *pattern*.

    The tree is traversed with ``os.walk`` and each file name (not the full
    path) is tested against the shell-style *pattern* with ``fnmatch.fnmatch``.

    Args:
        rootdir: Directory at which the search starts.
        pattern: Shell-style wildcard pattern, e.g. ``"*.xml"``.

    Returns:
        A list of matching paths, each built by joining the directory that
        ``os.walk`` reported with the file name. Empty if nothing matches.
    """
    matching_files = []
    for d, _, fnames in os.walk(rootdir):
        matching_files.extend(
            os.path.join(d, fname) for fname in fnames
            if fnmatch.fnmatch(fname, pattern)
        )
    return matching_files
# A raw string cannot end in a single backslash (r"D:\" is a SyntaxError),
# so the drive root is written with an escaped backslash instead.
xmlfiles = recursive_glob("D:\\", "*.xml")

Related

How to detect folder path i ran script for?

The script below unzips a zip file, renames the extracted subtitles according to the TV show episode names, and then converts them to UTF-8.
here is the problem:
I want to run this script on Linux, inside any TV show folder I choose. However, I want the path passed to the fixing function to be detected from the folder in which I run the Python script, because it is not a constant path; there are many TV show folders.
sorry for my english
import zipfile
import os
import re
from chardet import detect
import fnmatch
def find(pattern, path):
    """Return the paths of all files below *path* whose name matches *pattern*.

    Args:
        pattern: Shell-style wildcard pattern tested against each file name
            with ``fnmatch.fnmatch`` (e.g. ``"*.zip"``).
        path: Directory at which the ``os.walk`` traversal starts.

    Returns:
        A list of matching paths (walked directory joined with the file name),
        in traversal order. Empty if nothing matches.
    """
    result = []
    for root, dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern):
                result.append(os.path.join(root, name))
    return result
def get_encoding_type(file):
    """Return the encoding that ``chardet.detect`` reports for *file*.

    The whole file is read in binary mode and handed to ``detect``; the
    ``'encoding'`` entry of its result is returned unchanged.

    Args:
        file: Path of the file to inspect.

    Returns:
        The value of ``detect(...)['encoding']``.
        NOTE(review): chardet may report ``None`` when it cannot decide;
        confirm against the chardet documentation before relying on a str.
    """
    with open(file, 'rb') as f:
        rawdata = f.read()
    return detect(rawdata)['encoding']
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
    """Re-encode a text file and normalise its line endings to CRLF.

    Args:
        filename: Path of the file to read.
        newFilename: Path of the file to write. May be the same as *filename*.
        encoding_from: Encoding used to decode *filename* (passed to ``open``).
        encoding_to: Encoding used to write *newFilename*.

    Every line of the input is written to the output followed by CRLF,
    including a final line that had no trailing newline.
    """
    # Read everything before the output is opened: when both arguments name
    # the same file, opening it for writing truncates it, and reading lazily
    # afterwards would produce an empty result.
    with open(filename, 'r', encoding=encoding_from) as fr:
        lines = fr.readlines()

    # newline='' keeps the text layer from translating the explicit '\r\n'
    # again (which would produce '\r\r\n' on Windows).
    with open(newFilename, 'w', encoding=encoding_to, newline='') as fw:
        for line in lines:
            # Strip only a real trailing newline; a blind line[:-1] would cut
            # the last character off a final line without one.
            if line.endswith('\n'):
                line = line[:-1]
            fw.write(line + '\r\n')
def fixing(path):
    """Replace the subtitles in *path* with those from a zip archive.

    Steps, in order:
      1. Delete every existing ``.srt``/``.ass`` file directly inside *path*.
      2. Extract the first ``*.zip`` found below *path* into *path* and delete
         the archive.
      3. Rename each subtitle whose name contains a video's ``SxxEyy`` tag to
         ``<video name without extension>.ar.<srt|ass>``.
      4. Re-encode every subtitle in *path* to UTF-8.

    Args:
        path: Directory containing the videos, the old subtitles and the zip.

    If no zip archive is found, a message is printed and nothing else is done
    after step 1.
    """
    subtitle_exts = ('.srt', '.ass')
    video_exts = ('.mkv', '.avi', '.mp4')

    oldsrtfiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(oldsrtfiles), ' old subtitles found')
    for oldsrt in oldsrtfiles:
        # os.path.join supplies the separator that f'{path}{oldsrt}' omitted.
        os.remove(os.path.join(path, oldsrt))
        print(f'{oldsrt} removed')

    zip_files = find('*.zip', path)
    if not zip_files:
        # Indexing [0] on an empty result used to raise a bare IndexError.
        print('No zip file found')
        return
    filename = zip_files[0]
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall(path)
    print('files extarcted')
    # Removed only after the archive has been closed.
    os.remove(filename)
    print("Zip File Removed!")

    newsrtFiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(newsrtFiles), ' subtitles found')
    showsTitles = [f for f in os.listdir(path) if f.endswith(video_exts)]
    print(len(showsTitles), ' tv show found')

    pattern = re.compile(r'S(\d{1,2})E(\d{1,2})')
    for show in showsTitles:
        episode = pattern.search(show)
        if episode is None:
            # Video without an SxxEyy tag: nothing to pair it with.
            continue
        SEneeded = episode.group(0)
        # Drop the 3-letter extension but keep the dot, e.g. "Show.S01E02.".
        # Slicing avoids str.replace touching an earlier "mkv"/"avi"/"mp4"
        # elsewhere in the name.
        base = show[:-3]
        for i, sub in enumerate(newsrtFiles):
            if SEneeded not in sub:
                continue
            # sub ends in '.srt' or '.ass' (filtered above); both are renamed.
            # The original '.ass' branch assigned to an undefined name.
            new_name = f'{base}ar.{sub[-3:]}'
            os.rename(os.path.join(path, sub), os.path.join(path, new_name))
            newsrtFiles[i] = new_name

    forencoding = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    for newsrtfile in forencoding:
        sub_path = os.path.join(path, newsrtfile)
        from_codec = get_encoding_type(sub_path)
        print(from_codec)
        # Convert into a temporary file and swap it in afterwards, so the
        # source is never opened for writing while it is still being read.
        tmp_path = sub_path + '.utf8.tmp'
        correctSubtitleEncoding(sub_path, tmp_path, from_codec, encoding_to='UTF-8')
        os.replace(tmp_path, sub_path)
Use this function to get the current working directory:
# Returns the current working directory of the running process as a string.
os.getcwd()
To call this function you need to import the os module:
import os

Python - copy specific file from subfolder to destination, get filename from text file

I want my script to read a list of names from a text file, search for those names in a selected folder and its subfolders, and then copy the matching files to another selected folder. My script runs without error but produces no result.
My script:
import os
import os.path
import shutil
textFile = ("D:\\Test\\list.txt")
sourceFolder = ("D:\\Test")
destinationFolder = ("D:\\")

# Names to look for: one entry per line of the text file, whitespace stripped.
filesToFind = []
with open(textFile, "r") as tx:
    for row in tx:
        filesToFind.append(row.strip())

# NOTE: logic kept exactly as posted in the question. os.walk yields a *list*
# of file names as the third item, so `filename` is a list here and the
# membership test below never succeeds; nothing is ever copied.
for root, dirs, filename in os.walk(sourceFolder):
    if filename in filesToFind:
        f = os.path.join(root, filename)
        shutil.copy(f, destinationFolder)
I haven't tested it, but I think this will work. Change this:
# Original loop, quoted from the question: `filename` is the whole list of
# file names in `root`, not a single name.
for root, dirs, filename in os.walk(sourceFolder):
    if filename in filesToFind:
        f = os.path.join(root, filename)
        shutil.copy(f, destinationFolder)
To this:
# os.walk yields (directory, subdirectory names, file names); the inner loop
# tests each file name individually against the wanted names.
for root, dirs, filenames in os.walk(sourceFolder):
    for filename in filenames:
        if filename in filesToFind:
            f = os.path.join(root, filename)
            shutil.copy(f, destinationFolder)
# Same code using glob #
## More efficient and also tested one ##
## One more feature added- checks file name given present or not ##
import os
import os.path
import shutil
import glob
# Copy every file named in `textFile` from anywhere below `sourceFolder`
# into `destinationFolder`, reporting names that cannot be found.
textFile = "D:\\Test\\list.txt"
# Backslashes escaped: "D:\Test" relied on "\T" being an unknown escape.
sourceFolder = "D:\\Test"
destinationFolder = "D:\\"

# `with` closes the list file; names are stripped once, up front.
with open(textFile, "r") as names_file:
    wanted_names = [line.strip() for line in names_file]

for name in wanted_names:
    if not name:
        # A blank line would turn the pattern into "<sourceFolder>/**/",
        # which matches directories rather than a file.
        continue
    # "**" with recursive=True searches sourceFolder and all its subfolders.
    matches = glob.glob(os.path.join(sourceFolder, "**", name), recursive=True)
    if matches:
        # Only the first match is copied, as in the original.
        source = matches[0]
        print(source, destinationFolder, os.path.basename(source))
        destinationfile = os.path.join(destinationFolder, os.path.basename(source))
        shutil.copyfile(source, destinationfile)
    else:
        print(name, "-File not found")

Move file to new directory only if it contains specified string

I have 1 folder with thousands of files and I need to loop through every single file and see if that file contains a specific string, once it has concluded that it has a specific string, it must then be moved to the correct folder. So far I have:
# NOTE: logic kept exactly as posted in the question. Neither `f` nor the
# mmap `s` is ever closed, and the destination passed to shutil.move is
# `dst_file + filename`, i.e. the file name appended a second time.
for filename in glob.iglob('*.txt'):
    f = open(filename)
    s = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    if s.find('* Test Outcome : FAIL') != -1:
        src_file = os.path.join(dirSTART, filename)
        dst_file = os.path.join(dirFAIL, filename)
        shutil.move(src_file, dst_file + filename)
At the moment, it only moves the first file but I know for a fact there's more.
Thanks
You can use the os module alone to do this.
import os
import shutil
# Move every .txt file below `source_dir` that contains the FAIL marker
# into `dest_dir`.
source_dir = "this/is/source/folder"
dest_dir = "this/is/destination/folder"

for top, dirs, files in os.walk(source_dir):
    for filename in files:
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(top, filename)
        with open(file_path, 'r') as f:
            failed = '* Test Outcome : FAIL' in f.read()
        # Move only after the `with` block has closed the file: a file that
        # is still open cannot be moved on Windows.
        if failed:
            shutil.move(file_path, os.path.join(dest_dir, filename))
CAUTION: Since I don't know much about your code, I am assuming all of the files are txt, that they are small and the string you are matching will always be the same.
from re import compile
# Raw string so that "\*" reaches the regex engine as an escaped asterisk.
pattern = compile(r"\* Test Outcome : FAIL")
for filename in glob.iglob('*.txt'):
    # `with` closes the file whether or not a line matches; any() stops at
    # the first matching line, like the original `break`.
    with open(filename, 'r') as fl:
        failed = any(pattern.search(line) for line in fl)
    if failed:
        src_file = os.path.join(dirSTART, filename)
        dst_file = os.path.join(dirFAIL, filename)
        # dst_file already ends with the file name; the original appended
        # `filename` to it a second time.
        shutil.move(src_file, dst_file)
Use a contextmanager and with to open your files so they will be closed each time:
from mmap import mmap, ACCESS_READ
import contextlib
from os import path
from shutil import move
for filename in glob.iglob('*.txt'):
    # An empty file cannot be memory-mapped and cannot contain the marker.
    if path.getsize(filename) == 0:
        continue
    with open(filename) as f:
        with contextlib.closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as s:
            # mmap.find searches bytes, so the marker is a bytes literal.
            found = s.find(b'* Test Outcome : FAIL') != -1
    # Both the map and the file are closed here, so the move cannot be
    # blocked by our own open handles.
    if found:
        src_file = path.join(dirSTART, filename)
        dst_file = path.join(dirFAIL, filename)
        move(src_file, dst_file)
Try to do f.close() after s = mmap.mmap(...)
Are you on Linux? If so, it might be quicker to do this in a shell command with grep and mv.

I have a ".txt "file which consists of various filenames and I want to search each filename in a folder where these files are actually kept

Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds to a filename:
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my textfile (ant1.aiq,ant2.aiq and so on) with filenames which are present at some specific place(R:\Sample) and extract matching files into some other place (R:\sample\wsa).
I have an idea that I need to use functions like os.walk() and fnmatch.fnmatch(), shutil.copy() but I am not able to implement them
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil
# Move every file named in `infilepath` from `sourceDir` to `targetDir`.
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
infilepath = "aiq_hits.txt"

# Regular files directly inside sourceDir (subfolders are excluded).
existingFiles = {
    f for f in os.listdir(sourceDir)
    if os.path.isfile(os.path.join(sourceDir, f))
}

# The move below fails if the target folder does not exist yet.
os.makedirs(targetDir, exist_ok=True)

with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname not in existingFiles:
            continue
        shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
        # A name listed twice must not trigger a second move of a file that
        # is no longer in sourceDir.
        existingFiles.discard(fname)
I hope this will suffice:
import os
def match_files(url, file_read, dest):
    """Move the files listed in *file_read* from *url* to *dest*.

    Args:
        url: Source directory whose entries are compared with the list.
        file_read: Path of a text file holding one file name per line.
        dest: Existing destination directory.

    Each listed name that is present in *url* is moved to *dest*. The
    directory listing, the current working directory and the list of moved
    names are printed. Returns ``None``.
    """
    # Local import: the snippet's import block only brings in `os`.
    import shutil

    file_list = os.listdir(url)
    print(file_list)
    print("Current working directory is " + os.getcwd())

    # No os.chdir(url): after changing into a relative `url`, joining `url`
    # again would point at url/url/<name>.
    available = set(file_list)
    match = []
    # Text mode, so names are str like the os.listdir() entries; in binary
    # mode they are bytes and never compare equal on Python 3.
    with open(file_read, 'r') as f:
        for file_name in f:
            file_name = file_name.strip()
            if file_name in available:
                match.append(file_name)
                # shutil.move also works when dest is on another drive.
                shutil.move(os.path.join(url, file_name), os.path.join(dest, file_name))
                # Guard against the same name being listed twice.
                available.discard(file_name)
    print(match)
Here, url is the source directory (folder) whose files you want to match, file_read is the name (with path) of the file that contains the list of file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. these files will not remain in url after running the code.
Alternatively you could use the glob module which allows you to enter in a expression for the file name\extension which will then return a list that you can loop over.
I'd use this module if the source directory can have files with the same extension that you want to exclude from being looped over
Also I'm assuming that the file name list is not large and so storing it in a list wont be an issue
eg (I haven't tested the below )
from glob import glob
import os
import shutil
# Copy every ant*.aiq file in `src` whose name is listed in `in_file_list`
# into `dst`.
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"

list_Of_files = glob(os.path.join(src, 'ant*.aiq'))

# Strip each line: readlines() keeps the trailing newline, so a bare file
# name would otherwise never be found among the listed names.
with open(in_file_list) as reader:
    data = {line.strip() for line in reader}

for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file
        # shutil.move(row, os.path.join(dst, file_name))

Remove lines of files in different directories

I'd like to remove the first line and the second-to-last line of the files that exist in different subdirectories of the same root directory. My code is below:
import fileinput
import sys
import os
path = "./rootDire"
# NOTE: logic kept exactly as posted in the question. The loop never uses the
# names in `files`; it opens `file`, which is not one of the loop variables,
# so no walked file is ever opened.
for(dirpath,dirnames,files) in os.walk(path):
    f = open(file,'r')
    lines = f.readlines()
    f.close()
    f = open(file,'w')
    f.writelines(lines[1:-2])
    f.close()
But when it finds a file, an error occurs saying that the file, which has just been found, does not exist.
Correct me if it does not work:
import fileinput
import sys
import os
# Rewrite every file below `path` in place, keeping only lines[1:-2].
path = "./rootDire"
for dirpath, dirnames, files in os.walk(path):
    for filename in files:
        # os.walk yields bare names; join with the directory to get a path
        # that can be opened from the current working directory.
        filepath = os.path.join(dirpath, filename)
        with open(filepath, 'r') as f:
            lines = f.readlines()
        # The slice drops the first line and the last two lines.
        with open(filepath, 'w') as f:
            f.writelines(lines[1:-2])

Categories

Resources