I'm trying to replicate the exporting of a Code Module from an Excel sheet in Python.
The following works in VBA:
Public Sub ExportModules()
Dim wb As Workbook
Set wb = ThisWorkbook
Dim D As String
Dim N
D = ThisWorkbook.Path
For Each VBComp In wb.VBProject.VBComponents
If (VBComp.Type = 1) Then
N = D + "\" + VBComp.Name + ".txt"
VBComp.Export N
End If
Next
End Sub
And I have the following in Python:
import os
import sys
import glob
from win32com.client import Dispatch
scripts_dir = 'folder address'
com_instance = Dispatch("Excel.Application")
com_instance.Visible = False
com_instance.DisplayAlerts = False
for script_file in glob.glob(os.path.join(scripts_dir, "*.xlsm")):
print "Processing: %s" % script_file
(file_path, file_name) = os.path.split(script_file)
objworkbook = com_instance.Workbooks.Open(script_file)
for xlmodule in objworkbook.VBProject.VBComponents:
xlmodule.Export('export file name')
My question is, what do I have to do in Python to replicate the Export of the file as per the VBA code?
Use the default oletools xltrails provides a good way to extract .bas files from .xlsm or other excel files
import os
import shutil
from oletools.olevba3 import VBA_Parser
EXCEL_FILE_EXTENSIONS = ('xlsb', 'xls', 'xlsm', 'xla', 'xlt', 'xlam',)
def parse(workbook_path):
vba_path = workbook_path + '.vba'
vba_parser = VBA_Parser(workbook_path)
vba_modules = vba_parser.extract_all_macros() if vba_parser.detect_vba_macros() else []
for _, _, _, content in vba_modules:
decoded_content = content.decode('latin-1')
lines = []
if '\r\n' in decoded_content:
lines = decoded_content.split('\r\n')
else:
lines = decoded_content.split('\n')
if lines:
name = lines[0].replace('Attribute VB_Name = ', '').strip('"')
content = [line for line in lines[1:] if not (
line.startswith('Attribute') and 'VB_' in line)]
if content and content[-1] == '':
content.pop(len(content)-1)
lines_of_code = len(content)
non_empty_lines_of_code = len([c for c in content if c])
if non_empty_lines_of_code > 0:
if not os.path.exists(os.path.join(vba_path)):
os.makedirs(vba_path)
with open(os.path.join(vba_path, name + '.bas'), 'w') as f:
f.write('\n'.join(content))
if __name__ == '__main__':
for root, dirs, files in os.walk('.'):
for f in dirs:
if f.endswith('.vba'):
shutil.rmtree(os.path.join(root, f))
for f in files:
if f.endswith(EXCEL_FILE_EXTENSIONS):
parse(os.path.join(root, f))
I have tried it and it works great.
Ref: https://www.xltrail.com/blog/auto-export-vba-commit-hook
Related
I need to remove sheets that names are in array. Unfortunetely this: tempWb.remove(wsToRemoveNameArray[wsToRemoveIndex]) , and this:
del tempWb[wsToRemoveNameArray[wsToRemoveIndex]] dont want to work with my code:
Anyone know how to deal with it?
def splitExcelFiles(InputPath, OutputPath, fileNameArray):
for file in range(0, len(fileNameArray)):
tempFile = InputPath + '\\' +fileNameArray[file]
tempWb = load_workbook(tempFile)
wsToRemoveNameArray = []
if(len(tempWb.sheetnames)==1):
#new wb
tempWb.save(str(OutputPath) + '\\' + str(tempWb.sheetnames) + '.xlsx')
else:
for ws in range (0,len(tempWb.sheetnames)):
newName = tempWb.sheetnames[ws]
wsToRemoveNameArray = []
#copyWs = tempWb.copy_worksheet[ws]
# #This section will save the names to remove other sheets from ws
for wsToRemoveName in range (0,len(tempWb.sheetnames)):
if newName != tempWb.sheetnames[wsToRemoveName]:
#print(tempWb.sheetnames[wsToRemoveName])
wsToRemoveNameArray.append(str(tempWb.sheetnames[wsToRemoveName]))
for wsToRemoveIndex in range (0, len(wsToRemoveNameArray)):
# tem
#tempWb.remove(wsToRemoveNameArray[wsToRemoveIndex])
#del tempWb[wsToRemoveNameArray[wsToRemoveIndex]]
# tempWb.
# print(len(wsToRemoveNameArray))
tempWb.save(str(OutputPath) + '\\' + newName + '.xlsx')
First things first, some general tips:
Use the pathlib library whenever you deal with Paths. It makes things a lot easier. There is never a good reason to include the path delimiter in your code.
In python, it's common to write variables and functions with an underscore: save_path instead of savePath
Now that we have that out of the way, here is a tested example. Just change the directories to match yours.
from openpyxl import load_workbook, Workbook
from pathlib import Path
import shutil
def make_absolute_path(path, name, suffix=".xlsx"):
input_file_path = path / name
return input_file_path.with_suffix(suffix)
def remove_all_sheets_except_filename(input_path, output_path, filenames):
output_files_path = []
for i in range(0, len(filenames)):
input_file_path = make_absolute_path(input_path, filenames[i])
output_file_path = make_absolute_path(output_path, filenames[i])
if not Path.is_file(input_file_path):
print(f"Skipping {input_file_path}: Not valid file " f"path. ")
continue
shutil.copyfile(input_file_path, output_file_path)
wb_source: Workbook = load_workbook(filename=output_file_path)
sheets = wb_source.worksheets
if len(sheets) == 1:
save_path = make_absolute_path(output_path, str(wb_source.sheetnames[0]))
wb_source.save(save_path)
output_files_path.append(str(save_path))
else:
for sheet in wb_source.sheetnames:
if not sheet == input_file_path.stem:
wb_source.remove(wb_source[sheet])
if len(wb_source.worksheets) == 1:
save_path = make_absolute_path(
output_path, str(wb_source.sheetnames[0])
)
wb_source.save(save_path)
output_files_path.append(str(save_path))
else:
print(
f"Failed to process {input_file_path} with following "
f"sheets: {','.join(wb_source.worksheets)}."
)
raise ValueError("")
return output_files_path
def main():
# Adjust to where you have the xlsx files and where you want them
input_directory = Path("path/to/your/xlsx/files")
output_directory = Path("path/to/the/output/directory")
file_names = ["input", "foo", "bar"]
paths = remove_all_sheets_except_filename(
input_directory, output_directory, file_names
)
if __name__ == "__main__":
main()
``
I want to change filename for all my files in a folder. They all end with a date and time like "filename 2019-05-20 1357" and I want the date first for all files. How can I do that simplest way?
#!/usr/bin/python3
import shutil, os, re
r = re.compile(r"^(.*) (\d{4}-\d{2}-\d{2} \d{4})$")
for f in os.listdir():
m = r.match(f)
if m:
shutil.move(f, "{} {}".format(m.group(2), m.group(1)))
Quick and roughly tested version
Here is my Implementation:
from datetime import datetime
import os
path = '/Users/name/desktop/directory'
for _, file in enumerate(os.listdir(path)):
os.rename(os.path.join(path, file), os.path.join(path, str(datetime.now().strftime("%d-%m-%Y %H%M"))+str(file)))
Output Format:
20-05-2019 1749filename.ext
import os
import re
import shutil
dir_path = '' # give the dir name
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
if '.' in file:
index = [i for i, v in enumerate(file,0) if v=='.'][-1]
name = file[:index]
ext = file[index+1:]
else:
ext=''
name = file
data = comp.findall(name)
if len(data)!=0:
date= comp.findall(name)[0]
rest_name = ' '.join(comp.split(name)).strip()
new_name = '{} {}{}'.format(date,rest_name,'.'+ext)
print('changing {} to {}'.format(name, new_name))
shutil.move(os.path.join(dir_path,name), os.path.join(dir_path, new_name))
else:
print('file {} is not change'.format(name))
Below is the data in CFS_Config.txt. What this textfile does is to know where the documents have stored and to avoid hardcodes in the program. For instance, if the program is moved to other environment, we only need to change the directory paths in the CFS_Config.txt file.
Folder Path = ../dataprep/source_documents
ED Notes name = ED Notes
ED Notes output = ../dataprep/ED_Notes
This below codes in a python file what it actually does is to read configuration from the CFS_Config.txt mentioned earlier and also to do an auto generated textfile.
The problem encountered is that they tell me the ../dataprep/ED_Notes path was not found. Please do take a look at the codes if need more codes I will try my best to provide, thanks!!! :((
from preprocessing import ednotes_extractor
import os
def read_config():
# open existing file to read configuration
cfs_config_txt = open("..\CFS_Config.txt", "r")
file_list = []
root_dir = ""
ednotes_name = ""
ednotes_output = ""
for line in cfs_config_txt:
file_list.append(line)
if "Folder Path = " in file_list[0]:
root_dir = str(file_list[0])
root_dir = root_dir.replace("Folder Path = ", "")
root_dir = root_dir.replace("\n", "")
if "ED Notes name = " in file_list[1]:
ednotes_name = str(file_list[1])
ednotes_name = ednotes_name.replace("ED Notes name = ", "")
ednotes_name = ednotes_name.replace("\n", "")
if "ED Notes output = " in file_list[2]:
ednotes_output = str(file_list[2])
ednotes_output = ednotes_output.replace("ED Notes output = ", "")
ednotes_output = ednotes_output + ".txt"
ednotes_output = ednotes_output.replace("\n", "")
return root_dir, ednotes_name, ednotes_output
def convert_txt(choices):
root_dir, ednotes_name, ednotes_output = read_config()
if(choices == 1):
# open new file to write string data textfile
text_file = open(ednotes_output, 'w', encoding='utf-8')
text_file.write("cat_id|content\n")
for filename in os.listdir(root_dir):
source_directory = root_dir + '/' + filename
arr = ednotes_extractor.get_ednotes(source_directory)
# open existing file to append the items in the array to the previously written textfile
text_file = open(ednotes_output, 'a', encoding='utf-8')
for item in arr:
text_file.write("%s\n" % item)
elif(choices==2):
print("Hi")
I'm working on a program to split excel files into sections of 1000. I can't seem to get it to create a second excel file, as xlsxwriter doesn't create the second file.
from os.path import join, dirname, abspath
from xlrd.sheet import ctype_text
import csv
import os
import sys
import xlrd
import xlsxwriter
import xlwt
file_paths = sys.argv[1:]
draganddrop = ''.join(file_paths)
beginGrab = 0
counting = 0
endGrab = 1000
thousands = 0
if draganddrop == "":
fileName = raw_input("\nInput the file with extension\n>")
else:
fileName = draganddrop
stopPoint = fileName.index('.')
prepRev = fileName[stopPoint:]
preName = fileName[:stopPoint]
if prepRev == ".csv":
excelFile = xlsxwriter.Workbook(preName + '.xlsx')
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
content = csv.reader(f)
for index_col, data_in_col in enumerate(content):
for index_row, data_in_cell in enumerate(data_in_col):
worksheet.write(index_col,index_row,data_in_cell)
excelFile.close()
fileName = (preName + '.xlsx')
delMe = 1
print("Temporary Convert to xlsx done.\n")
stopPoint = fileName.index('.')
prepRev = fileName[0:stopPoint]
fname = join(dirname(abspath(__file__)), fileName)
xl_workbook = xlrd.open_workbook(fname)
sheet_names = xl_workbook.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])
book = xlwt.Workbook(encoding="utf-8")
worksheet = book.add_sheet("Results", cell_overwrite_ok=True)
workbook = xlrd.open_workbook(fileName)
for sheet in workbook.sheets():
for row in range(sheet.nrows):
row = int(row)
if(int(row)>1000):
subDivide = int(row) / 1000
while(thousands != subDivide + 1):
thousands = thousands + 1
counting = 0
totalName = preName + "_" + str(thousands) + ".xlsx"
print(totalName)
excelFile = xlsxwriter.Workbook(str(totalName))
worksheet = excelFile.add_worksheet()
with open(totalName,'rb') as f:
col = xl_sheet.col_slice(0,1,10101010)
for idx, cell_obj in enumerate(col, start=beginGrab):
counting = counting + 1
if(counting == 1000):
break
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
cell_obj_str = str(cell_obj)
telePhone = (cell_obj_str[7:19])
worksheet.write(idx+1, 0, "1" + telePhone)
worksheet.write(0,0, "Telephone Number")
beginGrab = thousands * 1000
endGrab = beginGrab + 1000
excelFile.close()
excelFile = None
else:
print("Mate, this is Tiny!")
print ("Ding! Job Done!")
I've been rubber ducking this and I can't find where I'm at fault.
EDIT:
SOLVED!!
By creating a sheet and then closing it, the program can then grasp it. I will probably make a git issue about this.
if prepRev == ".csv":
totalName = preName + '.xlsx'
excelFile = xlsxwriter.Workbook(totalName)
excelFile.close()
Closing it lets open see it while it still contains the same info.
excelFile = xlsxwriter.Workbook(totalName)
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
Doesn't the save/close line need to be within the while loop? Otherwise it looks like it will only save either the first/last item:
while(thousands != subDivide + 1):
# write file
excelFile.close()
that line is probably the reason why you cannot read back your file and your script crashes:
fname = join(dirname(abspath('__file__')), '%s' % fileName)
'__file__' shouldn't have quotes. I'd do:
fname = join(dirname(abspath(__file__)), fileName)
I have 300 XML files, in each file there is a path (see code) and i want to make a list (.CSV) of this Paths with Python.
<da:AdminData>
<da:Datax />
<da:DataID>223</da:DataID>
<da:Date>2013-08-19</da:Date>
<da:Time>13:27:25</da:Time>
<da:Modification>2013-08-19</da:Modification>
<da:ModificationTime>13:27:25</da:ModificationTime>
**<da:Path>D:\08\06\xxx-aaa_20130806_111339.dat</da:Path>**
<da:ID>xxx-5225-fff</da:ID>
I wrote the following code, but does not work for subdirectories
import os, glob, re, time, shutil
xmlpath = r'D:'
outfilename = "result.csv"
list = glob.glob(os.path.join(xmlpath,'*.xml'))
output = ""
for file in list :
fh = open(file)
text = fh.read()
pattern = "<da:Path>(.*)</da:Path>"
pattern = re.compile(pattern);
a = pattern.search(text)
if a:
output += '\n' + a.group(1)
logfile = open(outfile, "w")
logfile.write(output)
logfile.close()
To glob recursively, it is best to use a combination of os.walk and fnmatch.fnmatch. Example:
import os
import fnmatch
def recursive_glob(rootdir, pattern):
matching_files = []
for d, _, fnames in os.walk(rootdir):
matching_files.extend(
os.path.join(d, fname) for fname in fnames
if fnmatch.fnmatch(fname, pattern)
)
return matching_files
xmlfiles = recursive_glob(r"D:\", "*.xml")