How to display RTF content from clipboard in a PyQt interface? - python

I would need some of your expertise concerning GUI and more precisely PyQt4.
Context
I am currently designing a GUI with PyQt. It was previously done with wxPython, but I was more or less forced to migrate to Qt due to an internal issue.
At some point, I needed to display traditional RTF content, ie including hidden tags such as {\rtf1\ansi\ansicpg1252\deff0\deflang1036\deflangfe1036{\fonttbl{\f0\fswiss\fprq2\fcharset0 Calibri;}{\f1\froman\fprq2\fcharset2 Symbol;}}, \tx360 or \par and so on.
If I'm not mistaken, QTextEdit from PyQt can't "interpret" this RTF and will just display the whole string. wxPython couldn't either, but I had found a workaround provided by the wxPython community, which was to copy the string to the Windows clipboard and then paste it into the target text widget.
Thus, I had this piece of code:
class rtfClip():
    """Put RTF on the Windows clipboard while preserving the user's own data.

    Typical use: ``saveClipboard()``, ``setClipboard(rtf)``, paste into a
    widget, then ``restoreClipboard()``.

    The code is written so it parses on both Python 2.6+ and Python 3
    (``except ... as``, single-argument ``print(...)``).
    """

    # Windows error codes inspected while talking to the clipboard.
    _ERR_OPEN_FAILURE = 0
    _ERR_ACCESS_DENIED = 5
    _ERR_INVALID_HANDLE = 6
    _ERR_CLIPBOARD_NOT_OPEN = 1418
    # Format id that the original code treated as "copied files" (CF_HDROP).
    _CF_HDROP = 15

    def __init__(self):
        self.CF_RTF = win32clipboard.RegisterClipboardFormat("Rich Text Format")
        # Always defined, so restoreClipboard() is safe without a prior save.
        self.cbSaved = {}

    @staticmethod
    def _errorCode(err):
        """Return the numeric code carried by *err*, or None if there is none.

        Prefers a ``winerror`` attribute and falls back to ``err.args[0]``,
        which is what the Python 2 spelling ``err[0]`` used to read.
        """
        code = getattr(err, 'winerror', None)
        if code is None and err.args:
            code = err.args[0]
        return code

    @staticmethod
    def _stripTrailingNul(data):
        """Return *data* without one trailing NUL; non-string data is untouched."""
        if isinstance(data, (bytes, type(u''))) and data[-1:] in (b'\0', u'\0'):
            return data[:-1]
        return data

    def _openClipboard(self, errorTemplate):
        """Open the clipboard, retrying while another process holds it.

        Returns True once the clipboard is open. On an unexpected error the
        message ``errorTemplate % err`` is printed and False is returned, so
        the caller can give up instead of spinning forever.
        """
        retryCodes = (self._ERR_ACCESS_DENIED,
                      self._ERR_CLIPBOARD_NOT_OPEN,
                      self._ERR_OPEN_FAILURE)
        while True:
            try:
                win32clipboard.OpenClipboard(0)
                return True
            except Exception as err:
                if self._errorCode(err) in retryCodes:
                    # Something else has the clipboard: wait a short random
                    # time before retrying so we don't collide again.
                    time.sleep(random.random() / 50)
                else:
                    print(errorTemplate % err)
                    return False

    @staticmethod
    def _closeClipboard():
        """Close the clipboard, ignoring the error raised if it is not open."""
        try:
            win32clipboard.CloseClipboard()
        except Exception:
            pass

    # Puts 'toPaste' on the clipboard
    def setClipboard(self, toPaste):
        """Replace the clipboard content with *toPaste* in the RTF format."""
        template = 'ERROR in Clipboard section of readcomments: %s'
        if not self._openClipboard(template):
            return
        try:
            win32clipboard.EmptyClipboard()  # need to empty, or prev data will stay
            win32clipboard.SetClipboardData(self.CF_RTF, toPaste)
        except Exception as err:
            print(template % err)
        finally:
            # Never leave the clipboard locked, even if setting the data failed.
            self._closeClipboard()

    # Save the user's existing clipboard data, if possible. It is unable to save
    # copied files, image data, etc; text, HTML, RTF, etc are preserved just fine
    def saveClipboard(self):
        """Snapshot every retrievable clipboard format into ``self.cbSaved``.

        If the clipboard holds copied files (CF_HDROP) nothing is saved. The
        clipboard itself is left untouched by this method.
        """
        self.cbSaved = {}
        if not self._openClipboard('Error while saving clipboard: %s'):
            return
        try:
            fmt = win32clipboard.EnumClipboardFormats(0)
            while fmt != 0:
                if fmt == self._CF_HDROP:
                    # File lists cannot be restored later, so give up.
                    self.cbSaved = {}
                    break
                self.cbSaved[fmt] = win32clipboard.GetClipboardData(fmt)
                fmt = win32clipboard.EnumClipboardFormats(fmt)
        except Exception as err:
            # Code 6 means a format whose data cannot be read this way; keep
            # what was saved so far and stop quietly, as the original did.
            if self._errorCode(err) != self._ERR_INVALID_HANDLE:
                print('Error while saving clipboard: %s' % err)
        finally:
            self._closeClipboard()

    # Restore the user's clipboard, if possible
    def restoreClipboard(self):
        """Put the formats captured by ``saveClipboard`` back on the clipboard."""
        # don't wait for the CB if we don't have to
        if not self.cbSaved:
            return
        if not self._openClipboard('Error with clipboard restoration: %s'):
            return
        try:
            win32clipboard.EmptyClipboard()
            for item, data in self.cbSaved.items():
                # windows appends NULL to most clipboard items, so strip off the NULL
                win32clipboard.SetClipboardData(item, self._stripTrailingNul(data))
        except Exception:
            # A partial restore is worse than none: clear whatever was set.
            try:
                win32clipboard.EmptyClipboard()
            except Exception:
                pass
        finally:
            self._closeClipboard()
And then I just had to paste my RTF string in the associated widget:
# wxPython workaround: route the RTF through the Windows clipboard and let the
# text control's own Paste() render it, then put the user's clipboard back.
rtf = copy_to_clipboard.rtfClip()
rtf.saveClipboard() # Save the current user's clipboard
rtf.setClipboard(my_rtf_string_full_of_rtf_tags) # Put our RTF on the clipboard
preview_dlg = preview_rtf_text(None)
# The control is made editable before Paste() is called on it.
preview_dlg.preview_rtf_ctrl.SetEditable(True)
preview_dlg.preview_rtf_ctrl.Paste() # Paste in into the textbox
rtf.restoreClipboard() # Restore the user's clipboard
preview_dlg.ShowModal()
preview_dlg.Destroy()
(preview_rtf_text being a class with only a TextCtrl named preview_rtf_ctrl)
Problem
My problem is that, for some reason, I can't get this solution working with PyQt.
I have attempted designing a very similar solution with
# PyQt attempt at the same clipboard round-trip: save, set RTF, paste, restore.
rtf = copy_to_clipboard.rtfClip()
rtf.saveClipboard() # Save the current user's clipboard
rtf.setClipboard(rtf_content) # Put our RTF on the clipboard
#
rtf_preview_dlg = AEM_RTF_preview(self)
# Make the QTextEdit writable and rich-text aware before pasting into it.
rtf_preview_dlg.rtf_preview_ctl.setReadOnly(False)
rtf_preview_dlg.rtf_preview_ctl.setAcceptRichText(True)
# Place the text cursor at the start of the document so the paste lands there.
cursor = QtGui.QTextCursor(rtf_preview_dlg.rtf_preview_ctl.document())
cursor.setPosition(0)
rtf_preview_dlg.rtf_preview_ctl.setTextCursor(cursor)
rtf_preview_dlg.rtf_preview_ctl.paste()
rtf.restoreClipboard() # Restore the user's clipboard
rtf_preview_dlg.rtf_preview_ctl.setReadOnly(True)
rtf_preview_dlg.exec_()
But for some reason this doesn't work: nothing is pasted into the QTextEdit (rtf_preview_ctl).
I saw in some topics that PyQt has its own clipboard, but how would I make it "take" the content from the Windows one? Is that even a solution?
Sorry for the very long question, I hope some of you may have an idea, since it would be an important feature of the GUI.
EDIT : There might be other solution for my need, my dream would just be to display formatted microsoft RTF content, one way or another.

I found a very old command line utility, unrtf. It outputs to STDOUT, so we need to process the output from there. It's been made for linux, but Windows binaries are available, even if the version I found is a bit older than the latest provided for Linux.
It requires to write a temporary file and might have some small issues with rtf conversion, but for simple cases seems to work fine enough.
In this case I automatically detect if there's some rtf content in the clipboard (so you can test it with along with your current program), but you can also paste raw rtf contents there to test it: you actually don't need the clipboard at all to make it work.
As far as I can understand it also supports tables and images, which are exported in external files (so you might have to test its behavior and possibly edit the html before actually applying it to the QTextEdit).
# MIME type names under which RTF may be offered on the clipboard.
rtfTypes = set(['text/rtf', 'text/richtext', 'application/x-qt-windows-mime;value="Rich Text Format"'])


class PasteWidget(QtWidgets.QWidget):
    """Demo widget: paste raw RTF on top, show unrtf's HTML conversion below.

    ``Paste`` copies RTF source from the clipboard into the upper editor;
    ``Convert`` writes that source to a temporary file, runs ``unrtf.exe
    --html`` on it and renders the program's standard output in the lower,
    read-only editor.
    """

    def __init__(self):
        QtWidgets.QWidget.__init__(self)
        l = QtWidgets.QGridLayout()
        self.setLayout(l)

        # Upper editor holds the raw RTF source, so rich-text paste is disabled.
        self.input = QtWidgets.QTextEdit()
        l.addWidget(self.input)
        self.input.setAcceptRichText(False)

        self.pasteBtn = QtWidgets.QPushButton('Paste')
        l.addWidget(self.pasteBtn)
        self.pasteBtn.clicked.connect(self.paste)

        self.convertBtn = QtWidgets.QPushButton('Convert')
        l.addWidget(self.convertBtn)
        self.convertBtn.clicked.connect(self.convert)

        # Lower editor displays the converted HTML.
        self.output = QtWidgets.QTextEdit()
        l.addWidget(self.output)
        self.output.setReadOnly(True)

        self.clipboard = QtWidgets.QApplication.clipboard()
        self.clipboard.changed.connect(self.checkClipboard)
        self.checkClipboard()

    def checkClipboard(self, mode=QtGui.QClipboard.Clipboard):
        """Enable the Paste button only while the clipboard offers RTF."""
        if mode != QtGui.QClipboard.Clipboard:
            return
        self.pasteBtn.setEnabled(bool(set(self.clipboard.mimeData().formats()) & rtfTypes))

    def paste(self):
        """Copy the clipboard's RTF source text into the input editor."""
        mime = self.clipboard.mimeData()
        for mimeType in mime.formats():
            if mimeType in rtfTypes:
                # str(QByteArray) gives the "b'...'" repr on Python 3, so go
                # through bytes and decode explicitly. latin-1 maps every
                # byte to one character, so the source survives a round trip.
                raw = bytes(mime.data(mimeType)).rstrip(b'\0')
                self.input.setPlainText(raw.decode('latin-1'))
                break

    def convert(self):
        """Run unrtf on the input editor's content and show the HTML result."""
        rtf = self.input.toPlainText()
        if not rtf:
            return
        sourcePath = os.path.join(QtCore.QDir.tempPath(), '_sourceRtf')
        # The file is opened in binary mode, so encode the text first.
        with open(sourcePath, 'wb') as _input:
            _input.write(rtf.encode('latin-1', 'replace'))
        # Parent the process to the widget so it outlives this call.
        unrtf = QtCore.QProcess(self)
        # Read once at the end: readyReadStandardOutput may fire per chunk,
        # and calling setHtml on each chunk would keep only the last one.
        unrtf.finished.connect(lambda *_: self._showConverted(unrtf))
        unrtf.start('unrtf.exe', ['--html', sourcePath])

    def _showConverted(self, process):
        """Render everything *process* wrote to stdout, then dispose of it."""
        # NOTE(review): unrtf's output encoding is assumed to be UTF-8
        # compatible; undecodable bytes are replaced rather than raising.
        html = bytes(process.readAllStandardOutput()).decode('utf-8', 'replace')
        self.output.setHtml(html)
        process.deleteLater()
Obviously the unrtf.exe has to be in the system path (or the path of the main script).

Related

Python for/while loop

Today i am working on a project about incoming phone calls being transcripted and getting saved into text files, but i am also kinda new to python and python loops.
I want to loop over a SQL server column and let each row loop trough the azure Speech to text service i use (all of the phonecall OID's). I have been stuck on this problem for a couple days now so i thought i might find some help here.
import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
import sys
import os.path
import pyodbc
# Connection settings (placeholders in the posted code).
databaseName = '*'
username = '*'
password = '*'
server = '*'
driver = '*'
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the loops below cannot be confirmed from the text alone.
try:
CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
storedproc = "* = *'"
cursor.execute(storedproc)
row = cursor.fetchone()
while row:
# NOTE(review): `array` is assigned a brand-new one-element list here rather
# than appended to, so earlier values are not kept across iterations.
array = [(int(row[1]))]
row = cursor.fetchone()
i = 0
while i<len(array):
OID = (array[i])
i = i + 1
print(OID)
# The API key sent with the request is the SHA-256 hex digest of the OID
# followed by a (redacted) suffix.
string = f"{OID}*"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
Telefoongesprek = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})
# NOTE(review): the response body is written out without checking the HTTP
# status first.
with open("Telefoongesprek.mp3", "wb") as f:
f.write(Telefoongesprek.content)
# Convert the downloaded MP3 to WAV with pydub.
src = "Telefoongesprek.mp3"
dst = "Telefoongesprek.wav"
sound = AudioSegment.from_file(src)
sound.export(dst, format="wav")
def speech_recognize_continuous_from_file():
speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek.wav")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
# `done` is flipped by stop_cb and polled by the wait loop further down.
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
# NOTE(review): the same handler is connected to the session and canceled
# events too; it reads evt.result.text for every one of them.
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
# The transcript is stored as the string form of the result list.
telefoongesprek = str(all_results)
filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
file = open(filename, "w")
file.write(telefoongesprek)
file.close()
speech_recognize_continuous_from_file()
cursor.close()
del cursor
conn.close()
except Exception as e:
print("Error: %s" % e)
Everything works when run separately, but I just don't know where to place the loop and which one I should use (for or while). Right here I'm trying to loop over an array, but I don't think this is correct.
Error message: Decoding failed. ffmpeg returned error code: 1
[mp3 # 000001cb8c57e0o0] Failed to read frame size: could not seek to 1073.
which I am pretty sure means that my Azure function can't find an mp3 file, which means that the "mp3 to wav" conversion doesn't work.
Thanks in advance!
If I understand your question, you have a database with lots of phone call details. One of the field value in each row is used to create the associated mp3 file. You want to do speech to text using azure on each of the mp3 file you have in your database.
So you can do it in two ways:
Iterate though all rows in the database and create all the associted files into a folder in the local disk with the OID as your filename.
Then write another loop to iterate through this folder and send the files for transcription to Azure Speech to Text service.
The other technique is to do everything in a single loop like the way you have shown which will require some corrections.
Ok, so now that part is clear, we can go into the speech to text part. So azure allow you to send the compressed format for transcription, which means you actually don't need to convert it into wav file.
Please have a look at the modified code below with the changes:
# code snippet borrowed from azure samples
def speech_recognize_continuous_from_file(filename):
    """Transcribe the MP3 file *filename* and write the transcript to disk.

    The audio is streamed to the Azure Speech service in compressed (MP3)
    form through a pull stream, recognized continuously as Dutch
    (``nl-NL``), and the collected phrases are written, as the string form
    of a list, to a ``Telefoongesprek#<OID>.txt`` file.

    Args:
        filename: Path of the MP3 file to transcribe.

    Note:
        The output file name is built from the module-level name ``OID``,
        which must be set by the caller before this function runs.
    """
    class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        """Feeds the raw bytes of a file to the SDK's pull audio stream."""

        def __init__(self, filename: str):
            super().__init__()
            self._file_h = open(filename, "rb")

        def read(self, buffer: memoryview) -> int:
            """Fill *buffer* with the next bytes; return how many were written."""
            try:
                size = buffer.nbytes
                frames = self._file_h.read(size)
                buffer[:len(frames)] = frames
                return len(frames)
            except Exception as ex:
                print('Exception in `read`: {}'.format(ex))
                raise

        def close(self) -> None:
            """Close the underlying file."""
            print('closing file')
            try:
                self._file_h.close()
            except Exception as ex:
                print('Exception in `close`: {}'.format(ex))
                raise

    # Creates an audio stream format. For an example we are using MP3 compressed file here
    compressed_format = speechsdk.audio.AudioStreamFormat(compressed_stream_format=speechsdk.AudioStreamContainerFormat.MP3)
    callback = BinaryFileReaderCallback(filename=filename)
    stream = speechsdk.audio.PullAudioInputStream(stream_format=compressed_format, pull_stream_callback=callback)

    speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
    speech_config.speech_recognition_language = "nl-NL"
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    # Creates a speech recognizer using a file as audio input, also specify the speech language
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config, audio_config)

    done = False

    def stop_cb(evt):
        """Flag the wait loop below to finish."""
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    all_results = []

    def handle_final_result(evt):
        """Collect the text of one recognition result."""
        all_results.append(evt.result.text)

    # Only events that carry a recognition result feed the result handler.
    # The session_started / session_stopped events are session-level (per the
    # SDK reference they expose a session id, not a `.result`), so wiring the
    # handler to them made it raise inside the callback.
    speech_recognizer.recognized.connect(handle_final_result)
    speech_recognizer.canceled.connect(handle_final_result)
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()

    print(all_results)
    telefoongesprek = str(all_results)
    # A separate name keeps the `filename` parameter intact.
    output_path = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
    with open(output_path, "w") as file:
        file.write(telefoongesprek)
# Walk the result set one row at a time: download each call's MP3 and
# transcribe it before moving on to the next row.
conn = None
cursor = None
try:
    CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
    conn = pyodbc.connect(CONNECTION_STRING)
    cursor = conn.cursor()
    storedproc = "* = *'"
    cursor.execute(storedproc)
    # fetchone() returns None once the result set is exhausted, which ends
    # the iteration.
    for row in iter(cursor.fetchone, None):
        # OID stays a module-level name because
        # speech_recognize_continuous_from_file() reads it to name its output.
        OID = int(row[1])
        print(OID)
        # The API key is the SHA-256 hex digest of the OID plus a suffix.
        string = f"{OID}*"
        resultHash = hashlib.sha256(string.encode()).hexdigest()
        telefoongesprek_response = requests.get(f"*{OID}", headers={"api-key": resultHash})
        if not telefoongesprek_response.ok:
            # Don't save an HTTP error body as if it were audio; skip the row.
            print("Error: download for %s failed with HTTP %s" % (OID, telefoongesprek_response.status_code))
            continue
        # save the file to local disk as mp3
        with open("Telefoongesprek.mp3", "wb") as f:
            f.write(telefoongesprek_response.content)
        # do the speech to text on the mp3 file
        speech_recognize_continuous_from_file(f.name)
except Exception as e:
    print("Error: %s" % e)
finally:
    # Release the database resources even when a row fails.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
I haven't tested this full code, as I don't have the database connection available. Please feel free to modify it for your use case, and let me know if you have any issues.

Tkinter - Python

I was programming a notepad with tkinter, but when I tried to define "createNewFile", which asks you to save before creating the new file, it gave me an error: Expected an indented block (, line 32). Here's the code:
# "New file" handler as posted in the question.
# NOTE(review): the indentation of this listing was lost; the question is
# about an "expected an indented block" error in this function.
def createNewFile():
# Only prompt when the text area holds something.
if len(textArea.get("1.0", END+"-1c")) > 0:
if messagebox.askyesno("Save?", "Do you wish to save the file?"):
saveFile()
else:
textArea.delete("1.0",END)
def saveFile():
    """Ask for a destination and write the text area's content to it.

    Does nothing if the user cancels the dialog.
    """
    file = filedialog.asksaveasfile(mode='w',defaultextension=".txt", filetypes=(("Text File",".txt"), ("All Files","*.*"),("HTML Files",".html .htm"),("CSS Files",".css")))
    if file is None:
        return
    # This is a plain function, so there is no `self`; use the module-level
    # text area that createNewFile() also uses.
    with file:
        file.write(textArea.get('1.0', END+'-1c'))
The line that follows the second if statement in the createNewFile() function (i.e., saveFile()) needs to be indented.
def createNewFile():
    """Offer to save the current text; clear the text area if declined."""
    current_text = textArea.get("1.0", END+"-1c")
    if not current_text:
        return
    wants_save = messagebox.askyesno("Save?", "Do you wish to save the file?")
    if wants_save:
        saveFile()
    else:
        textArea.delete("1.0",END)

Python QT5 with MultiThreading

Dears,
I am new to Python and trying to get started with networking through a simple program that has two plain-text areas, starts two telnet sessions, and shows the output of each session in its own text area. However, I get the error below:
QObject::connect: Cannot queue arguments of type 'QTextCursor'
(Make sure 'QTextCursor' is registered using qRegisterMetaType().)
the code as below:
# Create the application object before any widget is constructed.
app = QApplication(sys.argv)
def to_str(bytes_or_str):
    """Return *bytes_or_str* as text.

    ``bytes`` and ``bytearray`` values are decoded as UTF-8; bytes that are
    not valid UTF-8 become U+FFFD instead of raising, because data read from
    a remote device is not guaranteed to be UTF-8. Any other value is
    returned unchanged.
    """
    if isinstance(bytes_or_str, (bytes, bytearray)):
        return bytes_or_str.decode("utf-8", errors="replace")
    return bytes_or_str
def testTelnet_start(): # called when button pressed
    """Start one telnet worker thread per device."""
    first_job = ("192.168.3.247", 1)
    second_job = ("192.168.3.252", 2)
    _thread.start_new_thread(testTelnet, first_job)
    print("Test")
    _thread.start_new_thread(testTelnet, second_job)
def testTelnet(ip,x):
    """Read the telnet login banner of *ip* and show it in output pane *x*.

    Runs in a worker thread. Widgets must only be touched from the GUI
    thread, so the text is handed over with a queued ``invokeMethod`` call
    instead of calling ``appendPlainText`` directly; the direct call is what
    produces "Cannot queue arguments of type 'QTextCursor'".

    Args:
        ip: Host to connect to.
        x: 1 or 2, selecting which output pane receives the text. Any other
            value only prints the data.
    """
    # NOTE(review): assumes the file's Qt binding is PyQt5 - confirm.
    from PyQt5.QtCore import QMetaObject, Qt, Q_ARG

    try:
        tel_conn = telnetlib.Telnet()
        try:
            tel_conn.open(host= ip)
            data = tel_conn.read_until(b"login:",3)
        finally:
            # Close the connection whether or not the read succeeded.
            tel_conn.close()
        data = to_str(data)
        data = ip + "\n" + data
        print(data)
        if(x==1):
            target = plain_text_area_Upgrade1
        elif(x==2):
            target = plain_text_area_Upgrade2
        else:
            return
        # Queued connection: the slot runs later in the thread that owns the
        # widget (the GUI thread), not in this worker thread.
        QMetaObject.invokeMethod(target, "appendPlainText", Qt.QueuedConnection, Q_ARG(str, data))
    except Exception as e:
        print(e)
# Build the fixed-size main window.
my_Test_Window = QWidget()
my_Test_Window.setWindowTitle("Telnet Window")
my_Test_Window.resize(490,350)
my_Test_Window.setFixedSize(my_Test_Window.size())
push_test1 = QPushButton("Test#1",my_Test_Window)
push_test1.move(90,280)
# First read-only output pane (written to by testTelnet when x == 1).
plain_text_area_Upgrade1 = QPlainTextEdit(my_Test_Window)
plain_text_area_Upgrade1.resize(160,150)
# NOTE(review): the value returned by updatesEnabled() is discarded here.
plain_text_area_Upgrade1.updatesEnabled()
plain_text_area_Upgrade1.move(25,20)
plain_text_area_Upgrade1.setReadOnly(True)
plain_text_area_Upgrade1.insertPlainText("Testing ...")
plain_text_area_Upgrade1.appendPlainText("")
# Second read-only output pane (written to by testTelnet when x == 2).
plain_text_area_Upgrade2 = QPlainTextEdit(my_Test_Window)
plain_text_area_Upgrade2.resize(160,150)
plain_text_area_Upgrade2.updatesEnabled()
plain_text_area_Upgrade2.move(250,20)
plain_text_area_Upgrade2.setReadOnly(True)
plain_text_area_Upgrade2.insertPlainText("Testing ...")
plain_text_area_Upgrade2.appendPlainText("")
# Clicking the button starts both telnet worker threads.
push_test1.clicked.connect(testTelnet_start)
my_Test_Window.show()
app.exec_()
Any idea why a simple multi threading code cause those errors ?
Thanks.

How to read contents of a LibreOffice writer annotation from a python macro

LibreOffice writer allows the user to insert annotations(notes/comments) within the text.
My issue is I am unable to find a method to access the contents of a line specific annotation.
The following python code looks for selected/highlighted text and then strips out everything except a formatted time code (e.g. 01:10:23 or 11:10) which it converts into seconds.
If no text has been selected, it selects the entire current line and attempts to find the time code. However, the time code could be in an annotation.
I have managed to get a list of all of the annotations within the document, commented out at the start of the code but it is of no use to me.
I have been unable to discover a method of divining
a) whether the current line has an annotation or
b) how to access its contents.
If anyone has managed to achieve this, I'd appreciate any pointers.
def fs2_GoToTimestamp(*args):
    """Turn a time code in the selection (or current line) into a seek command.

    Uses the highlighted text, or the whole current line when nothing is
    highlighted, strips everything except digits and colons, and converts
    ``mm:ss`` or ``hh:mm:ss`` into seconds. Returns None in every case; the
    seek instruction is built but not yet used.
    """
    # The scripting context is made available to all scripts.
    desktop = XSCRIPTCONTEXT.getDesktop()
    model = desktop.getCurrentComponent()
    selection = model.getCurrentSelection()

    #access annotations for the whole document
    # oEnum = model.getTextFields().createEnumeration()
    # cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()
    # while oEnum.hasMoreElements():
    #     oField = oEnum.nextElement()
    #     cursor.gotoRange(oField,False)
    #     print (cursor.getPosition())
    #     if oField.supportsService('com.sun.star.text.TextField.Annotation'):
    #         print (oField.Content)
    #         x = oField.getAnchor()
    #         print (dir(x))

    picked_text = ""
    try:
        # Text the user selected/highlighted, if any.
        picked_text = selection.getByIndex(0).getString()
    except:
        pass

    try:
        if picked_text == "":
            # Nothing selected: select the whole current line instead.
            view_cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()
            view_cursor.gotoStartOfLine(False)   # move without selecting
            view_cursor.gotoEndOfLine(True)      # extend selection to line end
            selection = model.getCurrentSelection()
            picked_text = selection.getByIndex(0).getString()
            # Deselect line to avoid inadvertently deleting it on next keystroke
            view_cursor.gotoStartOfLine(False)
    except:
        pass

    allowed = '0123456789:'
    cleaned = ''.join(ch for ch in str(picked_text) if ch in allowed)
    fields = cleaned.split(":")
    if len(fields) == 2:
        hours = "00"
        minutes, seconds = fields
    elif len(fields) == 3:
        hours, minutes, seconds = fields
    else:
        return None

    # Keep at most the first two characters of the seconds field.
    seconds = seconds[:2]
    try:
        total = int(seconds)
        total += int(minutes) * 60
        total += int(hours) * 3600
    except:
        return None

    seek_instruction = 'seek' + str(total) + '\n'
    #Now do something with the seek instruction
Enumerate the annotations and use getAnchor() to find out where each is located. This answer is based on https://wiki.openoffice.org/wiki/Documentation/DevGuide/Text/Editing_Text#Text_Contents_Other_Than_Strings.
Your code is close to working.
# Fragment: relies on `oEnum` (a text-field enumeration) and `cursor` (a view
# cursor) defined as in the question's commented-out code.
while oEnum.hasMoreElements():
oField = oEnum.nextElement()
# Only annotation fields are of interest.
if oField.supportsService('com.sun.star.text.TextField.Annotation'):
# Move the cursor to the annotation's anchor, without selecting.
xTextRange = oField.getAnchor()
cursor.gotoRange(xTextRange, False)
Instead of print (dir(x)), an introspection tool such as XrayTool or MRI will give better information. It makes the API docs easier to figure out.
With much needed help from Jim K a self answer is posted below. I have commented where I believe it will help most.
#!/usr/bin/python
from com.sun.star.awt.MessageBoxButtons import BUTTONS_OK
from com.sun.star.awt.MessageBoxType import INFOBOX
def fs2_GoToTimestamp(*args):
    """Find a time code at the cursor and convert it into a seek command.

    Sources are tried in this order: the highlighted text; the whole current
    line; an annotation (comment) anchored on the current line. The text
    found is reduced to digits and colons and read as ``mm:ss`` or
    ``hh:mm:ss``.

    Returns None in every case. The seconds value is printed and the seek
    instruction is built for later use. If the cursor is inside a comment
    box, a message box is shown and nothing else happens.
    """
    desktop = XSCRIPTCONTEXT.getDesktop()
    model = desktop.getCurrentComponent()
    oSelected = model.getCurrentSelection()
    doc = XSCRIPTCONTEXT.getDocument()
    parentwindow = doc.CurrentController.Frame.ContainerWindow
    cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()

    try:
        # Store original cursor position
        CursorPos = cursor.getText().createTextCursorByRange(cursor)
    except Exception:
        # The cursor has been placed in the annotation not the text
        mess = "Position cursor in the text\nNot the comment box"
        heading = "Positioning Error"
        MessageBox(parentwindow, mess, heading, INFOBOX, BUTTONS_OK)
        return None

    oText = ""
    try:
        # Grab the text selected/highlighted
        oText = oSelected.getByIndex(0).getString()
    except Exception:
        pass

    if oText == "":
        # Nothing selected: grab the whole line.
        try:
            cursor.gotoStartOfLine(False)   # move to start without selecting
            cursor.gotoEndOfLine(True)      # extend selection to end of line
            oSelected = model.getCurrentSelection()
            oText = oSelected.getByIndex(0).getString()
            # Vertical position of this line, used to match annotations.
            store_position = cursor.getPosition().value.Y
            # Deselect line to avoid inadvertently deleting it on next user keystroke
            cursor.gotoStartOfLine(False)
            if oText.count(":") == 0:
                # Still nothing found: look for an annotation on this line.
                try:
                    oEnum = model.getTextFields().createEnumeration()
                    while oEnum.hasMoreElements():
                        oField = oEnum.nextElement()
                        if oField.supportsService('com.sun.star.text.TextField.Annotation'):
                            cursor.gotoRange(oField.getAnchor(), False)
                            if cursor.getPosition().value.Y == store_position:
                                # Found an annotation at this location
                                oText = oField.Content
                                break
                finally:
                    # Put the cursor back even if the enumeration failed.
                    cursor.gotoRange(CursorPos, False)
        except Exception:
            pass

    secs = _timecode_to_seconds(oText)
    if secs is None:
        return None
    seek_instruction = 'seek'+str(secs)+'\n'
    print("Seconds",str(secs))
    # Do something with seek_instruction


def _timecode_to_seconds(text):
    """Return the seconds encoded by the mm:ss or hh:mm:ss in *text*, else None."""
    # Strip out all invalid characters
    digits = ''.join(char for char in str(text) if char in '0123456789:')
    parts = digits.split(":")
    if len(parts) == 2:        # time 00:00
        parts.insert(0, "00")
    elif len(parts) != 3:      # anything but 00:00:00
        return None
    oH, oM, oS = parts
    # Keep at most two characters of the seconds field; any extra digits
    # (e.g. tenths) are dropped.
    oS = oS[:2]
    try:
        return int(oS) + int(oM) * 60 + int(oH) * 3600
    except ValueError:
        return None
def MessageBox(ParentWindow, MsgText, MsgTitle, MsgType, MsgButtons):
    """Show a message box built by the com.sun.star.awt.Toolkit service."""
    context = XSCRIPTCONTEXT.getComponentContext()
    toolkit = context.ServiceManager.createInstanceWithContext("com.sun.star.awt.Toolkit", context)
    dialog = toolkit.createMessageBox(ParentWindow, MsgType, MsgButtons, MsgTitle, MsgText)
    dialog.execute()

"Not implemented" Exception when using pywin32 to control Adobe Acrobat

I have written a script in python using pywin32 to save pdf files to text that up until recently was working fine. I use similar methods in Excel. The code is below:
def __pdf2Txt(self, pdf, fileformat="com.adobe.acrobat.accesstext"):
    """Save *pdf* as text next to the original, using Acrobat over COM.

    Args:
        pdf: Path of the PDF file to convert.
        fileformat: Acrobat conversion id passed to ``SaveAs``.

    Returns:
        False if any step raised (the traceback is printed); None otherwise.
    """
    outputLoc = os.path.dirname(pdf)
    outputLoc = os.path.join(outputLoc, os.path.splitext(os.path.basename(pdf))[0] + '.txt')
    # Pre-bind the names so the cleanup below never hits an unbound local
    # when an early step fails.
    adobe = pdDoc = jObject = None
    try:
        win32com.client.gencache.EnsureModule('{E64169B3-3592-47d2-816E-602C5C13F328}', 0, 1, 1)
        adobe = win32com.client.DispatchEx('AcroExch.App')
        pdDoc = win32com.client.DispatchEx('AcroExch.PDDoc')
        pdDoc.Open(pdf)
        jObject = pdDoc.GetJSObject()
        # Honour the caller's format instead of a hard-coded copy of the default.
        jObject.SaveAs(outputLoc, fileformat)
    except Exception:
        traceback.print_exc()
        return False
    finally:
        # Drop the COM references in reverse order of creation, closing only
        # what was actually created.
        jObject = None
        if pdDoc is not None:
            pdDoc.Close()
            pdDoc = None
        if adobe is not None:
            adobe.Exit()
            adobe = None
However this code has suddenly stopped working and I get the following output:
Traceback (most recent call last):
File "C:\Documents and Settings\ablishen\workspace\HooverKeyCreator\src\HooverKeyCreator.py", line 38, in __pdf2Txt
jObject.SaveAs(outputLoc, "com.adobe.acrobat.accesstext")
File "C:\Python27\lib\site-packages\win32com\client\dynamic.py", line 505, in __getattr__
ret = self._oleobj_.Invoke(retEntry.dispid,0,invoke_type,1)
com_error: (-2147467263, 'Not implemented', None, None)
False
I have similar code written in VB that works correctly so I'm guessing that it has something to do with the COM interfaces not binding to the appropriate functions correctly? (my COM knowledge is patchy).
Blish, this thread holds the key to the solution you are looking for: https://mail.python.org/pipermail/python-win32/2002-March/000260.html
I admit that the post above is not the easiest to find (probably because Google scores it low based on the age of the content?).
Specifically, applying this piece of advice will get things running for you: https://mail.python.org/pipermail/python-win32/2002-March/000265.html
For reference, the complete piece of code that does not require you to manually patch dynamic.py (snippet should run pretty much out of the box):
# gets all files under ROOT_INPUT_PATH with FILE_EXTENSION and tries to extract text from them into ROOT_OUTPUT_PATH with same filename as the input file but with INPUT_FILE_EXTENSION replaced by OUTPUT_FILE_EXTENSION
from win32com.client import Dispatch
from win32com.client.dynamic import ERRORS_BAD_CONTEXT
import winerror
# try importing scandir and if found, use it as it's a few magnitudes of an order faster than stock os.walk
try:
from scandir import walk
except ImportError:
from os import walk
import fnmatch
import sys
import os
# Defaults; all four are overwritten from the command line in the
# `__main__` block.
ROOT_INPUT_PATH = None
ROOT_OUTPUT_PATH = None
# Pattern handed to fnmatch.filter() to select input files.
INPUT_FILE_EXTENSION = "*.pdf"
# Extension appended to each output file name.
OUTPUT_FILE_EXTENSION = ".txt"
def acrobat_extract_text(f_path, f_path_out, f_basename, f_ext):
    """Extract the text of one PDF into ``f_path_out/f_basename + f_ext``.

    Args:
        f_path: Path of the input PDF.
        f_path_out: Directory that receives the output file.
        f_basename: Output file name without extension.
        f_ext: Output extension, including the dot.

    Raises:
        AssertionError: If Acrobat reports that the file could not be opened.
    """
    avDoc = Dispatch("AcroExch.AVDoc") # Connect to Adobe Acrobat
    # Open the input file (as a pdf)
    ret = avDoc.Open(f_path, f_path)
    # FIXME: Documentation says "-1 if the file was opened successfully, 0 otherwise", but this is a bool in practise?
    # Explicit raise (same exception type as the former assert) so the check
    # still runs under `python -O`.
    if not ret:
        raise AssertionError("Acrobat could not open %r" % (f_path,))
    try:
        pdDoc = avDoc.GetPDDoc()
        try:
            dst = os.path.join(f_path_out, ''.join((f_basename, f_ext)))
            # Adobe documentation says "For that reason, you must rely on the documentation to know what functionality is available through the JSObject interface. For details, see the JavaScript for Acrobat API Reference"
            jsObject = pdDoc.GetJSObject()
            # Here you can save as many other types by using, for instance: "com.adobe.acrobat.xml"
            jsObject.SaveAs(dst, "com.adobe.acrobat.accesstext")
        finally:
            pdDoc.Close()
    finally:
        # We want this to close Acrobat, as otherwise Acrobat is going to
        # refuse processing any further files after a certain threshold of
        # open files are reached (for example 50 PDFs). Done in `finally` so
        # a failed conversion does not leave the document open either.
        avDoc.Close(True)
if __name__ == "__main__":
    # <script name>, <script_file_input_path>, <script_file_input_extension>, <script_file_output_path>, <script_file_output_extension>
    #$ python get.txt.from.multiple.pdf.py 'C:\input' '*.pdf' 'C:\output' '.txt'
    if len(sys.argv) != 5:
        sys.exit("usage: %s <input_path> <input_pattern> <output_path> <output_extension>" % sys.argv[0])
    ROOT_INPUT_PATH = sys.argv[1]
    INPUT_FILE_EXTENSION = sys.argv[2]
    ROOT_OUTPUT_PATH = sys.argv[3]
    OUTPUT_FILE_EXTENSION = sys.argv[4]
    # tuples are of schema (path_to_file, filename)
    matching_files = ((os.path.join(_root, filename), os.path.splitext(filename)[0]) for _root, _dirs, _files in walk(ROOT_INPUT_PATH) for filename in fnmatch.filter(_files, INPUT_FILE_EXTENSION))
    # patch ERRORS_BAD_CONTEXT as per https://mail.python.org/pipermail/python-win32/2002-March/000265.html
    # The imported name is the list object that win32com.client.dynamic
    # uses, so mutating it in place is enough; no `global` is needed at
    # module level.
    if winerror.E_NOTIMPL not in ERRORS_BAD_CONTEXT:
        ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)
    for filename_with_path, filename_without_extension in matching_files:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print("Processing '{}'".format(filename_without_extension))
        acrobat_extract_text(filename_with_path, ROOT_OUTPUT_PATH, filename_without_extension, OUTPUT_FILE_EXTENSION)
I have tested this on WinPython x64 2.7.6.3, Acrobat X Pro
makepy.py is a script that comes with the win32com python package.
Running it for your installation "wires" python into the COM/OLE object in Windows. The following is an excerpt of some code I used to talk to Excel and do some stuff in it. This example gets the name of sheet 1 in the current workbook. It automatically runs makepy if it has an exception:
import win32com;
import win32com.client;
from win32com.client import selecttlb;
def attachExcelCOM():
    """Run makepy for every registered type library that looks like Excel's."""
    import subprocess

    makepyPath = r'C:\Python25\Lib\site-packages\win32com\client\makepy.py'
    for tl in selecttlb.EnumTlbs():
        if re.match('^Microsoft.*Excel.*', tl.desc, re.IGNORECASE):
            # An argument list avoids shell quoting problems if the library
            # description contains quotes or other special characters.
            subprocess.call(['python', makepyPath, '-d', tl.desc])
def getSheetName(sheetNum):
    """Attach to Excel and confirm the target workbook with the user.

    If attaching fails, the COM wrappers are regenerated through
    ``attachExcelCOM()`` and the attach is retried once; a second failure
    exits the process with status -1.

    Written to parse on both Python 2.6+ and Python 3.

    Args:
        sheetNum: Index passed to ``Workbooks.Item``.

    Returns:
        ``(wb, wsName)`` when the user answers Y; None when the workbook is
        PERSONAL.XLS or the user does not confirm.
    """
    try:
        xl = win32com.client.Dispatch("Excel.Application")
        wb = xl.Workbooks.Item(sheetNum)
    except Exception as detail:
        print('There was a problem attaching to Excel, refreshing connect config...')
        print('%s %s' % (Exception, detail))
        attachExcelCOM()
        try:
            xl = win32com.client.Dispatch("Excel.Application")
            wb = xl.Workbooks.Item(sheetNum)
        except Exception:
            print('Could not attach to Excel...')
            sys.exit(-1)

    wsName = wb.Name
    if wsName == 'PERSONAL.XLS':
        return None

    print('The target worksheet is:')
    print('  %s' % wsName)
    # Prompt without a trailing newline, then read the answer.
    sys.stdout.write('Is this correct? [Y/N] ')
    sys.stdout.flush()
    answer = sys.stdin.readline().strip().upper()
    if answer != 'Y':
        print('Sheet not identified correctly.')
        return None
    return (wb, wsName)
# -- Main --
# NOTE(review): `sheetNum` is not defined anywhere in this excerpt.
sheetInfo = getSheetName(sheetNum);
# getSheetName() returns None when no workbook was confirmed.
if (sheetInfo == None):
print 'Sheet not found';
sys.exit(-1);
else:
(wb, wsName) = sheetInfo;
# end if

Categories

Resources