I have written a script in python using pywin32 to save pdf files to text that up until recently was working fine. I use similar methods in Excel. The code is below:
def __pdf2Txt(self, pdf, fileformat="com.adobe.acrobat.accesstext"):
outputLoc = os.path.dirname(pdf)
outputLoc = os.path.join(outputLoc, os.path.splitext(os.path.basename(pdf))[0] + '.txt')
try:
win32com.client.gencache.EnsureModule('{E64169B3-3592-47d2-816E-602C5C13F328}', 0, 1, 1)
adobe = win32com.client.DispatchEx('AcroExch.App')
pdDoc = win32com.client.DispatchEx('AcroExch.PDDoc')
pdDoc.Open(pdf)
jObject = pdDoc.GetJSObject()
jObject.SaveAs(outputLoc, "com.adobe.acrobat.accesstext")
except:
traceback.print_exc()
return False
finally:
del jObject
pdDoc.Close()
del pdDoc
adobe.Exit()
del adobe
However this code has suddenly stopped working and I get the following output:
Traceback (most recent call last):
File "C:\Documents and Settings\ablishen\workspace\HooverKeyCreator\src\HooverKeyCreator.py", line 38, in __pdf2Txt
jObject.SaveAs(outputLoc, "com.adobe.acrobat.accesstext")
File "C:\Python27\lib\site-packages\win32com\client\dynamic.py", line 505, in __getattr__
ret = self._oleobj_.Invoke(retEntry.dispid,0,invoke_type,1)
com_error: (-2147467263, 'Not implemented', None, None)
False
I have similar code written in VB that works correctly so I'm guessing that it has something to do with the COM interfaces not binding to the appropriate functions correctly? (my COM knowledge is patchy).
Blish, this thread holds the key to the solution you are looking for: https://mail.python.org/pipermail/python-win32/2002-March/000260.html
I admit that the post above is not the easiest to find (probably because Google scores it low based on the age of the content?).
Specifically, applying this piece of advice will get things running for you: https://mail.python.org/pipermail/python-win32/2002-March/000265.html
For reference, the complete piece of code that does not require you to manually patch dynamic.py (snippet should run pretty much out of the box):
# gets all files under ROOT_INPUT_PATH with FILE_EXTENSION and tries to extract text from them into ROOT_OUTPUT_PATH with same filename as the input file but with INPUT_FILE_EXTENSION replaced by OUTPUT_FILE_EXTENSION
from win32com.client import Dispatch
from win32com.client.dynamic import ERRORS_BAD_CONTEXT
import winerror
# try importing scandir and if found, use it as it's a few magnitudes of an order faster than stock os.walk
try:
from scandir import walk
except ImportError:
from os import walk
import fnmatch
import sys
import os
ROOT_INPUT_PATH = None
ROOT_OUTPUT_PATH = None
INPUT_FILE_EXTENSION = "*.pdf"
OUTPUT_FILE_EXTENSION = ".txt"
def acrobat_extract_text(f_path, f_path_out, f_basename, f_ext):
avDoc = Dispatch("AcroExch.AVDoc") # Connect to Adobe Acrobat
# Open the input file (as a pdf)
ret = avDoc.Open(f_path, f_path)
assert(ret) # FIXME: Documentation says "-1 if the file was opened successfully, 0 otherwise", but this is a bool in practise?
pdDoc = avDoc.GetPDDoc()
dst = os.path.join(f_path_out, ''.join((f_basename, f_ext)))
# Adobe documentation says "For that reason, you must rely on the documentation to know what functionality is available through the JSObject interface. For details, see the JavaScript for Acrobat API Reference"
jsObject = pdDoc.GetJSObject()
# Here you can save as many other types by using, for instance: "com.adobe.acrobat.xml"
jsObject.SaveAs(dst, "com.adobe.acrobat.accesstext")
pdDoc.Close()
avDoc.Close(True) # We want this to close Acrobat, as otherwise Acrobat is going to refuse processing any further files after a certain threshold of open files are reached (for example 50 PDFs)
del pdDoc
if __name__ == "__main__":
assert(5 == len(sys.argv)), sys.argv # <script name>, <script_file_input_path>, <script_file_input_extension>, <script_file_output_path>, <script_file_output_extension>
#$ python get.txt.from.multiple.pdf.py 'C:\input' '*.pdf' 'C:\output' '.txt'
ROOT_INPUT_PATH = sys.argv[1]
INPUT_FILE_EXTENSION = sys.argv[2]
ROOT_OUTPUT_PATH = sys.argv[3]
OUTPUT_FILE_EXTENSION = sys.argv[4]
# tuples are of schema (path_to_file, filename)
matching_files = ((os.path.join(_root, filename), os.path.splitext(filename)[0]) for _root, _dirs, _files in walk(ROOT_INPUT_PATH) for filename in fnmatch.filter(_files, INPUT_FILE_EXTENSION))
# patch ERRORS_BAD_CONTEXT as per https://mail.python.org/pipermail/python-win32/2002-March/000265.html
global ERRORS_BAD_CONTEXT
ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)
for filename_with_path, filename_without_extension in matching_files:
print "Processing '{}'".format(filename_without_extension)
acrobat_extract_text(filename_with_path, ROOT_OUTPUT_PATH, filename_without_extension, OUTPUT_FILE_EXTENSION)
I have tested this on WinPython x64 2.7.6.3, Acrobat X Pro
makepy.py is a script that comes with the win32com python package.
Running it for your installation "wires" python into the COM/OLE object in Windows. The following is an excerpt of some code I used to talk to Excel and do some stuff in it. This example gets the name of sheet 1 in the current workbook. It automatically runs makepy if it has an exception:
import win32com;
import win32com.client;
from win32com.client import selecttlb;
def attachExcelCOM():
makepyExe = r'python C:\Python25\Lib\site-packages\win32com\client\makepy.py';
typeList = selecttlb.EnumTlbs();
for tl in typeList:
if (re.match('^Microsoft.*Excel.*', tl.desc, re.IGNORECASE)):
makepyCmd = "%s -d \"%s\"" % (makepyExe, tl.desc);
os.system(makepyCmd);
# end if
# end for
# end def
def getSheetName(sheetNum):
try:
xl = win32com.client.Dispatch("Excel.Application");
wb = xl.Workbooks.Item(sheetNum);
except Exception, detail:
print 'There was a problem attaching to Excel, refreshing connect config...';
print Exception, str(detail);
attachExcelCOM();
try:
xl = win32com.client.Dispatch("Excel.Application");
wb = xl.Workbooks.Item(sheetNum);
except:
print 'Could not attach to Excel...';
sys.exit(-1);
# end try/except
# end try/except
wsName = wb.Name;
if (wsName == 'PERSONAL.XLS'):
return( None );
# end if
print 'The target worksheet is:';
print ' ', wsName;
print 'Is this correct? [Y/N]',;
answer = string.strip( sys.stdin.readline() );
answer = answer.upper();
if (answer != 'Y'):
print 'Sheet not identified correctly.';
return(None);
# end if
return( (wb, wsName) );
# end def
# -- Main --
sheetInfo = getSheetName(sheetNum);
if (sheetInfo == None):
print 'Sheet not found';
sys.exit(-1);
else:
(wb, wsName) = sheetInfo;
# end if
Related
The Python script below has worked for me in converting multiple PDF documents to a single PDF when running on Python 2.7. I'm now trying to use it on 3.10 and am getting errors.
I believe I've conquered most of them, especially changing print to print( and disabling imports of CoreFoundation and Quartz.CoreGraphics.
It seems that the only remaining error is:
line 85, in main
writeContext = CGPDFContextCreateWithURL(CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault,
arg, len(arg), False), None, None) NameError: name
'CGPDFContextCreateWithURL' is not defined
If I declare CGPDFContextCreateWithURL as an empty global, the error shifts to CFURLCreateFromFileSystemRepresentation. Declare that and the error is kCFAllocatorDefault.
Here's how I'm trying to handle those.
global CGPDFContextCreateWithURL CGPDFContextCreateWithURL = ""
I don't understand that entire line so any help in making it right would be appreciated.
#
# join
# Joing pages from a a collection of PDF files into a single PDF file.
#
# join [--output <file>] [--shuffle] [--verbose]"
#
# Parameter:
#
# --shuffle
# Take a page from each PDF input file in turn before taking another from each file.
# If this option is not specified then all of the pages from a PDF file are appended
# to the output PDF file before the next input PDF file is processed.
#
# --verbose
# Write information about the doings of this tool to stderr.
#
import sys
import os
import getopt
import tempfile
import shutil
# from CoreFoundation import *
# from Quartz.CoreGraphics import *
global verbose
verbose = False
def createPDFDocumentWithPath(path):
if verbose:
print("Creating PDF document from file %s" % (path))
return CGPDFDocumentCreateWithURL(CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault, path, len(path), False))
def writePageFromDoc(writeContext, doc, pageNum):
page = CGPDFDocumentGetPage(doc, pageNum)
if page:
mediaBox = CGPDFPageGetBoxRect(page, kCGPDFMediaBox)
if CGRectIsEmpty(mediaBox):
mediaBox = None
CGContextBeginPage(writeContext, mediaBox)
CGContextDrawPDFPage(writeContext, page)
CGContextEndPage(writeContext)
if verbose:
print("Copied page %d from %s" % (pageNum, doc))
def shufflePages(writeContext, docs, maxPages):
for pageNum in xrange(1, maxPages + 1):
for doc in docs:
writePageFromDoc(writeContext, doc, pageNum)
def append(writeContext, docs, maxPages):
for doc in docs:
for pageNum in xrange(1, maxPages + 1) :
writePageFromDoc(writeContext, doc, pageNum)
def main(argv):
global verbose
# The PDF context we will draw into to create a new PDF
writeContext = None
# If True then generate more verbose information
source = None
shuffle = False
# Parse the command line options
try:
options, args = getopt.getopt(argv, "o:sv", ["output=", "shuffle", "verbose"])
except getopt.GetoptError:
usage()
sys.exit(2)
for option, arg in options:
if option in ("-o", "--output") :
if verbose:
print("Setting %s as the destination." % (arg))
writeContext = CGPDFContextCreateWithURL(CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault, arg, len(arg), False), None, None)
elif option in ("-s", "--shuffle") :
if verbose :
print("Shuffle pages to the output file.")
shuffle = True
elif option in ("-v", "--verbose") :
print("Verbose mode enabled.")
verbose = True
else :
print("Unknown argument: %s" % (option))
if writeContext:
# create PDFDocuments for all of the files.
docs = map(createPDFDocumentWithPath, args)
# find the maximum number of pages.
maxPages = 0
for doc in docs:
if CGPDFDocumentGetNumberOfPages(doc) > maxPages:
maxPages = CGPDFDocumentGetNumberOfPages(doc)
if shuffle:
shufflePages(writeContext, docs, maxPages)
else:
append(writeContext, docs, maxPages)
CGPDFContextClose(writeContext)
del writeContext
#CGContextRelease(writeContext)
def usage():
print("Usage: join [--output <file>] [--shuffle] [--verbose]")
if __name__ == "__main__":
main(sys.argv[1:])
Today i am working on a project about incoming phone calls being transcripted and getting saved into text files, but i am also kinda new to python and python loops.
I want to loop over a SQL server column and let each row loop trough the azure Speech to text service i use (all of the phonecall OID's). I have been stuck on this problem for a couple days now so i thought i might find some help here.
import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
import sys
import os.path
import pyodbc
databaseName = '*'
username = '*'
password = '*'
server = '*'
driver = '*'
try:
CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
storedproc = "* = *'"
cursor.execute(storedproc)
row = cursor.fetchone()
while row:
array = [(int(row[1]))]
row = cursor.fetchone()
i = 0
while i<len(array):
OID = (array[i])
i = i + 1
print(OID)
string = f"{OID}*"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
Telefoongesprek = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})
with open("Telefoongesprek.mp3", "wb") as f:
f.write(Telefoongesprek.content)
src = "Telefoongesprek.mp3"
dst = "Telefoongesprek.wav"
sound = AudioSegment.from_file(src)
sound.export(dst, format="wav")
def speech_recognize_continuous_from_file():
speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek.wav")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
telefoongesprek = str(all_results)
filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
file = open(filename, "w")
file.write(telefoongesprek)
file.close()
speech_recognize_continuous_from_file()
cursor.close()
del cursor
conn.close()
except Exception as e:
print("Error: %s" % e)
everything works apart form each other but i just dont know how to place the loop and witch one i should use (For/While loop). right here im trying to loop over an array but i dont this this is correct.
Error message: Decoding failed. ffmpeg returned error code: 1
[mp3 # 000001cb8c57e0o0] Failed to read frame size: could not seek to 1073.
which i am pretty sure means that my azure function can't find an mp3 file, what means that the "Mp3 to Wav" convert doesn't work.
Thanks in advance!
If I understand your question, you have a database with lots of phone call details. One of the field value in each row is used to create the associated mp3 file. You want to do speech to text using azure on each of the mp3 file you have in your database.
So you can do it in two ways:
Iterate though all rows in the database and create all the associted files into a folder in the local disk with the OID as your filename.
Then write another loop to iterate through this folder and send the files for transcription to Azure Speech to Text service.
The other technique is to do everything in a single loop like the way you have shown which will require some corrections.
Ok, so now that part is clear, we can go into the speech to text part. So azure allow you to send the compressed format for transcription, which means you actually don't need to convert it into wav file.
Please have a look at the modified code below with the changes:
# code snippet borrowed from azure samples
def speech_recognize_continuous_from_file(filename):
class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
def __init__(self, filename: str):
super().__init__()
self._file_h = open(filename, "rb")
def read(self, buffer: memoryview) -> int:
try:
size = buffer.nbytes
frames = self._file_h.read(size)
buffer[:len(frames)] = frames
return len(frames)
except Exception as ex:
print('Exception in `read`: {}'.format(ex))
raise
def close(self) -> None:
print('closing file')
try:
self._file_h.close()
except Exception as ex:
print('Exception in `close`: {}'.format(ex))
raise
# Creates an audio stream format. For an example we are using MP3 compressed file here
compressed_format = speechsdk.audio.AudioStreamFormat(compressed_stream_format=speechsdk.AudioStreamContainerFormat.MP3)
callback = BinaryFileReaderCallback(filename=filename)
stream = speechsdk.audio.PullAudioInputStream(stream_format=compressed_format, pull_stream_callback=callback)
speech_config = speechsdk.SpeechConfig(subscription="*", region="*")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(stream=stream)
# Creates a speech recognizer using a file as audio input, also specify the speech language
speech_recognizer = speechsdk.SpeechRecognizer(speech_config, audio_config)
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
telefoongesprek = str(all_results)
filename = f"C:\\Users\\Beau\\Contact-verkeer\\contact-verkeer\\telefoon\\STT Transcriptions\\Telefoongesprek#{OID}.txt"
file = open(filename, "w")
file.write(telefoongesprek)
file.close()
try:
CONNECTION_STRING = 'DRIVER='+driver+';SERVER='+server+';DATABASE='+databaseName+';UID='+username+';PWD='+ password
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
storedproc = "* = *'"
cursor.execute(storedproc)
row = cursor.fetchone()
# loop through the rows
while row:
array = [(int(row[1]))]
i = 0
while i<len(array):
OID = (array[i])
i = i + 1
print(OID)
string = f"{OID}*"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
telefoongesprek_response = requests.get(f"*{OID}", headers={f"api-key":f"{resultHash}"})
# save the file to local disk as mp3
with open("Telefoongesprek.mp3", "wb") as f:
f.write(telefoongesprek_response.content)
# do the speech to text on the mp3 file
speech_recognize_continuous_from_file(f.name)
# fetch the next row
row = cursor.fetchone()
cursor.close()
del cursor
conn.close()
except Exception as e:
print("Error: %s" % e)
I haven't tested this full code as i don't have the db connections with me. Please fell free to modify for your use case and let me know if you have any issues.
Trying to revive a PyUNO sample script called Wavelet to learn how LO works nowadays and get re-started. Since LibreOffice & UNO changed a bit from the script's creation time I am running into problems.
Managed to get the desktop object. Now I want to retrieve the open document's component. How do I achieve this properly? The desktop.getCurrentComponent() call returns None.
LibreOffice version: 6.4.6.2.
System: Ubuntu MATE 20.04 x86_64.
The code follows:
#!/usr/bin/python3
def TestWave(event):
wavelet = Wavelet(XSCRIPTCONTEXT)
wavelet.trigger( () )
import uno
import unohelper
import string
from com.sun.star.task import XJobExecutor
class Wavelet( unohelper.Base, XJobExecutor ):
def __init__( self, ctx ):
self.ctx = ctx
def trigger( self, args ):
desktop = self.ctx.ServiceManager.createInstanceWithContext(
"com.sun.star.frame.Desktop", self.ctx )
doc = desktop.getCurrentComponent()
print('doc:', doc)
#try:
search = doc.createSearchDescriptor()
search.SearchRegularExpression = True
search.SearchString = "\\<(k|s|v|z|o|u|i|a) "
found = doc.findFirst( search )
while found:
print("found:", found.String)
found.String = string.replace( found.String, " ", u"\xa0" )
found = doc.findNext( found.End, search)
#except:
# pass
g_ImplementationHelper = unohelper.ImplementationHelper()
g_ImplementationHelper.addImplementation(
Wavelet,
"name.vojta.openoffice.Wavelet",
("com.sun.star.task.Job",),)
if __name__ == "__main__":
import os
# Start OpenOffice.org, listen for connections and open testing document
os.system( "loffice '--accept=socket,host=localhost,port=2002;urp;' --writer ./WaveletTest.odt &" )
# Get local context info
localContext = uno.getComponentContext()
resolver = localContext.ServiceManager.createInstanceWithContext(
"com.sun.star.bridge.UnoUrlResolver", localContext )
ctx = None
# Wait until the OO.o starts and connection is established
while ctx == None:
try:
ctx = resolver.resolve(
"uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext" )
except:
pass
# Trigger our job
wavelet = Wavelet( ctx )
wavelet.trigger( () )
Output:
doc: None
Traceback (most recent call last):
File "./wavelet.py", line 62, in <module>
wavelet.trigger( () )
File "./wavelet.py", line 24, in trigger
search = doc.createSearchDescriptor()
AttributeError: 'NoneType' object has no attribute 'createSearchDescriptor'
Edit 1
Cross posted at the following address:
https://ask.libreoffice.org/en/question/283785/why-does-desktopgetcurrentcomponent-return-none-in-pyuno/
Try without giving desktop.getCurrentComponent() a variable, so erase doc =. I remember I had that problem, but did not understand why it was doing it. All I remember is that not naming it made my code work. That is the only advice I can give you.
Tried to wait until the document component becomes available. And it worked:
doc = None
while doc is None:
doc = desktop.getCurrentComponent()
I'd like to be able to list the files in the shell:appsfolder in a python script but need the full path to do this using os.list. Is there a way to get the full path (or does anyone know it)? Alternatively, is there a different way I can list these files? Can I "cd" to it?
The idea behind the script is to automate the shortcut creation of all the Windows Store apps (identified by the fact they have a "long name" property I think) and extract those shortcuts to a folder where the program Launchy can detect them. I don't like having to manually go through the process of creating the shortcut (and renaming it to remove the " - shortcut) every time I download or remove an app so I thought I'd automate it.
Here's a function that hopefully does what you want in terms of creating shortcuts for the Windows Store apps that are listed in the "Applications" virtual folder (i.e. FOLDERID_AppsFolder). To classify Windows Store apps, it looks for an exclamation point in the Application User Model ID since the AUMID should be of the form "PackageFamily!ApplicationID" (see Automate Launching UWP Apps). For reliability it cross-checks each package family with the user's registered package families.
import os
import ctypes
import pywintypes
import pythoncom
import winerror
try:
import winreg
except ImportError:
# Python 2
import _winreg as winreg
bytes = lambda x: str(buffer(x))
from ctypes import wintypes
from win32com.shell import shell, shellcon
from win32com.propsys import propsys, pscon
# KNOWNFOLDERID
# https://msdn.microsoft.com/en-us/library/dd378457
# win32com defines most of these, except the ones added in Windows 8.
FOLDERID_AppsFolder = pywintypes.IID('{1e87508d-89c2-42f0-8a7e-645a0f50ca58}')
# win32com is missing SHGetKnownFolderIDList, so use ctypes.
_ole32 = ctypes.OleDLL('ole32')
_shell32 = ctypes.OleDLL('shell32')
_REFKNOWNFOLDERID = ctypes.c_char_p
_PPITEMIDLIST = ctypes.POINTER(ctypes.c_void_p)
_ole32.CoTaskMemFree.restype = None
_ole32.CoTaskMemFree.argtypes = (wintypes.LPVOID,)
_shell32.SHGetKnownFolderIDList.argtypes = (
_REFKNOWNFOLDERID, # rfid
wintypes.DWORD, # dwFlags
wintypes.HANDLE, # hToken
_PPITEMIDLIST) # ppidl
def get_known_folder_id_list(folder_id, htoken=None):
if isinstance(folder_id, pywintypes.IIDType):
folder_id = bytes(folder_id)
pidl = ctypes.c_void_p()
try:
_shell32.SHGetKnownFolderIDList(folder_id, 0, htoken,
ctypes.byref(pidl))
return shell.AddressAsPIDL(pidl.value)
except WindowsError as e:
if e.winerror & 0x80070000 == 0x80070000:
# It's a WinAPI error, so re-raise it, letting Python
# raise a specific exception such as FileNotFoundError.
raise ctypes.WinError(e.winerror & 0x0000FFFF)
raise
finally:
if pidl:
_ole32.CoTaskMemFree(pidl)
def enum_known_folder(folder_id, htoken=None):
id_list = get_known_folder_id_list(folder_id, htoken)
folder_shell_item = shell.SHCreateShellItem(None, None, id_list)
items_enum = folder_shell_item.BindToHandler(None,
shell.BHID_EnumItems, shell.IID_IEnumShellItems)
for item in items_enum:
yield item
def list_known_folder(folder_id, htoken=None):
result = []
for item in enum_known_folder(folder_id, htoken):
result.append(item.GetDisplayName(shellcon.SIGDN_NORMALDISPLAY))
result.sort(key=lambda x: x.upper())
return result
def create_shortcut(shell_item, shortcut_path):
id_list = shell.SHGetIDListFromObject(shell_item)
shortcut = pythoncom.CoCreateInstance(shell.CLSID_ShellLink, None,
pythoncom.CLSCTX_INPROC_SERVER, shell.IID_IShellLink)
shortcut.SetIDList(id_list)
persist = shortcut.QueryInterface(pythoncom.IID_IPersistFile)
persist.Save(shortcut_path, 0)
def get_package_families():
families = set()
subkey = (r'Software\Classes\Local Settings\Software\Microsoft'
r'\Windows\CurrentVersion\AppModel\Repository\Families')
with winreg.OpenKey(winreg.HKEY_CURRENT_USER, subkey) as hkey:
index = 0
while True:
try:
families.add(winreg.EnumKey(hkey, index))
except OSError as e:
if e.winerror != winerror.ERROR_NO_MORE_ITEMS:
raise
break
index += 1
return families
def update_app_shortcuts(target_dir):
package_families = get_package_families()
for item in enum_known_folder(FOLDERID_AppsFolder):
try:
property_store = item.BindToHandler(None,
shell.BHID_PropertyStore, propsys.IID_IPropertyStore)
app_user_model_id = property_store.GetValue(
pscon.PKEY_AppUserModel_ID).ToString()
except pywintypes.error:
continue
# AUID template: Packagefamily!ApplicationID
if '!' not in app_user_model_id:
continue
package_family, app_id = app_user_model_id.rsplit('!', 1)
if package_family not in package_families:
continue
name = item.GetDisplayName(shellcon.SIGDN_NORMALDISPLAY)
shortcut_path = os.path.join(target_dir, '%s.lnk' % name)
create_shortcut(item, shortcut_path)
print('{}: {}'.format(name, app_user_model_id))
example
if __name__ == '__main__':
desktop = shell.SHGetFolderPath(0, shellcon.CSIDL_DESKTOP, 0, 0)
target_dir = os.path.join(desktop, 'Windows Store Apps')
if not os.path.exists(target_dir):
os.mkdir(target_dir)
update_app_shortcuts(target_dir)
I have a python script that calls a system program and reads the output from a file out.txt, acts on that output, and loops. However, it doesn't work, and a close investigation showed that the python script just opens out.txt once and then keeps on reading from that old copy. How can I make the python script reread the file on each iteration? I saw a similar question here on SO but it was about a python script running alongside a program, not calling it, and the solution doesn't work. I tried closing the file before looping back but it didn't do anything.
EDIT:
I already tried closing and opening, it didn't work. Here's the code:
import subprocess, os, sys
filename = sys.argv[1]
file = open(filename,'r')
foo = open('foo','w')
foo.write(file.read().rstrip())
foo = open('foo','a')
crap = open(os.devnull,'wb')
numSolutions = 0
while True:
subprocess.call(["minisat", "foo", "out"], stdout=crap,stderr=crap)
out = open('out','r')
if out.readline().rstrip() == "SAT":
numSolutions += 1
clause = out.readline().rstrip()
clause = clause.split(" ")
print clause
clause = map(int,clause)
clause = map(lambda x: -x,clause)
output = ' '.join(map(lambda x: str(x),clause))
print output
foo.write('\n'+output)
out.close()
else:
break
print "There are ", numSolutions, " solutions."
You need to flush foo so that the external program can see its latest changes. When you write to a file, the data is buffered in the local process and sent to the system in larger blocks. This is done because updating the system file is relatively expensive. In your case, you need to force a flush of the data so that minisat can see it.
foo.write('\n'+output)
foo.flush()
I rewrote it to hopefully be a bit easier to understand:
import os
from shutil import copyfile
import subprocess
import sys
TEMP_CNF = "tmp.in"
TEMP_SOL = "tmp.out"
NULL = open(os.devnull, "wb")
def all_solutions(cnf_fname):
"""
Given a file containing a set of constraints,
generate all possible solutions.
"""
# make a copy of original input file
copyfile(cnf_fname, TEMP_CNF)
while True:
# run minisat to solve the constraint problem
subprocess.call(["minisat", TEMP_CNF, TEMP_SOL], stdout=NULL,stderr=NULL)
# look at the result
with open(TEMP_SOL) as result:
line = next(result)
if line.startswith("SAT"):
# Success - return solution
line = next(result)
solution = [int(i) for i in line.split()]
yield solution
else:
# Failure - no more solutions possible
break
# disqualify found solution
with open(TEMP_CNF, "a") as constraints:
new_constraint = " ".join(str(-i) for i in sol)
constraints.write("\n")
constraints.write(new_constraint)
def main(cnf_fname):
"""
Given a file containing a set of constraints,
count the possible solutions.
"""
count = sum(1 for i in all_solutions(cnf_fname))
print("There are {} solutions.".format(count))
if __name__=="__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
else:
print("Usage: {} cnf.in".format(sys.argv[0]))
You take your file_var and end the loop with file_var.close().
for ... :
ga_file = open(out.txt, 'r')
... do stuff
ga_file.close()
Demo of an implementation below (as simple as possible, this is all of the Jython code needed)...
__author__ = ''
import time
var = 'false'
while var == 'false':
out = open('out.txt', 'r')
content = out.read()
time.sleep(3)
print content
out.close()
generates this output:
2015-01-09, 'stuff added'
2015-01-09, 'stuff added' # <-- this is when i just saved my update
2015-01-10, 'stuff added again :)' # <-- my new output from file reads
I strongly recommend reading the error messages. They hold quite a lot of information.
I think the full file name should be written for debug purposes.