I am trying to use an ini file to configure which resolution my script uses, and I need help working out how to do this.
"Function script":
#RECUP QUALITE FHD
import re, os
def FHD(RFHD, path="/home/gaaara/adn/tmp/ajax.json"):
    """Return the streaming URL of the "FHD" variant listed in a JSON dump.

    The file at *path* is scanned line by line; the first line containing an
    ``"FHD":"....mp4`` entry provides the media path used to build the URL.

    Args:
        RFHD: Not used by the lookup; kept so existing calls such as
            ``FHD('RFHD')`` keep working.
        path: File to scan. Defaults to the location the script has always
            read.

    Returns:
        The URL string, or ``None`` when no line contains an "FHD" entry.
    """
    # The dot is escaped so that only a literal ".mp4" suffix ends the capture.
    pattern = re.compile(r'"FHD":"(.+?)\.mp4')
    # "with" guarantees the handle is closed, including on the early return.
    with open(path, "r") as mykey:
        for text in mykey:
            match = pattern.search(text)
            if match:
                return ('http://www.website.fr:1935/' + match.group(1)
                        + '.mp4?audioindex=0.smil')
    return None
In fact, the file contains two other similar functions, HD and SD, which handle the other resolutions. How do I programmatically select the right function?
Edit
import ConfigParser
import sys
sys.path.append('files/')
from xrez import FHD
from xrez import HD
from xrez import SD
# Map every value accepted in config.ini to the function that resolves the
# stream URL for that resolution and the argument it is called with.
# Nothing is called until the configured resolution is known.
RESOLUTIONS = {
    'x1080': (FHD, 'RFHD'),
    'x720': (HD, 'RHD'),
    'x480': (SD, 'RSD'),
}

config = ConfigParser.ConfigParser()
config.read('config.ini')
try:
    # get() takes a section and an option name; the allowed values are
    # validated through the lookup table instead of being passed to get().
    val = config.get('resolution', 'Write the resolution wish').strip()
    resolver, resolver_arg = RESOLUTIONS[val]
except (ConfigParser.Error, KeyError) as err:
    sys.stderr.write('invalid resolution setting in config.ini: %s\n' % err)
    sys.exit(1)
print(val)
# URL produced by the resolver that matches the configured resolution.
stream_url = resolver(resolver_arg)
You can use the Python ConfigParser library. This will read your INI file and give you the parameters you need (e.g. resolution), which you can then use in your JSON downloading code.
An ini file like this:
[section1]
var1=value1
would be read by this:
import ConfigParser
import sys

config = ConfigParser.ConfigParser()
# read() silently skips files it cannot open, so a missing conf.ini shows up
# below as a missing section.
config.read('conf.ini')
try:
    val = config.get('section1', 'var1')
except ConfigParser.Error:
    # Section or option not found: exit with a non-zero status.
    sys.exit(1)
print(val)
Related
I am trying to programmatically merge two Microsoft Word files:
and:
I wrote a program with python-docx:
from docx import Document

# Append each paragraph of test2.docx (text plus paragraph style only) to
# test1.docx and save the result under a new name.
base_doc = Document("test1.docx")
extra_doc = Document("test2.docx")
for para in extra_doc.paragraphs:
    base_doc.add_paragraph(para.text, para.style)
base_doc.save("test1-new.docx")
I got this result:
As you can see, I got the text and the basic paragraph style, but lost the per-character style.
Is there any way to keep it?
I ran a small test where I made a document like this:
hello
hello
hello
from docx import Document

# Print the bold and italic flags of every run in every paragraph.
source_doc = Document("test.docx")
for para in source_doc.paragraphs:
    for run in para.runs:
        print(run.font.bold, run.font.italic)
Returns:
None None
True None
True True
So if you put some more effort you can extract the Bold and Italic from the runs inside the paragraph.
Here is working code:
#!/usr/bin/env python3.6
import os
import os.path
from docx import Document
def append_to_doc(doc,fname):
    """Append the paragraphs of the .docx file *fname* to *doc*.

    Every source paragraph becomes a new paragraph with the same paragraph
    style, rebuilt run by run so the bold, italic and underline settings of
    each run are carried over.

    Args:
        doc: Target document object; modified in place.
        fname: Path of the .docx file whose paragraphs are appended.
    """
    t = Document(fname)
    for p in t.paragraphs:
        # add_paragraph() returns the paragraph it created; keeping that
        # reference avoids re-reading doc.paragraphs for every run.
        new_par = doc.add_paragraph("", p.style)
        for r in p.runs:
            nr = new_par.add_run(r.text)
            nr.bold = r.bold
            nr.italic = r.italic
            nr.underline = r.underline
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--output", help="Output file")
    cli.add_argument("--template", help="Base file")
    cli.add_argument("files", nargs="+", help="Files to add")
    args = cli.parse_args()

    # Refuse to run without an output name, over an existing output file,
    # or without a template document.
    if not args.output:
        raise RuntimeError("--output required")
    if os.path.exists(args.output):
        raise RuntimeError(f"{args.output} exists")
    if not args.template:
        raise RuntimeError("--template required")

    # Start from the template and append each input file in the given order.
    merged = Document(args.template)
    for source_name in args.files:
        append_to_doc(merged, source_name)
    merged.save(args.output)
My goal is quite simple, but I couldn't find it on the guide for configobj.
When I run my code I want it to write to a file without erasing what is already in the file.
Every time I run it, it should write underneath what is already in the file.
This is my current code, which erases/overwrites what is already in dasd.ini:
from configobj import ConfigObj

# The object starts out empty and is then pointed at dasd.ini, so write()
# outputs only the keys assigned below.
config = ConfigObj()
config.filename = "dasd.ini"
#
config['hey'] = "value1"
config['test'] = "value2"
#
# A section must exist before keys can be stored in it; indexing a missing
# section raises KeyError.
config['another'] = {}
config['another']['them'] = "value4"
#
config.write()
This would be remarkably simpler if configobj accepted a file-like object instead of a file name. This is a solution I offered in the comments.
import tempfile

# Render the new settings into a scratch file, then append that text to
# dasd.ini. The target is opened in append mode so its existing content is
# kept and the new settings land underneath it.
with tempfile.NamedTemporaryFile() as scratch:
    config = ConfigObj()
    config.filename = scratch.name
    config['hey'] = "value1"
    config['test'] = "value2"
    # The section has to be created before a key can be stored in it.
    config['another'] = {}
    config['another']['them'] = "value4"
    config.write()
    scratch.seek(0)
    with open('dasd.ini', 'ab') as fyle:
        fyle.write(scratch.read())
If I understand your question correctly, doing what you want is a very simple change. Use the following syntax to create your initial config object. This reads in keys and values from the existing file.
config = ConfigObj("dasd.ini")
Then you can add new settings or change the existing ones as in your example code.
config['hey'] = "value1"
config['test'] = "value2"
After you write it out using config.write(), you'll find that your dasd.ini file contains the original and new keys/values merged. It also preserves any comments you had in your original ini file, with new keys/values added to the end of each section.
Check out this link, I found it to be quite helpful: An Introduction to ConfigObj
try it:
You have to read all keys and values of the section if the section existed already
and then write the whole section data
# -*- coding: cp950 -*-
import configobj
import os
#-------------------------------------------------------------------------
# _readINI(ini_file, szSection, szKey)
# read KeyValue from a ini file
# return True/False, KeyValue
#-------------------------------------------------------------------------
def _readINI(ini_file, szSection, szKey=None):
ret = None
keyvalue = None
if os.path.exists(ini_file) :
try:
config = configobj.ConfigObj(ini_file, encoding='UTF8')
if not szKey==None :
keyvalue = config[szSection][szKey]
else:
keyvalue = config[szSection]
ret = True
print keyvalue
except Exception, e :
ret = False
return ret, keyvalue
#-------------------------------------------------------------------------
# _writeINI(ini_file, szSection, szKey, szKeyValue):
# write key value into a ini file
# return True/False
# You have to read all keys and values of the section if the section existed already
# and then write the whole section data
#-------------------------------------------------------------------------
def _writeINI(ini_file, szSection, szKey, szKeyValue):
    """Set ``[szSection] szKey = szKeyValue`` in an ini file and save it.

    The file is created with its header comment (via CreateNewIniFile) when
    it does not exist yet. Loading the file into a ConfigObj brings in all of
    its current sections and keys, so only the missing section, if any, has
    to be created before the key is assigned.

    Args:
        ini_file: Path of the ini file.
        szSection: Section name; created when absent.
        szKey: Key to set inside the section.
        szKeyValue: Value to store.

    Returns:
        ``True`` when the file was written, ``False`` when any step raised
        (the error text is printed).
    """
    try:
        if not os.path.exists(ini_file):
            # create a new ini file with cfg header comment
            CreateNewIniFile(ini_file)
        config = configobj.ConfigObj(ini_file, encoding='UTF8')
        if szSection not in config:
            config[szSection] = {}
        config[szSection][szKey] = szKeyValue
        config.write()
    except Exception as e:
        print(str(e))
        return False
    return True
#-------------------------------------------------------------------------
# CreateNewIniFile(ini_file)
# create a new ini with header comment
# return True/False
#-------------------------------------------------------------------------
def CreateNewIniFile(ini_file):
    """Create *ini_file* containing only the standard header comment.

    An existing file is left untouched.

    Args:
        ini_file: Path of the file to create.

    Returns:
        ``True`` when the file was created, ``False`` when it already existed
        or an error occurred (the error is printed, never raised).
    """
    header = (
        '########################################################\n'
        '# Configuration File for Parallel Settings of Moldex3D #\n'
        '# Please Do Not Modify This File #\n'
        '########################################################\n'
        '\n\n'
    )
    try:
        if os.path.exists(ini_file):
            return False
        # "with" closes the handle even if the write fails part-way.
        with open(ini_file, 'w') as f:
            f.write(header)
    except Exception as e:
        # Kept broad on purpose: this helper reports failure via its return
        # value only.
        print(e)
        return False
    return True
#----------------------------------------------------------------------
if __name__ == "__main__":
    # Demo: write several keys into two sections, then read back one key and
    # one section that was never written.
    path = 'D:\\settings.cfg'
    _writeINI(path, 'szSection', 'szKey', u'kdk12341 他dkdk')
    _writeINI(path, 'szSection', 'szKey-1', u'kdk123412dk')
    _writeINI(path, 'szSection', 'szKey-2', u'kfffk')
    _writeINI(path, 'szSection', 'szKey-3', u'dhhhhhhhhhhhh')
    _writeINI(path, 'szSection-333', 'ccc', u'555')
    #_writeINI(path, 'szSection-222', '', u'')
    # Parenthesised single-argument print runs on both Python 2 and 3.
    print(_readINI(path, 'szSection', 'szKey-2'))
    print(_readINI(path, 'szSection-222'))
    #CreateNewIniFile(path)
Say you have to join some pages numbered 2, 4 and 5 (the files are named test_002.pdf, test_004.pdf and test_005.pdf); then we could say that page 3 is missing.
What I try to do is having a result from those commands :
pdfjam --nup 2 --papersize '{47cm,30cm}' --scale 1.0 test_002.pdf test_003.pdf --outfile joined_002-003.pdf
pdfjam --nup 2 --papersize '{47cm,30cm}' --scale 1.0 test_004.pdf test_005.pdf --outfile joined_004-005.pdf
which will join each even/odd pair of pages into one single page, with a blank page (3) in place of the missing page.
I guess it should:
check incoming files from the beginning to the end looking for what page is missing (in this case from 2 to 5 missing #3)
on-the-fly generate blank '23.5cm,30cm' pdf pages (using pyPdf maybe)
classify them 'even' and 'odd' as couples to be able to join every even with odd page (using pdfjam)…
Am I right?
Is that possible with some lines of Python?
Or is there a easier way?
Here is what I started to do, making it work like a hot folder, but I'm completely lost when it comes to handling even and odd pages and missing files/pages:
#!/usr/bin/python
# -*- coding: UTF8 -*-
import os
import os.path
import re
import time
import datetime
CODEFILE = re.compile("^(TES|EXA).*\.pdf$")
WHERE = "/tmp/TEST/"
STORAGE = "/tmp/WORK/"
DBLSIZE = "{47cm,30cm}"
def time_stamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(time_stamp()+" : Starting.")
def files_list(path):
    """Return the full paths of the files directly inside *path* whose names
    match CODEFILE.

    Only the top level of *path* is inspected; sub-directories are not
    descended into. A missing or unreadable directory yields an empty list.
    """
    # next() with a default works on Python 2.6+ and 3, and covers the case
    # where os.walk() produces nothing at all.
    root, _dirs, files = next(os.walk(path), (path, [], []))
    return [os.path.join(root, f) for f in files if CODEFILE.match(f)]
def file_sizes(filename):
    """Return the size in bytes reported by ``os.lstat`` for *filename*."""
    return os.lstat(filename).st_size
def files_to_handle(path, settle_seconds=10):
    """Return the matching files in *path* whose size has stopped changing.

    The directory is listed twice, *settle_seconds* apart; a file is returned
    only if it appears in both listings with the same size, i.e. it is
    presumably no longer being written.

    Args:
        path: Directory to scan (passed to files_list).
        settle_seconds: Pause between the two listings; defaults to the 10
            seconds the script has always waited.

    Returns:
        List of file paths considered stable.
    """
    before = {f: file_sizes(f) for f in files_list(path)}
    time.sleep(settle_seconds)
    after = {f: file_sizes(f) for f in files_list(path)}
    # Files that only appeared in the second listing have no earlier size to
    # compare with (get() gives None) and are left for a later run.
    return [f for f, size in after.items() if before.get(f) == size]
import shutil
import subprocess

r = files_to_handle(WHERE)
print(time_stamp()+" : Files available :")
print(r)
for f in r:
    # An argument list needs no shell, so the braces in DBLSIZE and any
    # spaces in the file name reach pdfjam unchanged.
    try:
        rc = subprocess.call([
            "pdfjam", "--batch", "--nup", "2", "--papersize", DBLSIZE,
            "--scale", "1.0", "--outfile", ".", f])
    except OSError:
        # pdfjam could not be started at all; treat it like a failed run.
        rc = 127
    if rc != 0:
        print(time_stamp()+" : an ERROR as occured with the file {0}.".format(f))
    else:
        print(time_stamp()+" : files {0} OK.".format(f))
        # Archive the source file once it has been processed successfully.
        shutil.move(f, STORAGE)
print(time_stamp()+" : Stopping.")
Thanks in advance!
I have written a script in python using pywin32 to save pdf files to text that up until recently was working fine. I use similar methods in Excel. The code is below:
def __pdf2Txt(self, pdf, fileformat="com.adobe.acrobat.accesstext"):
    """Export *pdf* through Acrobat's JSObject ``SaveAs`` call.

    The output is written next to the input, with the extension replaced by
    ``.txt``.

    Args:
        pdf: Path of the PDF file to convert.
        fileformat: Conversion id handed to ``SaveAs``; the default is the
            id the method has always used.

    Returns:
        ``False`` when any step raised (the traceback is printed); otherwise
        the method falls off the end and returns ``None``.
    """
    outputLoc = os.path.dirname(pdf)
    outputLoc = os.path.join(outputLoc, os.path.splitext(os.path.basename(pdf))[0] + '.txt')
    # Pre-bind the names so the cleanup below can tell which objects were
    # actually created when an early step fails.
    adobe = pdDoc = jObject = None
    try:
        win32com.client.gencache.EnsureModule('{E64169B3-3592-47d2-816E-602C5C13F328}', 0, 1, 1)
        adobe = win32com.client.DispatchEx('AcroExch.App')
        pdDoc = win32com.client.DispatchEx('AcroExch.PDDoc')
        pdDoc.Open(pdf)
        jObject = pdDoc.GetJSObject()
        jObject.SaveAs(outputLoc, fileformat)
    except Exception:
        traceback.print_exc()
        return False
    finally:
        # Drop the references in reverse order of creation, skipping anything
        # that was never created.
        jObject = None
        if pdDoc is not None:
            pdDoc.Close()
            pdDoc = None
        if adobe is not None:
            adobe.Exit()
            adobe = None
However this code has suddenly stopped working and I get the following output:
Traceback (most recent call last):
File "C:\Documents and Settings\ablishen\workspace\HooverKeyCreator\src\HooverKeyCreator.py", line 38, in __pdf2Txt
jObject.SaveAs(outputLoc, "com.adobe.acrobat.accesstext")
File "C:\Python27\lib\site-packages\win32com\client\dynamic.py", line 505, in __getattr__
ret = self._oleobj_.Invoke(retEntry.dispid,0,invoke_type,1)
com_error: (-2147467263, 'Not implemented', None, None)
False
I have similar code written in VB that works correctly so I'm guessing that it has something to do with the COM interfaces not binding to the appropriate functions correctly? (my COM knowledge is patchy).
Blish, this thread holds the key to the solution you are looking for: https://mail.python.org/pipermail/python-win32/2002-March/000260.html
I admit that the post above is not the easiest to find (probably because Google scores it low based on the age of the content?).
Specifically, applying this piece of advice will get things running for you: https://mail.python.org/pipermail/python-win32/2002-March/000265.html
For reference, the complete piece of code that does not require you to manually patch dynamic.py (snippet should run pretty much out of the box):
# gets all files under ROOT_INPUT_PATH with FILE_EXTENSION and tries to extract text from them into ROOT_OUTPUT_PATH with same filename as the input file but with INPUT_FILE_EXTENSION replaced by OUTPUT_FILE_EXTENSION
from win32com.client import Dispatch
from win32com.client.dynamic import ERRORS_BAD_CONTEXT
import winerror
# try importing scandir and, if found, use it, as it is a few orders of magnitude faster than the stock os.walk
try:
from scandir import walk
except ImportError:
from os import walk
import fnmatch
import sys
import os
ROOT_INPUT_PATH = None
ROOT_OUTPUT_PATH = None
INPUT_FILE_EXTENSION = "*.pdf"
OUTPUT_FILE_EXTENSION = ".txt"
def acrobat_extract_text(f_path, f_path_out, f_basename, f_ext):
    """Save the PDF at *f_path* as accessible text using Acrobat.

    Args:
        f_path: Path of the input PDF.
        f_path_out: Directory that receives the output file.
        f_basename: Output file name without extension.
        f_ext: Output extension, appended to *f_basename* as-is.

    Raises:
        IOError: If Acrobat reports that the file could not be opened.
    """
    avDoc = Dispatch("AcroExch.AVDoc")  # Connect to Adobe Acrobat

    # Open the input file (as a pdf)
    # FIXME: Documentation says "-1 if the file was opened successfully, 0
    # otherwise", but this is a bool in practice?
    # An explicit raise is used instead of assert so the check also runs
    # under "python -O".
    if not avDoc.Open(f_path, f_path):
        raise IOError("Acrobat could not open %r" % (f_path,))

    try:
        pdDoc = avDoc.GetPDDoc()
        dst = os.path.join(f_path_out, ''.join((f_basename, f_ext)))

        # Adobe documentation says "For that reason, you must rely on the
        # documentation to know what functionality is available through the
        # JSObject interface. For details, see the JavaScript for Acrobat API
        # Reference"
        jsObject = pdDoc.GetJSObject()

        # Here you can save as many other types by using, for instance:
        # "com.adobe.acrobat.xml"
        jsObject.SaveAs(dst, "com.adobe.acrobat.accesstext")
        pdDoc.Close()
    finally:
        # Always close the document, including on failure: otherwise Acrobat
        # is going to refuse processing any further files after a certain
        # threshold of open files is reached (for example 50 PDFs).
        avDoc.Close(True)
if __name__ == "__main__":
    # <script name>, <script_file_input_path>, <script_file_input_extension>,
    # <script_file_output_path>, <script_file_output_extension>
    #$ python get.txt.from.multiple.pdf.py 'C:\input' '*.pdf' 'C:\output' '.txt'
    if len(sys.argv) != 5:
        sys.exit("usage: %s <input_path> <input_pattern> <output_path> <output_ext>\n"
                 "got: %r" % (sys.argv[0], sys.argv))

    ROOT_INPUT_PATH = sys.argv[1]
    INPUT_FILE_EXTENSION = sys.argv[2]
    ROOT_OUTPUT_PATH = sys.argv[3]
    OUTPUT_FILE_EXTENSION = sys.argv[4]

    # tuples are of schema (path_to_file, filename)
    matching_files = ((os.path.join(_root, filename), os.path.splitext(filename)[0]) for _root, _dirs, _files in walk(ROOT_INPUT_PATH) for filename in fnmatch.filter(_files, INPUT_FILE_EXTENSION))

    # patch ERRORS_BAD_CONTEXT as per https://mail.python.org/pipermail/python-win32/2002-March/000265.html
    # The list object is extended in place, so no "global" statement is
    # needed at module scope.
    ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)

    for filename_with_path, filename_without_extension in matching_files:
        print("Processing '{}'".format(filename_without_extension))
        acrobat_extract_text(filename_with_path, ROOT_OUTPUT_PATH, filename_without_extension, OUTPUT_FILE_EXTENSION)
I have tested this on WinPython x64 2.7.6.3, Acrobat X Pro
makepy.py is a script that comes with the win32com python package.
Running it for your installation "wires" python into the COM/OLE object in Windows. The following is an excerpt of some code I used to talk to Excel and do some stuff in it. This example gets the name of sheet 1 in the current workbook. It automatically runs makepy if it has an exception:
import win32com;
import win32com.client;
from win32com.client import selecttlb;
def attachExcelCOM():
    """Run makepy for every enumerated type library whose description matches
    the Microsoft Excel pattern."""
    makepyExe = r'python C:\Python25\Lib\site-packages\win32com\client\makepy.py'
    for tl in selecttlb.EnumTlbs():
        if not re.match('^Microsoft.*Excel.*', tl.desc, re.IGNORECASE):
            continue
        os.system("%s -d \"%s\"" % (makepyExe, tl.desc))
def getSheetName(sheetNum):
    """Look up an open Excel workbook and ask the user to confirm it.

    The workbook is fetched with ``Workbooks.Item(sheetNum)``. If that fails,
    the COM bindings are regenerated with attachExcelCOM() and the lookup is
    retried once; a second failure terminates the program with status -1.

    Args:
        sheetNum: Index handed to ``Workbooks.Item``.

    Returns:
        ``(wb, wsName)`` where ``wsName`` is ``wb.Name``, or ``None`` when
        the workbook is PERSONAL.XLS or the user does not answer ``Y``.
    """
    def _open_workbook():
        # One connection attempt; shared by the first try and the retry.
        xl = win32com.client.Dispatch("Excel.Application")
        return xl.Workbooks.Item(sheetNum)

    try:
        wb = _open_workbook()
    except Exception as detail:
        print('There was a problem attaching to Excel, refreshing connect config...')
        # Report the actual exception type together with its message.
        print('%s: %s' % (type(detail).__name__, detail))
        attachExcelCOM()
        try:
            wb = _open_workbook()
        except Exception:
            print('Could not attach to Excel...')
            sys.exit(-1)

    wsName = wb.Name
    if wsName == 'PERSONAL.XLS':
        return None

    print('The target worksheet is:')
    print('  %s' % (wsName,))
    # write() keeps the cursor on the prompt line; flush so the prompt is
    # visible before the program blocks on input.
    sys.stdout.write('Is this correct? [Y/N] ')
    sys.stdout.flush()
    answer = sys.stdin.readline().strip().upper()
    if answer != 'Y':
        print('Sheet not identified correctly.')
        return None
    return (wb, wsName)
# -- Main --
# NOTE(review): sheetNum is not assigned anywhere in the visible code;
# presumably it is defined earlier in the full script -- confirm.
sheetInfo = getSheetName(sheetNum)
if sheetInfo is None:
    print('Sheet not found')
    sys.exit(-1)
(wb, wsName) = sheetInfo
I have a problem. My program uses a config file to set options, and one of those options is a tuple. Here's what I mean:
[common]
logfile=log.txt
db_host=localhost
db_user=root
db_pass=password
folder[1]=/home/scorpil
folder[2]=/media/sda5/
folder[3]=/media/sdb5/
etc...
Can i parse this into tuple with ConfigParser module in Python? Is there some easy way to do this?
If you can change the config format like this:
folder = /home/scorpil
/media/sda5/
/media/sdb5/
then in python:
config.get("common", "folder").split("\n")
Your config could be:
[common]
logfile=log.txt
db_host=localhost
db_user=root
db_pass=password
folder = ("/home/scorpil", "/media/sda5/", "/media/sdb5/")
Assuming that you have config in a file named foo.cfg, you can do the following:
import ast
import ConfigParser

cp = ConfigParser.ConfigParser()
cp.read("foo.cfg")
# literal_eval() accepts only Python literals (tuples, strings, numbers, ...),
# so text from the config file is parsed into a value but can never run code.
folder = ast.literal_eval(cp.get("common", "folder"))
print(folder)
print(type(folder))
which should produce:
('/home/scorpil', '/media/sda5/', '/media/sdb5/')
<type 'tuple'>
-- EDIT --
I've since changed my mind about this, and would take the position today that using eval in this context is a bad idea. Even with a restricted environment, if the configuration file is under user control it may be a very bad idea. Today I'd probably recommend doing something interesting with split to avoid malicious code execution.
You can get the items list and use a list comprehension to create a list of all the items whose name starts with a defined prefix, in your case folder
# items() needs the section name; each item is an (option_name, value) pair.
folders = tuple(value for name, value in configparser.items("common") if name.startswith("folder"))
Create configuration:
folders = ['/home/scorpil', '/media/sda5/', '/media/sdb5/']
config.set('common', 'folders', json.dumps(folders))
Load configuration:
tuple(json.loads(config.get('common', 'folders')))
I don't know ConfigParser, but you can easily read it into a list (perhaps using .append()) and then do myTuple = tuple(myList)
#!/usr/bin/env python
sample = """
[common]
logfile=log.txt
db_host=localhost
db_user=root
db_pass=password
folder[1]=/home/scorpil
folder[2]=/media/sda5/
folder[3]=/media/sdb5/
"""
from cStringIO import StringIO
import ConfigParser
import re
# Matcher for option names of the form "folder[<number>]".
FOLDER_MATCH = re.compile(r"folder\[(\d+)\]$").match


def read_list(items,pmatch=FOLDER_MATCH):
    """Collect the values of numbered options into a tuple ordered by number.

    Args:
        items: Iterable of ``(name, value)`` pairs.
        pmatch: Either a callable that returns a match object (group 1 being
            the number) or a regular expression string, which is compiled and
            matched from the start of each name.

    Returns:
        Tuple of the values whose name matched, sorted by the captured number.
    """
    matcher = pmatch if hasattr(pmatch, "__call__") else re.compile(pmatch).match
    indexed = []
    for name, value in items:
        hit = matcher(name)
        if hit:
            indexed.append((int(hit.group(1)), value))
    indexed.sort()
    return tuple(value for _index, value in indexed)
if __name__ == '__main__':
    # Parse the embedded sample and show the folders in numeric order.
    cp = ConfigParser.SafeConfigParser()
    cp.readfp(StringIO(sample),"sample")
    # Parenthesised single-argument print runs on both Python 2 and 3.
    print(read_list(cp.items("common")))
You could stick to json completely
tst.json
{
"common": {
"logfile":"log.txt",
"db_host":"localhost",
"db_user":"root",
"db_pass":"password",
"folder": [
"/home/scorpil",
"/media/sda5/",
"/media/sdb5/"
]
}
}
then work with it
$ python3
>>> import json
>>> with open("tst.json", "r", encoding="utf8") as file_object:
... job = json.load(file_object)
...
>>> job
{'common': {'db_pass': 'password', 'logfile':
'log.txt', 'db_user': 'root', 'folder':
['/home/scorpil', '/media/sda5/', '/media/sdb5/'],
'db_host': 'localhost'}}
>>> print (job["common"]["folder"][0])
/home/scorpil
>>> print (job["common"]["folder"][1])
/media/sda5/
>>> print (job["common"]["folder"][2])
/media/sdb5/
>>> folder_tuple = tuple(job["common"]["folder"])
>>> folder_tuple
('/home/scorpil', '/media/sda5/', '/media/sdb5/')