How to import data from scanned text into Django models - python

I have hundreds of pages of "quiz" questions, multiple-choice options, and associated answer keys and explanations. I'm trying to create a simple Django app to administer these questions. I have created a simple but effective Python parser to parse the scanned, OCR'd pages into the proper objects.
I want to have a "utility" to enable the administrator of this Django app to import quiz content from OCR'd paper into the relevant Django DB tables. This will be a rare task, and something that is not necessarily appropriate for inclusion in a web UI.
I've asked about using intermediate JSON/YAML fixtures and was told that a more appropriate approach would be to directly create and save instances of my models [1]. I then tried to create a standalone script along the lines suggested by [2] and [3] but was unable to overcome this error:
kwargs = {"app_label": model_module.__name__.split('.')[-2]}
IndexError: list index out of range
I also came across [4] about creating custom django-admin.py/manage.py commands. This seems like a logically appropriate way of dealing with the task, but I'd love to hear from those with more experience and brains (I've eaten all mine :).
References:
[1] Importing data from scanned text into Django as YAML fixture or SQL
[2] What is the simplest way to create a table using the Django DB API, based on 'Standalone Django scripts'
[3] Standalone Scripts
[4] Writing custom django-admin commands
Examples:
OCR'd Text
Page 12
34. Hiedegger is a _____ . (a) philosopher (b) boozy beggar (c) both a and b (d) none of these 35. ...
Django Models
class Question(models.Model):
    text = models.TextField()

class Choice(models.Model):
    question = models.ForeignKey(Question)
    order = models.IntegerField(default=1)
    text = models.TextField()
Goal, something like this...
q = Question.objects.create(text="Hiedegger is a _____ .")
q.save()
c = Choice(text="philosopher", order=1, question=q)
c.save()

This is the working version I came up with. Dirty, but effective. Both @akonsu and @Ivan Kharlamov were helpful. Thanks...
import os, re, Levenshtein as lev, codecs
from SimpleQuiz.quiz.models import Choice, Question
from django.core.management.base import BaseCommand, CommandError
import optparse

class Command(BaseCommand):
    args = '--datapath=/path/to/text/data/'
    can_import_settings = True
    help = 'Imports scanned text into Questions and Choices'
    option_list = BaseCommand.option_list + (
        optparse.make_option('--datapath', action='store', type='string',
                             dest='datapath',
                             help='Path to OCRd text files to be parsed.'),
    )
    requires_model_validation = True

    # Parser REs
    BACKUP_RE = re.compile(r'\~$|bak$|back$|backup$')
    QUEST_RE = re.compile(r'^[0-9]{1,3}[.][ ]')
    CHOICE_RE = re.compile(r'^[a-e][.][ ]')

    def handle(self, *args, **options):
        # get the data path
        try:
            os.path.exists(options['datapath'])
        except Exception as e:
            raise CommandError("None or invalid path provided: %s" % e.message)
        self.datapath = os.path.expanduser(options['datapath'])

        # generate list of text strings from lines in target files
        self.data_lines = []
        for fn in os.listdir(os.path.join(self.datapath, 'questions/')):
            if self.BACKUP_RE.search(fn):
                self.stderr.write("Skipping backup: %s\n" % (fn))
            else:
                for line in codecs.open(os.path.join(self.datapath, 'questions/', fn), 'r', encoding='latin-1'):
                    if not self.is_boilerplate(line):
                        if not line.strip() == '':
                            self.data_lines.append(line)

        #-----------------------------------------------------------------------
        #--------------------- Parse the text lines and create Questions/Choices
        #-----------------------------------------------------------------------
        cur_quest = None
        cur_choice = None
        cur_is_quest = False
        questions = {}
        choices = {}
        for line in self.data_lines:
            if self.is_question(line):
                [n, txt] = line.split('.', 1)
                qtext = txt.rstrip() + " "
                q = Question.objects.create(text=qtext)
                q.save()
                cur_quest = q.pk
                questions[cur_quest] = q
                cur_is_quest = True
            elif self.is_choice(line):
                [n, txt] = line.split('.', 1)
                num = self.char2dig(n)
                ctext = txt.rstrip() + " "
                c = Choice.objects.create(text=ctext, order=num, question=questions[cur_quest])
                c.save()
                cur_choice = c.pk
                choices[cur_choice] = c
                cur_is_quest = False
            else:
                if cur_is_quest:
                    questions[cur_quest].text += line.rstrip() + " "
                    questions[cur_quest].save()
                else:
                    choices[cur_choice].text += line.rstrip() + " "
                    choices[cur_choice].save()

        self.stdout.write("----- FINISHED -----\n")
        return None

    def is_question(self, arg_str):
        if self.QUEST_RE.search(arg_str):
            return True
        else:
            return False

    def is_choice(self, arg_str):
        if self.CHOICE_RE.search(arg_str):
            return True
        else:
            return False

    def char2dig(self, x):
        if x == 'a':
            return 1
        if x == 'b':
            return 2
        if x == 'c':
            return 3
        if x == 'd':
            return 4
        if x == 'e':
            return 5

    def is_boilerplate(self, arg_str):
        boilerplate = [u'MFT PRACTICE EXAMINATIONS',
                       u'BERKELEY TRAINING ASSOCIATES ' + u'\u00A9' + u' 2009',
                       u'BERKELEY TRAINING ASSOCIATES',
                       u'MARRIAGE AND FAMILY THERAPY',
                       u'PRACTICE EXAMINATION 41',
                       u'Page 0', u'Page 1', u'Page 2', u'Page 3', u'Page 4',
                       u'Page 5', u'Page 6', u'Page 7', u'Page 8', u'Page 9',
                       ]
        for bp in boilerplate:
            if lev.distance(bp.encode('utf-8'), arg_str.encode('utf-8')) < 4:
                return True
        return False
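For reference, Django looks for custom commands in an app's management/commands/ package, so saving the above as, for example, quiz/management/commands/import_quiz.py (the file name is illustrative; it becomes the command name) lets the administrator run:
python manage.py import_quiz --datapath=/path/to/text/data/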

I then tried to create a standalone script along the lines suggested by [2] and [3] but was unable to overcome the kwargs = {"app_label": model_module.__name__.split('.')[-2]} IndexError: list index out of range error.
I had the same list index error. It was caused by the way I imported the models in my script. I used to do it like this:
from models import Table1, Table2
Then I realized the Python script is not part of the application, so I changed the import to:
from myapp.models import Table1, Table2
My Python script is started with the following shell script:
export DJANGO_SETTINGS_MODULE=settings
export PYTHONPATH=/path/to/my/site
python myscript.py "$@"
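As an aside: on Django 1.7 and newer, a standalone script can do this bootstrapping itself instead of relying on a wrapper shell script. A minimal sketch, assuming the same settings module and project path as above:
import os
import sys

import django

sys.path.insert(0, '/path/to/my/site')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
django.setup()  # required on Django >= 1.7 before using the ORM

from myapp.models import Table1, Table2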

Related

How to parse a single-column text file into a table using python?

I'm new here to Stack Overflow, but I have found a LOT of answers on this site. I'm also a programming newbie, so I figured I'd join and finally become part of this community, starting with a question about a problem that's been plaguing me for hours.
I log in to a website and scrape a big body of text within the <b> tag, to be converted into a proper table. The layout of the resulting Output.txt looks like this:
BIN STATUS
8FHA9D8H 82HG9F RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
INVENTORY CODE: FPBC *SOUP CANS LENTILS
BIN STATUS
HA8DHW2H HD0138 RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8SHDNADU 00A123 #2956- INVALID STOCK COUPON CODE (MISSING).
93827548 096DBR RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
There are a bunch of pages with the exact same blocks, but I need them to be combined into an ACTUAL table that looks like this:
BIN INV CODE STATUS
HA8DHW2HHD0138 FPBC-*SOUP CANS LENTILS RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8SHDNADU00A123 FPBC-*SOUP CANS LENTILS #2956- INVALID STOCK COUPON CODE (MISSING).
93827548096DBR FPBC-*SOUP CANS LENTILS RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8FHA9D8H82HG9F SSXR-98-20LM NM CORN CREAM RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
Essentially, all separate text blocks in this example would become part of this table, with the inv code repeating with its BIN values. I would post my attempts at parsing this data (I have tried pandas/BeautifulSoup/openpyxl/csv writer), but I'll admit they are a little embarrassing, as I cannot find any information on this specific problem. Is there any benevolent soul out there who can help me out? :)
(Also, I am using Python 2.7.)
A simple custom parser like the following should do the trick.
from __future__ import print_function

def parse_body(s):
    line_sep = '\n'
    getting_bins = False
    inv_code = ''
    for l in s.split(line_sep):
        if l.startswith('INVENTORY CODE:') and not getting_bins:
            inv_data = l.split()
            inv_code = inv_data[2] + '-' + ' '.join(inv_data[3:])
        elif l.startswith('INVENTORY CODE:') and getting_bins:
            print("unexpected inventory code while reading bins:", l)
        elif l.startswith('BIN') and l.endswith('STATUS'):
            # the "BIN STATUS" header line marks the start of a bin block
            getting_bins = True
        elif getting_bins and l:
            bin_data = l.split()
            # need to add exception handling here to make sure:
            # 1) we have an inv_code
            # 2) bin_data is at least 3 items big (assuming two for
            #    bin_id and at least one for message)
            # 3) maybe some constraint checking to ensure that we have
            #    a valid instance of an inventory code and bin id
            bin_id = ''.join(bin_data[0:2])
            message = ' '.join(bin_data[2:])
            # we now have a bin, an inv_code, and a message to add to our table
            print(bin_id.ljust(20), inv_code.ljust(30), message, sep='\t')
        elif getting_bins and not l:
            # done getting bins for current inventory code
            getting_bins = False
            inv_code = ''
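For reference, a quick usage sketch; the sample string mirrors the blocks shown in the question, with the inventory code placed before its bin block, which is the ordering parse_body expects:
sample = """INVENTORY CODE: FPBC *SOUP CANS LENTILS
BIN STATUS
HA8DHW2H HD0138 RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8SHDNADU 00A123 #2956- INVALID STOCK COUPON CODE (MISSING).
"""
parse_body(sample)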
A rather complex one, but this might get you started:
import re, pandas as pd
from pandas import DataFrame

rx = re.compile(r'''
    (?:INVENTORY\ CODE:)\s*
    (?P<inv>.+\S)
    [\s\S]+?
    ^BIN.+[\n\r]
    (?P<bin_msg>(?:(?!^\ ).+[\n\r])+)
    ''', re.MULTILINE | re.VERBOSE)

string = your_string_here

# set up the dataframe
df = DataFrame(columns=['BIN', 'INV', 'MESSAGE'])

for match in rx.finditer(string):
    inv = match.group('inv')
    bin_msg_raw = match.group('bin_msg').split("\n")
    rxbinmsg = re.compile(r'^(?P<bin>(?:(?!\ {2}).)+)\s+(?P<message>.+\S)\s*$', re.MULTILINE)
    for item in bin_msg_raw:
        for m in rxbinmsg.finditer(item):
            # append it to the dataframe
            df.loc[len(df.index)] = [m.group('bin'), inv, m.group('message')]

print(df)
Explanation
It looks for INVENTORY CODE and sets up the groups (inv and bin_msg) for further processing afterwards (note: it would be easier if you had only one line of bin/msg, as the group has to be split afterwards).
Afterwards, it splits the bin and msg part and appends all to the df object.
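As a side note (not part of the answer above): appending rows one at a time via df.loc[len(df.index)] gets slow on large inputs. A common alternative is to collect plain dicts and build the DataFrame once; a sketch reusing the rx and rxbinmsg patterns compiled above:
rows = []
for match in rx.finditer(string):
    inv = match.group('inv')
    for item in match.group('bin_msg').split("\n"):
        for m in rxbinmsg.finditer(item):
            rows.append({'BIN': m.group('bin'), 'INV': inv, 'MESSAGE': m.group('message')})

df = DataFrame(rows, columns=['BIN', 'INV', 'MESSAGE'])
print(df)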
I had some code written for a website-scraping task which may help you.
Basically, what you need to do is right-click on the web page, view the HTML, find the tag for the table you are looking for, and extract the information with a parsing module (I am using BeautifulSoup). I am building a JSON document because I need to store it in MongoDB; you can build a table instead.
#! /usr/bin/python

import sys
import requests
import re
from BeautifulSoup import BeautifulSoup
import pymongo

def req_and_parsing():
    url2 = 'http://businfo.dimts.in/businfo/Bus_info/EtaByRoute.aspx?ID='

    list1 = ['534UP', '534DOWN']
    for Route in list1:
        final_url = url2 + Route
        #r = requests.get(final_url)
        #parsing_file(r.text,Route)

    outdict = []
    outdict = [parsing_file(requests.get(url2 + Route).text, Route) for Route in list1]
    print outdict
    conn = f_connection()
    for i in range(len(outdict)):
        insert_records(conn, outdict[i])

def parsing_file(txt, Route):
    soup = BeautifulSoup(txt)
    table = soup.findAll("table", {"id": "ctl00_ContentPlaceHolder1_GridView2"})
    #trtags = table[0].findAll('tr')
    tdlist = []
    trtddict = {}
    """
    for trtag in trtags:
        print 'print trtag- ', trtag.text
        tdtags = trtag.findAll('td')
        for tdtag in tdtags:
            print tdtag.text
    """
    divtags = soup.findAll("span", {"id": "ctl00_ContentPlaceHolder1_ErrorLabel"})
    for divtag in divtags:
        print "div tag - ", divtag.text
        if divtag.text in ("Currently no bus is running on this route",
                           "This is not a cluster (orange bus) route"):
            print "Page not displayed. Errored with below message for Route-", Route, " , ", divtag.text
            sys.exit()

    trtags = table[0].findAll('tr')
    for trtag in trtags:
        tdtags = trtag.findAll('td')
        if len(tdtags) == 2:
            trtddict[tdtags[0].text] = sub_colon(tdtags[1].text)
    return trtddict

def sub_colon(tag_str):
    return re.sub(';', ',', tag_str)

def f_connection():
    try:
        conn = pymongo.MongoClient()
        print "Connected successfully!!!"
    except pymongo.errors.ConnectionFailure, e:
        print "Could not connect to MongoDB: %s" % e
    return conn

def insert_records(conn, stop_dict):
    db = conn.test
    print db.collection_names()
    mycoll = db.stopsETA
    mycoll.insert(stop_dict)

if __name__ == "__main__":
    req_and_parsing()

LibreOffice - How to create a file dialog via python macro?

I'd like to know if it's possible to create a standard file dialog to save a PDF via a Python macro. I've tried to write some code based on this outdated documentation: wiki.openoffice.org, but LibreOffice crashes after execution:
import os
import uno
import sys
import traceback
from com.sun.star.ui.dialogs.TemplateDescription import FILESAVE_SIMPLE

def file_dialog():
    try:
        oCtx = uno.getComponentContext()
        oServiceManager = oCtx.getServiceManager()
        oFilePicker = oServiceManager.createInstanceWithArgumentsAndContext(
            'com.sun.star.ui.dialogs.FilePicker',
            (FILESAVE_SIMPLE,),
            oCtx
        )
        oFilePicker.Title = 'Export as'
        #oDisp = oFilePicker.Text
        oFilePicker.execute()
    except:
        pass
        #oDisp = traceback.format_exc(sys.exc_info()[2])
At the end I need to pass the selected path to write the document, but oDisp = oFilePicker.Text raises <type 'exceptions.AttributeError'>. Moreover, is there a way to set the file type?
Does anyone have experience with it?
I used Xray on the oFilePicker object. There are a couple of interesting methods called setCurrentFilter and appendFilterGroup. Just based on the names, they might be used to filter what file types are visible. Unfortunately I'm not sure how to use them.
Also with Xray, I determined that Text is not a method or property of the oFilePicker object, so I'm not sure what that code snippet is trying to do. If it is meant to retrieve the filepath, 1) that needs to be done after the .execute(), and 2) the selected filepath is stored as an array of strings, so the path has to be pulled out of the array. Most of my work in OpenOffice is in StarBasic; below is a working example in Basic of printing the filepath selected by the user:
Sub TestFilePicker
    oFilePickerDlg = createUnoService( "com.sun.star.ui.dialogs.FilePicker" )
    oFilePickerDlg.setTitle("My test title")
    If oFilePickerDlg.execute() > 0 Then
        Print ConvertFromURL(oFilePickerDlg.Files(0))
    End If
End Sub
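Not tested here, but from Python the same FilePicker service exposes appendFilter/setCurrentFilter (from its XFilterManager interface), which should restrict the visible file types; a sketch along the lines of the question's own code:
import uno
from com.sun.star.ui.dialogs.TemplateDescription import FILESAVE_SIMPLE

def pick_pdf_target():
    oCtx = uno.getComponentContext()
    oServiceManager = oCtx.getServiceManager()
    oFilePicker = oServiceManager.createInstanceWithArgumentsAndContext(
        'com.sun.star.ui.dialogs.FilePicker', (FILESAVE_SIMPLE,), oCtx)
    oFilePicker.Title = 'Export as'
    oFilePicker.appendFilter('PDF Documents', '*.pdf')   # label shown in the dialog
    oFilePicker.setCurrentFilter('PDF Documents')
    if oFilePicker.execute():
        return oFilePicker.getFiles()[0]   # a file URL such as file:///home/user/out.pdf
    return None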
Answer given and accepted (because the question was cross posted!) here:
import uno
from com.sun.star.beans import PropertyValue

# shortcut:
createUnoService = (
    XSCRIPTCONTEXT
    .getComponentContext()
    .getServiceManager()
    .createInstance
)

def pypdf_test():
    desktop = XSCRIPTCONTEXT.getDesktop()
    doc = desktop.getCurrentComponent()

    # filter data
    fdata = []
    fdata1 = PropertyValue()
    fdata1.Name = "SelectPdfVersion"
    fdata1.Value = 1
    fdata2 = PropertyValue()
    fdata2.Name = "Quality"
    fdata2.Value = 100
    fdata.append(fdata1)
    fdata.append(fdata2)

    args = []
    arg1 = PropertyValue()
    arg1.Name = "FilterName"
    arg1.Value = "writer_web_pdf_Export"
    arg2 = PropertyValue()
    arg2.Name = "FilterData"
    arg2.Value = uno.Any("[]com.sun.star.beans.PropertyValue", tuple(fdata))
    args.append(arg1)
    args.append(arg2)

    fileurl = FilePicker()
    if fileurl:
        doc.storeToURL(fileurl, tuple(args))

def FilePicker(path=None, mode=1):
    """
    Open a file: `mode in (0, 6, 7, 8, 9)`
    Save a file: `mode in (1, 2, 3, 4, 5, 10)`
    see: ('''http://api.libreoffice.org/docs/idl/ref/
        namespacecom_1_1sun_1_1star_1_1ui_1_1
        dialogs_1_1TemplateDescription.html''' )
    """
    filepicker = createUnoService("com.sun.star.ui.dialogs.OfficeFilePicker")
    if path:
        filepicker.setDisplayDirectory(path)
    filepicker.initialize((mode,))
    if filepicker.execute():
        return filepicker.getFiles()[0]

Updating Code - Python Automation

I'm new to this site and new to Python, as in only a few days into a course. At work, I have inherited a good-sized project that involves matching 9-digit ZIP codes in an Excel file to their US congressional districts (from a website). I've noticed through investigating the code (what little of it I understand) that the author might be using a website that only allows 5-digit ZIP codes, not 9 digits. Since some districts share ZIP codes, 9-digit codes are more precise. Here's the code I'm working with:
import urllib
import re
import csv
import datetime

print datetime.datetime.now()

INPUT_FILE_NAME = 'zip1.csv'
OUTPUT_FILE_NAME = 'legislator_output_%s-%0*d%0*d.csv' % ((datetime.date.today(), 2, datetime.datetime.now().hour, 2, datetime.datetime.now().minute))
print 'file name:', OUTPUT_FILE_NAME

input_file_handler = open(INPUT_FILE_NAME, 'rb')
input_reader = csv.reader(input_file_handler)

output_file_handler = open(OUTPUT_FILE_NAME, 'wb', 1)
output_writer = csv.writer(output_file_handler)
output_writer.writerow(['unique id', 'zip', 'plus 4', 'member url', 'member name', 'member district'])

fail_list = []
counter = 0

for input_line in input_reader:
    zip_entry = '%s-%s' % (input_line[1], input_line[2])
    unique_id = input_line[0]
    counter += 1
    #if counter > 25: continue
    zip_part = zip_entry.split('-')[0]
    plus_four_part = zip_entry.split('-')[1]
    params = urllib.urlencode({'ZIP': zip_part, '%2B4': plus_four_part})
    f = urllib.urlopen('http://www.house.gov/htbin/zipfind', params)
    page_source = f.read()
    #print page_source
    relevant_section = re.findall(r'templateLanding(.*?)contentMain', page_source, re.DOTALL)
    rep_info = re.findall('(.*?)', relevant_section[0])
    rep_district_info = re.findall('is located in (.*?)\.', relevant_section[0])
    try:
        member_url = rep_info[0][0]
        member_name = rep_info[0][1]
        member_district = rep_district_info[0]
        #member_district = rep_info[0][2]
    except:
        fail_list += [zip_entry]
        member_url = ''
        member_name = ''
        member_district = ''
    row_to_write = [unique_id, zip_part, plus_four_part, member_url, member_name, member_district, datetime.datetime.now()]
    output_writer.writerow(row_to_write)
    if counter % 50 == 0:
        print counter, row_to_write

output_file_handler.close()
print OUTPUT_FILE_NAME, 'closed at', datetime.datetime.now()
print len(fail_list), 'entries failed to lookup'
print counter, 'rows done at', datetime.datetime.now()
So, the author used a site which only allows five digits (the code is a couple of years old, as is that site). I have no idea how to correctly replace it with a new site.
If anyone knows of a solution or can point me in the direction of resources that might help, I would much appreciate it. At the moment I'm lost!
From what I can see, you can query, for example, http://www.house.gov/htbin/findrep?ZIP=63333-1211
So you could replace the urllib call with
urllib.urlopen('http://www.house.gov/htbin/findrep', zip_entry)
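Concretely, and as a sketch only: the example URL above is a plain GET with the nine-digit ZIP in the query string, so rather than passing zip_entry as urlopen's data argument (which would send a POST), you could build the request inside the existing loop like this:
params = urllib.urlencode({'ZIP': zip_entry})                  # e.g. 'ZIP=63333-1211'
f = urllib.urlopen('http://www.house.gov/htbin/findrep?' + params)
page_source = f.read()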

Aggregating/optimizing object.save()?

I'm working on an import feature which allows the user to create Django database models from a selected CSV file.
The models are related to each other with foreign keys and many-to-many fields.
There is a lot of object.save() and Object.objects.get(...) in my code, which, I suppose, causes it to run so slowly.
When an error (for example an integrity error) occurs, I need all the changes in the database to be rolled back, so I'm using the transaction.atomic decorator on my view and it works fine.
The problem is, my import is really slow. Parsing a file containing ~2000 lines (which could add about 1000 objects to my database) takes about 3 minutes, which is too long.
Is there a way to make it faster? I've read about the bulk_create function, but "It does not work with many-to-many relationships."
If this is important, I'm using PostgreSQL.
EDIT:
File structure looks like this:
subject_name
day [A/B] begins_at - ends_at;lecturer_info
Then multiple lines like:
student_uid;student_info
Ok, here's the code.
def csv_import(market, csv_file):
    lines = [line.strip().decode('utf-8') for line in csv_file.readlines()]
    lines = [line for line in lines if line]
    pattern = re.compile(r'[0-9]+;.+')
    week_days = {
        'monday': 0,
        # ...
    }
    term, subject, lecturer, student = None, None, None, None

    for number, line in enumerate(lines):
        if not ';' in line:
            subject = Subject(subject_id=number, name=line, market=market)
            subject.save()
        elif not pattern.match(line):
            term_info, lecturer_info = line.split(';')  # term_info - 'day begins_at - ends_at', lecturer_info - lecturer
            term_info = term_info.replace(' - ', ' ').split()
            term = Term(term_id=number, subject=subject, day=week_days[term_info[0]], begin_at=term_info[-2],
                        ends_at=term_info[-1])
            if len(term_info) == 4:
                term.week = term_info[1]

            lecturer_info = lecturer_info.rsplit(' ', 1)
            try:
                lecturer = Lecturer.objects.get(first_name=lecturer_info[0], last_name=lecturer_info[1])
            except Lecturer.DoesNotExist:
                lecturer = Lecturer(first_name=lecturer_info[0], last_name=lecturer_info[1])
                lecturer.save()

            term.lecturer = lecturer
            term.save()
        else:
            gradebook_id, student_info = line.split(';')
            student_info = student_info.rsplit(' ', 1)
            try:
                student = TMUser.objects.get(uid=int(gradebook_id))
            except TMUser.DoesNotExist:
                student = TMUser(uid=int(gradebook_id), username='student'+gradebook_id, first_name=student_info[0],
                                 last_name=student_info[1], password=make_password('passwd'), user_group='user')
                student.save()

            student.terms.add(term)
            student.save()
This is some pseudo code to show you the basic idea of what I meant by caching results:
cache = {}
for number, line in enumerate(lines):
    ...
    elif not pattern.match(line):
        ...
        term = Term(term_id=number, subject=subject, ...)
        lecturer_id = (lecturer_info[0], lecturer_info[1])  # first name and last
        if lecturer_id in cache:
            # retrieve from cache
            lecturer = cache[lecturer_id]
        else:
            try:
                lecturer = Lecturer.objects.get(first_name=lecturer_id[0], last_name=lecturer_id[1])
            except Lecturer.DoesNotExist:
                lecturer = Lecturer(first_name=lecturer_id[0], last_name=lecturer_id[1])
                lecturer.save()
            # add to cache
            cache[lecturer_id] = lecturer
        term.lecturer = lecturer
        term.save()
        # etc.
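Neither answer touches the many-to-many part, but since the question brings up bulk_create: the student-term links can be bulk-inserted through the implicit through model once the students and terms themselves are saved. A sketch (not from the answers above), assuming the TMUser.terms field from the question and Django's default through-table field names:
from django.db import transaction

with transaction.atomic():
    # student_term_pairs is a hypothetical list of (student, term) tuples
    # collected while parsing, instead of calling student.terms.add(term)
    # once per line.
    links = [TMUser.terms.through(tmuser_id=student.pk, term_id=term.pk)
             for student, term in student_term_pairs]
    TMUser.terms.through.objects.bulk_create(links)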

Userfriendly way of handling config files in python?

I want to write a program that sends an e-mail to one or more specified recipients when a certain event occurs. For this I need the user to write the parameters for the mail server into a config file. Possible values are, for example: server address, ports, ssl (true/false), and a list of desired recipients.
What's the most user-friendly/best-practice way to do this?
I could of course use a Python file with the correct parameters that the user has to fill out, but I wouldn't consider this user friendly. I also read about the 'config' module in Python, but it seems to me that it's made for creating config files on its own, and not for having users fill the files out themselves.
Are you saying that the fact that the config file would need to be valid Python makes it unfriendly? It seems like having lines in a file like:
server = 'mail.domain.com'
port = 25
...etc would be intuitive enough while still being valid Python. If you don't want the user to have to know that they have to quote strings, though, you might go the YAML route. I use YAML pretty much exclusively for config files and find it very intuitive, and it would also be intuitive for an end user I think (though it requires a third-party module - PyYAML):
server: mail.domain.com
port: 25
Having pyyaml load it is simple:
>>> import yaml
>>> yaml.load("""a: 1
... b: foo
... """)
{'a': 1, 'b': 'foo'}
With a file it's easy too.
>>> with open('myconfig.yaml', 'r') as cfile:
... config = yaml.load(cfile)
...
config now contains all of the parameters.
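One caveat worth adding: for files that end users edit by hand, yaml.safe_load is the safer entry point, since plain yaml.load can construct arbitrary Python objects (and newer PyYAML versions require an explicit Loader argument for it):
import yaml

with open('myconfig.yaml', 'r') as cfile:
    config = yaml.safe_load(cfile)

server = config['server']   # 'mail.domain.com'
port = config['port']       # 25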
It doesn't matter how technically proficient your users are; you can count on them to screw up editing a text file. (They'll save it in the wrong place. They'll use MS Word to edit a text file. They'll make typos.) I suggest making a GUI that validates the input and creates the configuration file in the correct format and location, as sketched below. A simple GUI created in Tkinter would probably fit your needs.
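A minimal sketch of that idea (Python 2, to match the rest of this thread; the field names and output filename are illustrative only):
import Tkinter as tk
import tkMessageBox
import ConfigParser

FIELDS = ['serveradress', 'port', 'ssl', 'recipients']

def save_config(entries, root):
    # validate before writing anything
    if not entries['port'].get().isdigit():
        tkMessageBox.showerror("Invalid input", "Port must be a number")
        return
    config = ConfigParser.ConfigParser()
    config.add_section('mail')
    for name, entry in entries.items():
        config.set('mail', name, entry.get())
    with open('configfile.txt', 'w') as f:
        config.write(f)
    root.destroy()

root = tk.Tk()
root.title("Mail settings")
entries = {}
for row, name in enumerate(FIELDS):
    tk.Label(root, text=name).grid(row=row, column=0, sticky='w')
    entry = tk.Entry(root)
    entry.grid(row=row, column=1)
    entries[name] = entry
tk.Button(root, text="Save",
          command=lambda: save_config(entries, root)).grid(row=len(FIELDS), column=1)
root.mainloop()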
I've been using ConfigParser. It's designed to read .ini style files that have:
[section]
option = value
It's quite easy to use and the documentation is pretty easy to read. Basically you just load the whole file into a ConfigParser object:
import ConfigParser
config = ConfigParser.ConfigParser()
config.read('configfile.txt')
Then you can make sure the users haven't messed anything up by checking the options. I do so with a list:
OPTIONS = ['section,option,defaultvalue',
           # ...
           ]

for opt in OPTIONS:
    section, option, defaultval = opt.split(',')
    if not config.has_option(section, option):
        print "Missing option %s in section %s" % (option, section)
Getting the values out is easy too.
val = config.get('section','option')
And I also wrote a function that creates a sample config file using that OPTIONS list.
new_config = ConfigParser.ConfigParser()
for opt in OPTIONS:
    section, option, defaultval = opt.split(',')
    if not new_config.has_section(section):
        new_config.add_section(section)
    new_config.set(section, option, defaultval)
with open("sample_configfile.txt", 'wb') as newconfigfile:
    new_config.write(newconfigfile)
print "Generated file: sample_configfile.txt"
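Once the options have been validated, ConfigParser can also coerce the values for you, which saves some manual conversion (the section and option names here are just examples):
server = config.get('mail', 'serveradress')
port = config.getint('mail', 'port')         # raises ValueError if the value is not an integer
use_ssl = config.getboolean('mail', 'ssl')   # accepts yes/no, true/false, on/off, 1/0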
What are the drawbacks of a solution like the following?
ch = 'serveradress = %s\nport = %s\nssl = %s'

a = raw_input("Enter the server's address : ")

b = 'a'
bla = "\nEnter the port : "
while not all(x.isdigit() for x in b):
    b = raw_input(bla)
    bla = "Take care: you must enter digits exclusively\n"\
          + "  Re-enter the port (digits only) : "

c = ''
bla = "\nChoose the ssl option (t or f) : "
while c not in ('t', 'f'):
    c = raw_input(bla)
    bla = "Take care: you must type f or t exclusively\n"\
          + "  Re-choose the ssl option : "

with open('configfile.txt', 'w') as f:
    f.write(ch % (a, b, c))
.
PS
I've read in jonesy's post that the values in a config file may have to be quoted. If so, and if you want the user not to have to write the quotes him/herself, you can simply add:
a = a.join('""')
b = b.join('""')
c = c.join('""')
.
EDIT
ch = 'serveradress = %s\nport = %s\nssl = %s'

d = {0: ('',
         "Enter the server's address : "),
     1: ("Take care: you must enter digits exclusively",
         "Enter the port : "),
     2: ("Take care: you must type f or t exclusively",
         "Choose the ssl option (t or f) : ")}

def func(i, x):
    if x is None:
        return False
    if i == 0:
        return True
    elif i == 1:
        try:
            ess = int(x)
            return True
        except:
            return False
    elif i == 2:
        if x in ('t', 'f'):
            return True
        else:
            return False

li = len(d)*[None]
L = range(len(d))

while True:
    for n in sorted(L):
        bla = d[n][1]
        val = None
        while not func(n, val):
            val = raw_input(bla)
            bla = '\n '.join(d[n])
        li[n] = val.join('""')

    decision = ''
    disp = "\n====== If you choose to process, =============="\
           + "\n       the content of the file will be :\n\n" \
           + ch % tuple(li) \
           + "\n==============================================="\
           + "\n\nDo you want to process (type y) or to correct (type c) : "
    while decision not in ('y', 'c'):
        decision = raw_input(disp)
        disp = "Do you want to process (type y) or to correct (type c) ? : "

    if decision == 'y':
        break
    else:
        diag = False
        while not diag:
            vi = '\nWhat lines do you want to correct ?\n'\
                 + '\n'.join(str(j)+' - '+line for j, line in enumerate((ch % tuple(li)).splitlines()))\
                 + '\nType numbers of lines belonging to range(0,'+str(len(d))+') separated by spaces) :\n'
            to_modify = raw_input(vi)
            try:
                diag = all(int(entry) in xrange(len(d)) for entry in to_modify.split())
                L = [int(entry) for entry in to_modify.split()]
            except:
                diag = False

with open('configfile.txt', 'w') as f:
    f.write(ch % tuple(li))

print '-*- Recording of the config file : done -*-'
