I would like to parse a config file, containing list of filenames, divided in sections:
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
..
I tried ConfigParser, but it requires pairs of name-value. How can I parse such a file?
Likely you have to implement the parser on your own.
Blueprint:
key = None
current = list()
for line in file(...):
if line.startswith('['):
if key:
print key, current
key = line[1:-1]
current = list()
else:
current.append(line)
Here is an iterator/generator solution:
data = """\
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
...""".splitlines()
def sections(it):
nextkey = next(it)
fin = False
while not fin:
key = nextkey
body = ['']
try:
while not body[-1].startswith('['):
body.append(next(it))
except StopIteration:
fin = True
else:
nextkey = body.pop(-1)
yield key, body[1:]
print dict(sections(iter(data)))
# if reading from a file, do: dict(sections(file('filename.dat')))
Related
I have a directory with many pickled files, each containing a dictionary. The filename indicates the setting of the dictionary. E.g.: 20NewsGroup___10___Norm-False___Probs-True___euclidean.pickle.
I want to combine these different dicts all in one large dict. To do this, I have written the following code:
PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)
one_dict = dict()
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
However, when I run this, I get a KeyError, as the key for splitted[0] does not yet exist. What is the best way to populate a dictionary similar to what I envision?
you need to create in these fields
Example:
from typing import List
PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)
def check_exist_path(target_dict, paths: List[str]):
for key in paths:
if key not in target_dict:
target_dict[key] = {}
target_dict = target_dict[key]
one_dict = dict()
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
for i in range(5):
check_exist_path(one_dict, [splitted[j] for j in range(i)])
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
You could use try-except blocks:
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
try:
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
except KeyError:
one_dict[splitted[0]] = {}
one_dict[splitted[0]][splitted[1]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
This code tries to access nested dictionary keys, if they don't exist, then the code will create them with an empty dictionary.
i have a text file with the below contents
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
I have a block of code that is supposed to get the value xxxxxxxxx or yyyyyyyyy based on the env value passed(prod or test)
#!/usr/bin/python
import os
class test:
def __init__(self, env):
self.env = env
def func(self):
res = []
try:
if os.path.exists("file.txt"):
try:
with open("file.txt", 'r') as fp:
for line in fp:
print("line is " +line)
line_api = line.split(',')[2]
print(line_api)
res.append(line_api)
print(res)
if self.env == "prod":
self.api = res[0]
print(self.api)
else:
self.api = res[1]
print(self.api)
except Exception as e:
print(e)
except Exception as e:
print(e)
value when else part is executed
list index out of range
Now when the env passed is prod the function works but when the value is test and the else part is executed the value of list res is only xxxxxx, there is only one value in list and the code self.api = res[1] fails. print(res) only prints ['xxxxxxxxxxxx\n'] but when url passed is url1 print(res) only prints both ['xxxxxxxxxxx\n', 'yyyyyyyyy \n']
What is wrong with my code?
The issue with your code is that you split each line that you read in with line_api = line.split(',')[2] and the [2] is referencing the third element that exists in that list which is xxxxxxxxx or yyyyyyyyy, then when you call self.api = res[0] you reference the first (and only) element in that list.
self.api = res[1] will always throw an error because it will never exist in the 1 element list. I'm not sure what the goal was for this else statement, but I would suggest using DirtyBit's elegant solution
Since it is hard to debug your already excessive code, Here is a shorter snippet:
Using startswith():
list.txt:
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
Hence:
logFile = "list.txt"
def getUrlValue(url):
with open(logFile) as f:
content = f.readlines()
# you may also want to remove empty lines
content = [l.strip() for l in content if l.strip()]
for line in content:
if line.startswith(url):
print(line.split(',')[2])
getUrlValue("url1")
getUrlValue("url2")
OUTPUT:
xxxxxxxxx
yyyyyyyyy
Here is my code :
filenames = []
data = []
found = False
axconfig = open('axcfgpasww.dat', "r")
dictionnary = open("hello.txt", "r")
output_value = "bonjour"
for line in axconfig:
for word in dictionnary:
element = word.split("=")[0]
if element in axconfig:
# old_value = line.split("=")
# data.append(line.replace(old_value, output_value+'\n'))
# found = True
# else:
# data.append(line)
print(element)
if not found:
print('No match found')
# Create a new file, from data array
with open("olivier.txt", 'w') as outfile:
for line in data:
outfile.write(line)
The dynamic value is "element".
I have a .txt file, that have keywords and want to look "element" with the values inside the .txt file.
But my code doesn't work..
It only adds in olivier.txt the first line of axcfgpasww.dat..
And is also returning "No match found"
Thanks guys !
edit :
axcfgpasww.dat
T72_BANK_IDENTIFIER_CODE_3=SOAPFR22
T72_ISSUER_COUNTRY_CODE_3=FR
T72_MERCHANT_ID_3=
T72_VAS_SCHEME_IDENTIFIER_3=
hello.txt
T72_VAS_SERVICE_IDENTIFIER_7=
T72_VAS_SCHEME_IDENTIFIER_3=
The problem is with the line:
if element in axconfig:
try instead:
if element in line.split("=")
For my convenience if I have considered both as text files.
Below code will match each line of axconfig with hello and then print whatever is matched and also will store that value is a variable called "value"
filenames = []
data = []
found = False
value = ""
axconfig = open('axcfgpasww.txt', "r")
lines = axconfig.readlines()
dictionnary = open("hello.txt", "r")
output_value = "bonjour"
for line in lines:
match = line.split("=")[0]
for word in dictionnary:
element = word.split("=")[0]
if element == match:
print(element)
value = element
Simple question. It is possible to make configobj to not put a space before and after the '=' in a configuration entry ?
I'm using configobj to read and write a file that is later processed by a bash script, so putting an antry like:
VARIABLE = "value"
breaks the bash script, it needs to always be:
VARIABLE="value"
Or if someone has another suggestion about how to read and write a file with this kind of entries (and restrictions) is fine too.
Thanks
I was looking into same and modified configobj.py by changing line 1980 in:
def _write_line(self, indent_string, entry, this_entry, comment)
from:
self._a_to_u(' = ')
to:
self._a_to_u('=')
After the change the output is without the space before and after equal sign.
Configobj is for reading and writing ini-style config files. You are apparently trying to use it to write bash scripts. That's not something that is likely to work.
Just write the bash-script like you want it to be, perhaps using a template or something instead.
To make ConfigParses not write the spaces around the = probably requires that you subclass it. I would guess that you have to modify the write method, but only reading the code can help there. :-)
Well, as suggested, I ended up writing my own parser for this that can be used exactly in the same way as ConfigObj:
config = MyConfigParser("configuration_file")
print config["CONFIG_OPTION_1"]
config["CONFIG_OPTION_1"]= "Value 1"
print config["CONFIG_OPTION_1
config.write()
This is the code if someone is interested or wants to give suggestions (I started coding in python not so long ago so probably there are lots of room for improvement). It respects the comments and the order of the options in the file, and correctly scapes and adds double quotes where needed:
import os
import sys
class MyConfigParser:
name = 'MyConfigParser'
debug = False
fileName = None
fileContents = None
configOptions = dict()
def __init__(self, fileName, debug=False):
self.fileName = fileName
self.debug = debug
self._open()
def _open(self):
try:
with open(self.fileName, 'r') as file:
for line in file:
#If it isn't a comment get the variable and value and put it on a dict
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
val = val.strip()
val = val.strip('\"')
val = val.strip('\'')
self.configOptions[key.strip()] = val
except:
print "ERROR: File " + self.fileName + " Not Found\n"
def write(self):
try:
#Write the file contents
with open(self.fileName, 'r+') as file:
lines = file.readlines()
#Truncate file so we don't need to close it and open it again
#for writing
file.seek(0)
file.truncate()
i = 0
#Loop through the file to change with new values in dict
for line in lines:
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
try:
if key in line:
newVal = self.configOptions[key]
#Only update if the variable value has changed
if val != newVal:
newLine = key + "=\"" + newVal + "\"\n"
line = newLine
except:
continue
i +=1
file.write(line)
except IOError as e:
print "ERROR opening file " + self.fileName + ": " + e.strerror + "\n"
#Redefinition of __getitem__ and __setitem__
def __getitem__(self, key):
try:
return self.configOptions.__getitem__(key)
except KeyError as e:
if isinstance(key,int):
keys = self.configOptions.keys()
return self.configOptions[keys[key]]
else:
raise KeyError("Key " +key+ " doesn't exist")
def __setitem__(self,key,value):
self.configOptions[key] = value
As suggested above, it is possible to remove the spaces either side of the equals sign by making a small change to the _write_line method. This can be done conveniently by subclassing ConfigObj and overwriting _write_line as follows -
from configobj import ConfigObj
class MyConfigObj(ConfigObj):
def __init__(self, *args, **kwargs):
ConfigObj.__init__(self, *args, **kwargs)
def _write_line(self, indent_string, entry, this_entry, comment):
"""Write an individual line, for the write method"""
# NOTE: the calls to self._quote here handles non-StringType values.
if not self.unrepr:
val = self._decode_element(self._quote(this_entry))
else:
val = repr(this_entry)
return '%s%s%s%s%s' % (indent_string,
self._decode_element(self._quote(entry, multiline=False)),
self._a_to_u('='),
val,
self._decode_element(comment))
Then just use MyConfigObj in place of ConfigObj and all the functionality of ConfigObj is maintained
As Lennart suggests, configobj is probably not the right tool for the job: how about:
>>> import pipes
>>> def dict2bash(d):
... for k, v in d.iteritems():
... print "%s=%s" % (k, pipes.quote(v))
...
>>> dict2bash({'foo': "bar baz quux"})
foo='bar baz quux'
since configobj returns something that looks a lot like a dict, you could probably still use it to read the data you are trying to process.
First of all, thanks Juancho. That's what i was looking for. But i edited the ConfigParser a little bit. Now it can handle bash script arrays in form of:
# Network interfaces to be configured
ifaces=( "eth0" "eth1" "eth2" "eth3" )
If you set a value it just proves if an value is a list and if, it sets the quotes correctly. So you can set values still the same way, even it is a list:
ifaces = ['eth0', 'eth1', 'eth2', 'eth3']
conf['ifaces'] = ifaces
Here's the code:
import os
import sys
class MyConfigParser:
name = 'MyConfigParser'
debug = False
fileName = None
fileContents = None
configOptions = dict()
qouteOptions = dict()
def __init__(self, fileName, debug=False):
self.fileName = fileName
self.debug = debug
self._open()
def _open(self):
try:
with open(self.fileName, 'r') as file:
for line in file:
#If it isn't a comment get the variable and value and put it on a dict
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
val = val.strip()
val = val.strip('\"')
val = val.strip('\'')
self.configOptions[key.strip()] = val
if val.startswith("("):
self.qouteOptions[key.strip()] = ''
else:
self.qouteOptions[key.strip()] = '\"'
except:
print "ERROR: File " + self.fileName + " Not Found\n"
def write(self):
try:
#Write the file contents
with open(self.fileName, 'r+') as file:
lines = file.readlines()
#Truncate file so we don't need to close it and open it again
#for writing
file.seek(0)
file.truncate()
#Loop through the file to change with new values in dict
for line in lines:
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
try:
if key in line:
quotes = self.qouteOptions[key]
newVal = quotes + self.configOptions[key] + quotes
#Only update if the variable value has changed
if val != newVal:
newLine = key + "=" + newVal + "\n"
line = newLine
except:
continue
file.write(line)
except IOError as e:
print "ERROR opening file " + self.fileName + ": " + e.strerror + "\n"
#Redefinition of __getitem__ and __setitem__
def __getitem__(self, key):
try:
return self.configOptions.__getitem__(key)
except KeyError as e:
if isinstance(key,int):
keys = self.configOptions.keys()
return self.configOptions[keys[key]]
else:
raise KeyError("Key " + key + " doesn't exist")
def __setitem__(self, key, value):
if isinstance(value, list):
self.qouteOptions[key] = ''
value_list = '('
for item in value:
value_list += ' \"' + item + '\"'
value_list += ' )'
self.configOptions[key] = value_list
else:
self.qouteOptions[key] = '\"'
self.configOptions[key] = value
I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
But I'm not sure how I can now go through the next bunch of lines and do line.split("=") or similar to extract the info (put into a list or dict or whatever) that I can use my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
with open('rooms') as f:
chunks = f.read().split('\n\n')
people_by_id = {}
for chunk in chunks:
data = dict(row.split('=', 1) for row in chunk.split('\n'))
people_by_id[data['id']] = data
del data['id']
def get_person_by_id(id):
return people_by_id.get(id)
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
break
#now you can continue processing your file:
next_line = file.readline()
Maybe:
d = dict()
with open(filename) as f:
for line in f:
k,v = line.split('=')
if 'id=' in line:
d[v] = {}
d[d.keys()[-1]][k] = v
And here is an iterative solution.
objects = []
current_object = None
with open("info.txt", "rb") as f:
for line in f:
line = line.strip("\r\n")
if not line:
current_object = None
continue
if current_object is None:
current_object = {}
objects.append(current_object)
key,_,value = line.partition('=')
current_object[key] = value
print objects
Another example of an iterative parser:
from itertools import takewhile
def entries(f):
e = {}
def read_one():
one = {}
for line in takewhile(lambda x: '=' in x, f):
key, val = line.strip().split('=')
one[key] = val
return one
while True:
one = read_one()
if not one:
break
else:
e[one.pop('id')] = one
return e
Example:
>>> with open('data.txt') as f:
..: print entries(f)['2']
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all the person's attributes and values (i.e. id, name, occupation, age, etc..), till you find
an empy line.
def get_person_by_id(id):
person = {}
file = open('rooms', 'r')
for line in file:
if found == True:
if line.strip():
attr, value = line.split("="):
else:
return person
elif ("id=" + id) in line:
print(id + " found")
found = True
attr, value = line.split("=")
person[attr] = value
return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
person = dict()
for l in it:
try:
k, v = l.strip('\n').split('=', 1)
except ValueError:
pass
else:
if k == 'id': # New record
if person:
yield person
person = dict()
person[k] = v
if person:
yield person