python replace backslash - python

I'm trying to implement a simple helper class to interact with java-properties files. Fiddling with multiline properties I encountered a problem, that I can not get solved, maybe you can?
The unittest in the class first writes a multiline-property spanning over two lines to the property-file, then re-reads it and checks for equality. That just works. Now, if i use the class to add a third line to the property, it re-reads it with additional backslashes that I can't explain.
Here is my code:
#!/usr/bin/env python3
# -*- coding=UTF-8 -*-
import codecs
import os, re
import fileinput
import unittest
class ConfigParser:
reProp = re.compile(r'^(?P<key>[\.\w]+)=(?P<value>.*?)(?P<ext>[\\]?)$')
rePropExt = re.compile(r'(?P<value>.*?)(?P<ext>[\\]?)$')
files = []
def __init__(self, pathes=[]):
for path in pathes:
if os.path.isfile(path):
self.files.append(path)
def getOptions(self):
result = {}
key = ''
val = ''
with fileinput.input(self.files, inplace=False) as fi:
for line in fi:
m = self.reProp.match(line.strip())
if m:
key = m.group('key')
val = m.group('value')
result[key] = val
else:
m = self.rePropExt.match(line.rstrip())
if m:
val = '\n'.join((val, m.group('value')))
result[key] = val
fi.close()
return result
def setOptions(self, updates={}):
options = self.getOptions()
options.update(updates)
with fileinput.input(self.files, inplace=True) as fi:
for line in fi:
m = self.reProp.match(line.strip())
if m:
key = m.group('key')
nval = options[key]
nval = nval.replace('\n', '\\\n')
print('{}={}'.format(key,nval))
fi.close()
class test(unittest.TestCase):
files = ['test.properties']
props = {'test.m.a' : 'Johnson\nTanaka'}
def setUp(self):
for file in self.files:
f = codecs.open(file, encoding='utf-8', mode='w')
for key in self.props.keys():
val = self.props[key]
val = re.sub('\n', '\\\n', val)
f.write(key + '=' + val)
f.close()
def teardown(self):
pass
def test_read(self):
c = configparser(self.files)
for file in self.files:
for key in self.props.keys():
result = c.getOptions()
self.assertEqual(result[key],self.props[key])
def test_write(self):
c = ConfigParser(self.files)
changes = {}
for key in self.props.keys():
changes[key] = self.change_value(self.props[key])
c.setOptions(changes)
result = c.getOptions()
print('changes: ')
print(changes)
print('result: ')
print(result)
for key in changes.keys():
self.assertEqual(result[key],changes[key],msg=key)
def change_value(self, value):
return 'Smith\nJohnson\nTanaka'
if __name__ == '__main__':
unittest.main()
Output of the testrun:
C:\pyt>propertyfileparser.py
changes:
{'test.m.a': 'Smith\nJohnson\nTanaka'}
result:
{'test.m.a': 'Smith\nJohnson\\\nTanaka'}
Any hints welcome...

Since you are adding a backslash in front of new-lines when you are writing, you have to also remove them when you are reading. Uncommenting the line that substitutes '\n' with '\\n' solves the problem, but I expect this also means the file syntax is incorrect.
This happens only with the second line break, because you separate the value into an "oval" and an "nval" where the "oval" is the first line, and the "nval" the rest, and you only do the substitution on the nval.
It's also overkill to use regexp replacing to replace something that isn't a regexp. You can use val.replace('\n', '\\n') instead.
I'd do this parser very differently. Well, first of all, I wouldn't do it at all, I'd use an existing parser, but if I did, I'd read the file, line by line, while handling the line continuation issue, so that I had exactly one value per item in a list. Then I'd parse each item into a key and a value with a regexp, and stick that into a dictionary.
You instead parse each line separately and join continuation lines to the values after parsing, which IMO is completely backwards.

Related

Python set value for specific key in properties file

We have a sample .cfg file consist of key value pair. Based on user input, we need to update the value. I was thinking to update the value using configParser but file doesn't have any section (e.g. [My Section]). Based on the documentation it needs three values to set - section, key and value. Unfortunately, I will not be able to add any section marker, as this file is used by other tasks.
What would be the another way we can set the value based on key?
File example
some.status_file_mode = 1 # Some comment
some.example_time = 7200 # Some comment
As per the requirement, no change in the line. Spaces and comments needs to be same as is.
Use NamedTemporaryFile from the tempfile module it is not too hard to build a simple parser to update a file that looks like that:
Code:
def replace_key(filename, key, value):
with open(filename, 'rU') as f_in, tempfile.NamedTemporaryFile(
'w', dir=os.path.dirname(filename), delete=False) as f_out:
for line in f_in.readlines():
if line.startswith(key):
line = '='.join((line.split('=')[0], ' {}'.format(value)))
f_out.write(line)
# remove old version
os.unlink(filename)
# rename new version
os.rename(f_out.name, filename)
Test Code:
import os
import tempfile
replace_key('file1', 'some.example_time', 3)
Results:
some.status_file_mode = 1
some.example_time = 3
If you don't care about spacing, this works well for your case.
def replace_config(filename, key, value):
d = {}
with open(filename, "r+") as f:
for line in f:
k, v = line.split('=')
c = ""
try:
v, c = v.split('#')
except ValueError:
c = ""
d[k.strip()] = {'v': v.strip(), 'c': c.strip()}
f.seek(0)
f.truncate()
d[key]['v'] = value
for k, v in d.items():
if v["c"]:
text = "{} = {} # {}\n".format(k, v['v'], v['c'])
else:
text = "{} = {}\n".format(k, v['v'])
f.write(text)
replace_config('config.cfg', 'some.example_time', 3)

Python 2.7.3 IndentationError: expected an indented block - cant find mistake

I am new to python and trying to submit my HW on Coursera Data Science course. The environment there is VM running Python 2.7.3, the file tweet_sentiment.py I am trying to run has the following script within it:
import sys
import json
def hw():
print 'Hello, world!'
def lines(fp):
print str(len(fp.readlines()))
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
# hw()
# lines(sent_file)
# lines(tweet_file)
myfile = open(sys.argv[1], 'r')
lines = myfile.readlines()
mydict = {}
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
twit_file = open(sys.argv[2], 'r')
twit_lines = twit_file.readlines()
mylist = []
for line in twit_lines:
mylist.append(json.loads(line))
for listik in mylist:
twit_value = 0
twit_text = listik["text"]
twit_words = twit_text.split()
for word in twit_words:
if word in mydict:
twit_value = twit_value + 1
print float(twit_value)
if __name__ == '__main__':
main()
When running $ python tweet_sentiment.py I am getting the following error:
File "tweet_sentiment.py", line 25
key, value = line.split("\t")
^
IndentationError: expected an indented block
Thanks for any hints!
Sergey
Be careful! You're mixing tabs and spaces for indenting.
Often a tab is displayed as the equivalent of 8 spaces. So, when using the common practice of 4 spaces it looks like 2 levels of indentation, but is really only one.
When I examine your code in the editor, I can see that you've got in at least two places. Replace those tabs with 4 spaces.
As it says, you have an indentation error. Line 25 should be corrected to this:
def main():
...
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
You have to indent the line after a for block. Your code should look like:
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
Your code must be like this:
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
It's the same for all other for's.
Try:
import sys
import json
def hw():
print 'Hello, world!'
def lines(fp):
print str(len(fp.readlines()))
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
# hw()
# lines(sent_file)
# lines(tweet_file)
myfile = open(sys.argv[1], 'r')
lines = myfile.readlines()
mydict = {}
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
twit_file = open(sys.argv[2], 'r')
twit_lines = twit_file.readlines()
mylist = []
for line in twit_lines:
mylist.append(json.loads(line))
for listik in mylist:
twit_value = 0
twit_text = listik["text"]
twit_words = twit_text.split()
for word in twit_words:
if word in mydict:
twit_value = twit_value + 1
print float(twit_value)
if __name__ == '__main__':
main()
The indentation for the for statements is messed up. The for block has to be indented. Also, as stated before, Python will throw an error if you mix tabs and spaces. Use either all spaces(replace each tab with 4 spaces) or all tabs(replace every 4 spaces with a tab).
Use vim to open it. And then type the commoand :retab
It then shows the line not correctly indented.

Parsing specific contents in a file

I have a file that looks like this
!--------------------------------------------------------------------------DISK
[DISK]
DIRECTION = 'OK'
TYPE = 'normal'
!------------------------------------------------------------------------CAPACITY
[CAPACITY]
code = 0
ID = 110
I want to read sections [DISK] and [CAPACITY].. there will be more sections like these. I want to read the parameters defined under those sections.
I wrote a following code:
file_open = open(myFile,"r")
all_lines = file_open.readlines()
count = len(all_lines)
file_open.close()
my_data = {}
section = None
data = ""
for line in all_lines:
line = line.strip() #remove whitespace
line = line.replace(" ", "")
if len(line) != 0: # remove white spaces between data
if line[0] == "[":
section = line.strip()[1:]
data = ""
if line[0] !="[":
data += line + ","
my_data[section] = [bit for bit in data.split(",") if bit != ""]
print my_data
key = my_data.keys()
print key
Unfortunately I am unable to get those sections and the data under that. Any ideas on this would be helpful.
As others already pointed out, you should be able to use the ConfigParser module.
Nonetheless, if you want to implement the reading/parsing yourself, you should split it up into two parts.
Part 1 would be the parsing at file level: splitting the file up into blocks (in your example you have two blocks: DISK and CAPACITY).
Part 2 would be parsing the blocks itself to get the values.
You know you can ignore the lines starting with !, so let's skip those:
with open('myfile.txt', 'r') as f:
content = [l for l in f.readlines() if not l.startswith('!')]
Next, read the lines into blocks:
def partition_by(l, f):
t = []
for e in l:
if f(e):
if t: yield t
t = []
t.append(e)
yield t
blocks = partition_by(content, lambda l: l.startswith('['))
and finally read in the values for each block:
def parse_block(block):
gen = iter(block)
block_name = next(gen).strip()[1:-1]
splitted = [e.split('=') for e in gen]
values = {t[0].strip(): t[1].strip() for t in splitted if len(t) == 2}
return block_name, values
result = [parse_block(b) for b in blocks]
That's it. Let's have a look at the result:
for section, values in result:
print section, ':'
for k, v in values.items():
print '\t', k, '=', v
output:
DISK :
DIRECTION = 'OK'
TYPE = 'normal'
CAPACITY :
code = 0
ID = 110
Are you able to make a small change to the text file? If you can make it look like this (only changed the comment character):
#--------------------------------------------------------------------------DISK
[DISK]
DIRECTION = 'OK'
TYPE = 'normal'
#------------------------------------------------------------------------CAPACITY
[CAPACITY]
code = 0
ID = 110
Then parsing it is trivial:
from ConfigParser import SafeConfigParser
parser = SafeConfigParser()
parser.read('filename')
And getting data looks like this:
(Pdb) parser
<ConfigParser.SafeConfigParser instance at 0x100468dd0>
(Pdb) parser.get('DISK', 'DIRECTION')
"'OK'"
Edit based on comments:
If you're using <= 2.7, then you're a little SOL.. The only way really would be to subclass ConfigParser and implement a custom _read method. Really, you'd just have to copy/paste everything in Lib/ConfigParser.py and edit the values in line 477 (2.7.3):
if line.strip() == '' or line[0] in '#;': # add new comment characters in the string
However, if you're running 3'ish (not sure what version it was introduced in offhand, I'm running 3.4(dev)), you may be in luck: ConfigParser added the comment_prefixes __init__ param to allow you to customize your prefix:
parser = ConfigParser(comment_prefixes=('#', ';', '!'))
If the file is not big, you can load it and use Regexes to find parts that are of interest to you.

Make python configobj to not put a space before and after the '='

Simple question. It is possible to make configobj to not put a space before and after the '=' in a configuration entry ?
I'm using configobj to read and write a file that is later processed by a bash script, so putting an antry like:
VARIABLE = "value"
breaks the bash script, it needs to always be:
VARIABLE="value"
Or if someone has another suggestion about how to read and write a file with this kind of entries (and restrictions) is fine too.
Thanks
I was looking into same and modified configobj.py by changing line 1980 in:
def _write_line(self, indent_string, entry, this_entry, comment)
from:
self._a_to_u(' = ')
to:
self._a_to_u('=')
After the change the output is without the space before and after equal sign.
Configobj is for reading and writing ini-style config files. You are apparently trying to use it to write bash scripts. That's not something that is likely to work.
Just write the bash-script like you want it to be, perhaps using a template or something instead.
To make ConfigParses not write the spaces around the = probably requires that you subclass it. I would guess that you have to modify the write method, but only reading the code can help there. :-)
Well, as suggested, I ended up writing my own parser for this that can be used exactly in the same way as ConfigObj:
config = MyConfigParser("configuration_file")
print config["CONFIG_OPTION_1"]
config["CONFIG_OPTION_1"]= "Value 1"
print config["CONFIG_OPTION_1
config.write()
This is the code if someone is interested or wants to give suggestions (I started coding in python not so long ago so probably there are lots of room for improvement). It respects the comments and the order of the options in the file, and correctly scapes and adds double quotes where needed:
import os
import sys
class MyConfigParser:
name = 'MyConfigParser'
debug = False
fileName = None
fileContents = None
configOptions = dict()
def __init__(self, fileName, debug=False):
self.fileName = fileName
self.debug = debug
self._open()
def _open(self):
try:
with open(self.fileName, 'r') as file:
for line in file:
#If it isn't a comment get the variable and value and put it on a dict
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
val = val.strip()
val = val.strip('\"')
val = val.strip('\'')
self.configOptions[key.strip()] = val
except:
print "ERROR: File " + self.fileName + " Not Found\n"
def write(self):
try:
#Write the file contents
with open(self.fileName, 'r+') as file:
lines = file.readlines()
#Truncate file so we don't need to close it and open it again
#for writing
file.seek(0)
file.truncate()
i = 0
#Loop through the file to change with new values in dict
for line in lines:
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
try:
if key in line:
newVal = self.configOptions[key]
#Only update if the variable value has changed
if val != newVal:
newLine = key + "=\"" + newVal + "\"\n"
line = newLine
except:
continue
i +=1
file.write(line)
except IOError as e:
print "ERROR opening file " + self.fileName + ": " + e.strerror + "\n"
#Redefinition of __getitem__ and __setitem__
def __getitem__(self, key):
try:
return self.configOptions.__getitem__(key)
except KeyError as e:
if isinstance(key,int):
keys = self.configOptions.keys()
return self.configOptions[keys[key]]
else:
raise KeyError("Key " +key+ " doesn't exist")
def __setitem__(self,key,value):
self.configOptions[key] = value
As suggested above, it is possible to remove the spaces either side of the equals sign by making a small change to the _write_line method. This can be done conveniently by subclassing ConfigObj and overwriting _write_line as follows -
from configobj import ConfigObj
class MyConfigObj(ConfigObj):
def __init__(self, *args, **kwargs):
ConfigObj.__init__(self, *args, **kwargs)
def _write_line(self, indent_string, entry, this_entry, comment):
"""Write an individual line, for the write method"""
# NOTE: the calls to self._quote here handles non-StringType values.
if not self.unrepr:
val = self._decode_element(self._quote(this_entry))
else:
val = repr(this_entry)
return '%s%s%s%s%s' % (indent_string,
self._decode_element(self._quote(entry, multiline=False)),
self._a_to_u('='),
val,
self._decode_element(comment))
Then just use MyConfigObj in place of ConfigObj and all the functionality of ConfigObj is maintained
As Lennart suggests, configobj is probably not the right tool for the job: how about:
>>> import pipes
>>> def dict2bash(d):
... for k, v in d.iteritems():
... print "%s=%s" % (k, pipes.quote(v))
...
>>> dict2bash({'foo': "bar baz quux"})
foo='bar baz quux'
since configobj returns something that looks a lot like a dict, you could probably still use it to read the data you are trying to process.
First of all, thanks Juancho. That's what i was looking for. But i edited the ConfigParser a little bit. Now it can handle bash script arrays in form of:
# Network interfaces to be configured
ifaces=( "eth0" "eth1" "eth2" "eth3" )
If you set a value it just proves if an value is a list and if, it sets the quotes correctly. So you can set values still the same way, even it is a list:
ifaces = ['eth0', 'eth1', 'eth2', 'eth3']
conf['ifaces'] = ifaces
Here's the code:
import os
import sys
class MyConfigParser:
name = 'MyConfigParser'
debug = False
fileName = None
fileContents = None
configOptions = dict()
qouteOptions = dict()
def __init__(self, fileName, debug=False):
self.fileName = fileName
self.debug = debug
self._open()
def _open(self):
try:
with open(self.fileName, 'r') as file:
for line in file:
#If it isn't a comment get the variable and value and put it on a dict
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
val = val.strip()
val = val.strip('\"')
val = val.strip('\'')
self.configOptions[key.strip()] = val
if val.startswith("("):
self.qouteOptions[key.strip()] = ''
else:
self.qouteOptions[key.strip()] = '\"'
except:
print "ERROR: File " + self.fileName + " Not Found\n"
def write(self):
try:
#Write the file contents
with open(self.fileName, 'r+') as file:
lines = file.readlines()
#Truncate file so we don't need to close it and open it again
#for writing
file.seek(0)
file.truncate()
#Loop through the file to change with new values in dict
for line in lines:
if not line.startswith("#") and len(line) > 1:
(key, val) = line.rstrip('\n').split('=')
try:
if key in line:
quotes = self.qouteOptions[key]
newVal = quotes + self.configOptions[key] + quotes
#Only update if the variable value has changed
if val != newVal:
newLine = key + "=" + newVal + "\n"
line = newLine
except:
continue
file.write(line)
except IOError as e:
print "ERROR opening file " + self.fileName + ": " + e.strerror + "\n"
#Redefinition of __getitem__ and __setitem__
def __getitem__(self, key):
try:
return self.configOptions.__getitem__(key)
except KeyError as e:
if isinstance(key,int):
keys = self.configOptions.keys()
return self.configOptions[keys[key]]
else:
raise KeyError("Key " + key + " doesn't exist")
def __setitem__(self, key, value):
if isinstance(value, list):
self.qouteOptions[key] = ''
value_list = '('
for item in value:
value_list += ' \"' + item + '\"'
value_list += ' )'
self.configOptions[key] = value_list
else:
self.qouteOptions[key] = '\"'
self.configOptions[key] = value

How to get only the values from the file while parsing it?

I am parsing a config file I would like to get only the values which are file paths in the file
for example the config file has
apache.access = /var/log/apache2/access.log
apache.errors = /var/log/apache2/errors.log
I would like to get only '/var/log/apache2/access.log' and '/var/log/apache2/errors.log' from the
COMMENT_CHAR = '#'
OPTION_CHAR = '='
def parse_config(filename):
options = {}
f = open(filename)
for line in f:
if COMMENT_CHAR in line:
line, comment = line.split(COMMENT_CHAR, 1)
if OPTION_CHAR in line:
option, value = line.split(OPTION_CHAR, 1)
option = option.strip()
value = value.strip()
options[option] = value
f.close()
return options
I tried this but it gives all the options and values in the file
apache.access : /var/log/apache2/access.log
apache.errors : /var/log/apache2/errors.log
First you have to define, what is a file path (1. does it have to exist? 2. does it have to be creatable in an existing directory? 3. anything else?). This tests whether the given string is a path pointing to an existing dir/file:
import os
# ...
[v for v in options.itervalues() if os.path.exists(v)]
Regular expressions is what you need. Here is little example how it could look
import re
f = open("apache.log")
for line in f:
m = re.match("apache\.(access|errors)\s+=\s+(.+)", line)
if None != m:
print (m.group(2))
f.close()

Categories

Resources