Parse a file section-wise in Python

I have a text (abc.txt) file having the following entry in text file:
[General]
Local=C:\Work\July\
path=C:\Work\July\abc
[Field1]
BB0B2BA8--EFE4-4567-B8AE-0204D4BF9F60=
[CustDetails]
BB0B2BA8-EFE4-4567-B8AE-0204D4BF9F60=NOthing
[DirName]
8e27822e-5f46-4f41=TEST
[URLNAME]
8e27822e-5f46=https://
[DestURL]
8e27822e-5f46=some_URL
I want to parse the abc.txt file and load each value into a variable, like this:
MYpath = C:\Work\July\abc
custdetails= Nothing
dir_name = TEST
URL_Name = https://
DestURL = some_URL
Thanks,

Using ConfigParser:
# Parse an INI-style file and map each section name to the value of its
# last non-empty option (the behaviour of the original snippet).
import configparser  # Python 3 name of the Python 2 `ConfigParser` module


def parse_config(path='abc.txt'):
    """Return {section: value}, keeping only non-empty option values.

    When a section has several non-empty options, the last one read wins
    (so [General] yields the `path` value, not `Local`).
    """
    config = configparser.ConfigParser()
    config.read(path)
    dic = {}
    for section in config.sections():
        for option in config.options(section):
            res = config.get(section, option)
            if res == '':
                continue  # keys with empty values (e.g. the GUID lines) are skipped
            dic[section] = res
    return dic


if __name__ == '__main__':
    print(parse_config('abc.txt'))
Output:
{'DestURL': 'some_URL', 'URLNAME': 'https://', 'CustDetails': 'NOthing', 'DirName': 'TEST', 'General': 'C:\\Work\\July\\abc'}

You can use a dict here:
>>> dic = {}
with open('abc.txt') as f:
data = f.read()
lines = data.split('\n\n')
for line in lines:
line = line.split('\n')
field = line[0].strip('[]')
val = line[-1].split('=')[1]
if val:
dic[field] = val
...
>>> dic
{'DestURL': 'some_URL',
'URLNAME': 'https://',
'CustDetails': 'NOthing',
'DirName': 'TEST',
'General': 'C:\\Work\\July\\abc'}

Related

Add values to unexisting keys in dictionary

I have a directory with many pickled files, each containing a dictionary. The filename indicates the setting of the dictionary. E.g.: 20NewsGroup___10___Norm-False___Probs-True___euclidean.pickle.
I want to combine these different dicts all in one large dict. To do this, I have written the following code:
# The asker's original (broken) code: merge many pickled dicts, nesting them
# by the '___'-separated parts of each filename.
PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)
one_dict = dict()
for i, filename in enumerate(all_dicts):
    # NOTE(review): manual open/close; `with open(...)` would be safer
    infile = open(PATH+filename, 'rb')
    new_dict = pickle.load(infile)
    infile.close()
    # filename pattern: name___p1___p2___p3___metric.pickle
    splitted = filename.split('___')
    splitted[4] = splitted[4].split('.')[0]  # drop the '.pickle' extension
    # KeyError here: one_dict[splitted[0]] does not exist yet -- the nested
    # levels must be created before this assignment (see the answers below)
    one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
However, when I run this, I get a KeyError, as the key for splitted[0] does not yet exist. What is the best way to populate a dictionary similar to what I envision?
You need to create these nested keys first.
Example:
from typing import List

PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)


def check_exist_path(target_dict, paths: List[str]):
    """Walk `paths`, creating an empty dict at every missing key.

    After the call, target_dict[paths[0]]...[paths[-1]] is guaranteed to
    exist (each intermediate level is a dict).
    """
    for key in paths:
        if key not in target_dict:
            target_dict[key] = {}
        target_dict = target_dict[key]


one_dict = dict()
for i, filename in enumerate(all_dicts):
    # context manager replaces the original manual open/close
    with open(PATH + filename, 'rb') as infile:
        new_dict = pickle.load(infile)
    splitted = filename.split('___')
    splitted[4] = splitted[4].split('.')[0]  # strip '.pickle'
    # One call with the first four keys creates every intermediate dict;
    # the original called check_exist_path five times with growing prefixes.
    check_exist_path(one_dict, splitted[:4])
    one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
You could use try-except blocks:
for i, filename in enumerate(all_dicts):
    with open(PATH + filename, 'rb') as infile:
        new_dict = pickle.load(infile)
    splitted = filename.split('___')
    splitted[4] = splitted[4].split('.')[0]  # strip '.pickle'
    # Walk/create the first four levels with setdefault.  The original
    # try/except version reset one_dict[splitted[0]] = {} on ANY KeyError,
    # wiping every previously merged entry under that key; setdefault only
    # creates the levels that are actually missing.
    node = one_dict
    for key in splitted[:4]:
        node = node.setdefault(key, {})
    node[splitted[4]] = new_dict
This code tries to access nested dictionary keys, if they don't exist, then the code will create them with an empty dictionary.

internal error occured after json decoding 1100 elements using python

I am using Python to decode an API response, which is in JSON format. After checking about 1100 items, this error occurs:
<Response [200]>
{'error': 'internal error'}
Traceback (most recent call last):
File "E:/freelancer_projects/ahref/app2.py", line 63, in <module>
get_rating()
File "E:/freelancer_projects/ahref/app2.py", line 46, in get_rating
result.append((u,data["domain"]["domain_rating"]))
KeyError: 'domain'
As you can see, the HTTP response from the API is OK (status 200), but the body just says the error is internal. Why does this error occur, and what is the fix? I am using the requests library to get data from the API.
full code:
def get_rating():
    """Read URLs from a user-supplied file, fetch each domain's rating via
    the API, sort by rating (descending) and save to a spreadsheet.

    Skips API responses that do not contain the rating (e.g. the 200
    responses whose body is {'error': 'internal error'}), instead of
    crashing with KeyError: 'domain'.
    """
    fr = input("Enter filename of urls:")
    token = input("Enter token:")
    with open(fr, "r", encoding="utf8") as f:
        urls = f.readlines()
    result = []
    count = 1
    for u in urls:
        # normalise: strip scheme, force a leading "www"
        u = u.replace("http://", "").replace("https://", "")
        if "www" not in u:
            u = "www" + u
        u = u[u.find("www"):]
        u = u.replace('\n', '')
        u = u[:-1]  # NOTE(review): drops the last character; presumably a trailing '/' -- confirm
        data = getPageData(u, token)
        # `data != ""` was always true for a dict; test for the actual keys.
        if isinstance(data, dict) and "domain" in data and "domain_rating" in data["domain"]:
            print(data)
            result.append((u, data["domain"]["domain_rating"]))
            print(count)
            count += 1
    result = sorted(result, key=lambda x: x[1], reverse=True)
    saveData(result)
def saveData(result):
    """Write (url, rating) pairs to output.xlsx with a header row."""
    wb = openpyxl.Workbook()
    sheet = wb.active  # replaces deprecated wb.get_sheet_by_name('Sheet')
    sheet.cell(row=1, column=1).value = "URL"
    sheet.cell(row=1, column=2).value = "DOMAIN RATING"
    # data rows start below the header, hence start=2
    for row, (url, rating) in enumerate(result, start=2):
        sheet.cell(row=row, column=1).value = url
        sheet.cell(row=row, column=2).value = rating
    wb.save("output.xlsx")


if __name__ == "__main__":
    # guarded so importing this module no longer triggers the interactive run
    get_rating()
In this line of your code:
result.append((u,data["domain"]["domain_rating"]))
You are trying to access a key in the data dictionary that does not exist, resulting in your error:
KeyError: 'domain'
You'd best check if the key exists in your dictionary before accessing, like this:
if "domain" in data:
print('found domain')
On a side note, using this code
if data != ""
to check your dictionary isn't a very good idea, it will always be True, see this example in Python interpreter:
>>> mydict = dict(foo='foo', bar='bar')
>>> mydict
{'foo': 'foo', 'bar': 'bar'}
>>> mydict != ""
True
>>> mydict2 = dict()
>>> mydict2
{}
>>> mydict2 != ""
True
>>> mydict2 = None
>>> mydict2 != ""
True
As you can see, with an empty dict, and even if we set mydict2 to None, we satisfy the condition, but your code below would fail.

trying to create a dictionary from a text file but

so, I have text file (a paragraph) and I need to read the file and create a dictionary containing each different word from the file as a key and the corresponding value for each key will be an integer showing the frequency of the word in the text file.
an example of what the dictionary should look like:
{'and':2, 'all':1, 'be':1, 'is':3} etc.
so far I have this,
def create_word_frequency_dictionary(filename='dictionary.txt'):
    """Print and return {word: count} for every whitespace-separated,
    lower-cased word in `filename`.

    Fixes two bugs in the original: it only looked at word_list[0]
    (the first word of each line), and it always stored frequency+1 == 1
    instead of incrementing the existing count.
    """
    my_dictionary = {}
    with open(filename, 'r') as infile:  # with-block guarantees the file is closed
        for line in infile:
            for word in line.lower().split():
                my_dictionary[word] = my_dictionary.get(word, 0) + 1
    print(my_dictionary)
    return my_dictionary


if __name__ == '__main__':
    create_word_frequency_dictionary()
any help would be appreciated thanks.
Documentation defines collections module as "High-performance container datatypes". Consider using collections.Counter instead of re-inventing the wheel.
from collections import Counter


def count_words(filename='dictionary.txt'):
    """Return a Counter of whitespace-separated words in `filename`.

    Replaces the original module-level snippet, which read the file at
    import time and never closed it; str() around read() was redundant.
    """
    with open(filename) as infile:
        return Counter(infile.read().split())


if __name__ == '__main__':
    print(count_words())
Update:
Okay, I fixed your code and now it works, but Counter is still a better option:
def create_word_frequency_dictionary () :
    """Read 'dictionary.txt' and print a dict mapping each lower-cased,
    whitespace-separated word to its number of occurrences."""
    filename = 'dictionary.txt'
    infile = open(filename, 'r')
    lines = infile.readlines()
    my_dictionary = {}
    for line in lines:
        row = str(line.lower())  # str() is redundant: lower() already returns a str
        for word in row.split():
            # classic count-or-initialise; dict.get or collections.Counter
            # would shorten this, as noted above the snippet
            if word in my_dictionary:
                my_dictionary[word] = my_dictionary[word] + 1
            else:
                my_dictionary[word] = 1
    infile.close()
    print (my_dictionary)

create_word_frequency_dictionary()
If you are not using version of python which has Counter:
>>> import collections
>>> words = ["a", "b", "a", "c"]
>>> word_frequency = collections.defaultdict(int)
>>> for w in words:
... word_frequency[w] += 1
...
>>> print word_frequency
defaultdict(<type 'int'>, {'a': 2, 'c': 1, 'b': 1})
Just replace my_dictionary[words] = frequency+1 with my_dictionary[words] = my_dictionary.get(words, 0)+1 — otherwise the first occurrence of each word raises a KeyError.

file reading in python

So my whole problem is that I have two files one with following format(for Python 2.6):
#comments
config = {
#comments
'name': 'hello',
'see?': 'world':'ABC',CLASS=3
}
This file has number of sections like this. Second file has format:
[23]
[config]
'name'='abc'
'see?'=
[23]
Now the requirement is that I need to compare both files and generate file as:
#comments
config = {
#comments
'name': 'abc',
'see?': 'world':'ABC',CLASS=3
}
So the result file will contain the values from the first file, unless the value for same attribute is there in second file, which will overwrite the value. Now my problem is how to manipulate these files using Python.
Thanks in advance, and thanks for your quick previous answers. Note that I need to use Python 2.6.
Was unable to find a beautiful solution due to the comments. This is tested and works for me, but requires Python 3.1 or higher:
from collections import OrderedDict

# indentation unit used when pretty-printing the merged config
indenting = '\t'


def almost_py_read(f):
    """Parse the pseudo-Python config format into an OrderedDict.

    Top-level comment lines are stored as {comment: None}; a line ending
    in '{' opens a section stored as a nested OrderedDict keyed by the
    text left of '='; "key: value" lines go into the active dict.  Lines
    that do not split into exactly one key and one value (e.g.
    "'see?': 'world':'ABC'") are skipped, exactly as in the original.
    """
    sections = OrderedDict()
    active = sections
    for line in f:
        line = line.strip()
        if line.startswith('#'):
            active[line] = None          # preserve comments, in order
        elif line.endswith('{'):
            key = line.split('=')[0].strip()
            contents = OrderedDict()
            active = contents
            sections[key] = contents
        elif line.endswith('}'):
            active = sections            # close the current section
        else:
            try:
                k, v = line.split(':')
            except ValueError:           # was a bare `except: pass`, which hid real bugs
                continue
            active[k.strip()] = v.strip()
    return sections
def almost_ini_read(f):
    """Parse the minimal [section] / key=value override file.

    Empty values (e.g. "'see?'=") mean "no override" and are skipped.
    Lines before any section header are ignored.  The original abused a
    bare except for this control flow and left a debug print(sections)
    in place; both are removed here.
    """
    sections = OrderedDict()
    contents = None
    for line in f:
        line = line.strip()
        if line.startswith('[') and line.endswith(']'):
            contents = OrderedDict()
            sections[line[1:-1]] = contents
        elif '=' in line and contents is not None:
            k, v = line.split('=', 1)  # split once so values may contain '='
            v = v.strip()
            if v:
                contents[k.strip()] = v
    return sections
def compilefiles(pyname, ininame):
    """Merge the two files: ini values override py values; sections that
    exist only in the ini file are added.

    The original never closed either file; `with` blocks fix the leak.
    """
    with open(pyname, 'rt') as pyf:
        sections = almost_py_read(pyf)
    with open(ininame, 'rt') as inif:
        override_sections = almost_ini_read(inif)
    for section_key, section_value in override_sections.items():
        # keep the original falsy test: a missing OR empty section is replaced
        if not sections.get(section_key):
            sections[section_key] = OrderedDict()
        for k, v in section_value.items():
            sections[section_key][k] = v
    return sections
def output(d, indent='', indent_unit='\t'):
    """Pretty-print the nested OrderedDict back in the original format.

    None values are stored comment lines; dict values open a "key = { ... }"
    section; string values print as "key: value,".  Falsy non-None values
    (e.g. an empty section) are silently dropped, as in the original.
    `indent_unit` replaces the module global `indenting` (same default).
    """
    for k, v in d.items():
        if v is None:                    # was `v == None`
            print(indent + k)
        elif v:
            if isinstance(v, str):       # was `type(v) == str`
                print(indent + k + ': ' + v + ',')
            else:
                print(indent + k + ' = {')
                output(v, indent + indent_unit, indent_unit)
                print(indent + '}')
if __name__ == '__main__':
    # Build the merged configuration (a.txt overridden by b.ini) and print
    # it; guarded so importing this file has no side effects.
    d = compilefiles('a.txt', 'b.ini')
    output(d)
Output:
#comments
config = {
#comments
'name': 'abc',
'see?': 'world',
}
I had a really long and hard time to manage to write the following code.
I had difficulties to manage with commas. I wanted the updated file to have after the updating the same format as the file to update before the updating : lines end with a comma, except for the last one.
This code is crafted for the particular problem as exposed by the questioner and can't be used as-is for another type of problem. I know. It's the problem of using a code based on regex and not on a parser, I'm fully aware of that. But I think that it is a canvas that can be relatively easily adapted to other cases, by changing the regexes, which is a relatively readily process thanks to the malleability of regexes.
def file_updating(updating_filename,updating_data_extractor,filename_to_update):
    """Update `filename_to_update` in place with values extracted from
    `updating_filename`.

    Python 2 only: relies on generator .next() and dict.iteritems().
    """
    # function whose name is hold by updating_data_extractor parameter
    # is a function that
    # extracts data from the file whose name is hold by updating_filename parameter
    # and must return a tuple:
    # ( updating dictionary , compiled regex )
    updating_dico,pat = updating_data_extractor( updating_filename )
    with open(filename_to_update,'r+') as f:
        lines = f.readlines()
        # Rewrite one existing line: if its key is in the updating dict,
        # substitute the new value (and consume the entry via pop);
        # otherwise keep the line as-is with a trailing comma.
        def jiji(line,dico = updating_dico ):
            mat = pat.search(line.rstrip())
            if mat and mat.group(3) in dico:
                return '%s: %s,' % (mat.group(1),dico.pop(mat.group(3)))
            else:
                # NOTE(review): `line` still ends with '\n' here, so
                # rstrip(',') is a no-op before the ',' is appended -- confirm intended
                return line.rstrip(',') + ','
        li = [jiji(line) for line in lines[0:-1] ] # [0:-1] because last line is '}'
        # leading whitespace of the first matching data line, reused to
        # indent the brand-new entries appended below
        front = (mit.group(2) for mit in ( pat.search(line) for line in lines ) if mit).next()
        # entries left in updating_dico were not matched above: append them as new lines
        li.extend(front + '%s: %s,' % item for item in updating_dico.iteritems() )
        li[-1] = li[-1].rstrip(',')  # last entry must not end with a comma
        li.append('}')
        f.seek(0,0)
        f.writelines( '\n'.join(li) )
        f.truncate()  # discard any leftover tail from the longer old content
Exemplifying code:
import re
# Two sample "file to update" payloads used by the demo loop below.
bef1 = '''#comments
config =
{
#comments
'name': 'hello',
'arctic':01011101,
'summu': 456,
'see?': 'world',
'armorique': 'bretagne'
}'''

# Second sample: one value is itself a (malformed) nested dict literal.
bef2 = '''#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}'''
def one_extractor(data_containing_filename):
    """Extract updating data from a renew-file.

    Returns (updating_data, pat): a dict of key -> new value (entries with
    empty values are dropped) and the compiled regex used to match data
    lines in the file to be updated.  Exits if the file does not contain a
    [N]...[config]...[N] block.
    """
    with open(data_containing_filename) as g:
        # [N] ... [config] <payload> [N]  -- N must repeat (backreference \1)
        contg = re.search(r'\[(\d+)\].+\[config\](.*?)\[(\1)\]', g.read(), re.DOTALL)
    if contg:
        updtgen = (re.match(r"([^=]+)=[ \f\t\v]*([^ \f\t\v].*|)", line.strip())
                   for line in contg.group(2).splitlines())
        # keep only entries whose value part is non-empty
        updating_data = dict(mi.groups() for mi in updtgen if mi and mi.group(2))
    else:
        from sys import exit
        # BUG FIX: the original referenced the undefined name `updating_filename`
        exit(data_containing_filename + " isn't a valid file for updating")
    pat = re.compile(r"(([ \t]*)([^:]+)):\s*(.+),?")
    return (updating_data, pat)
# Demo: run the in-place updater once for each sample payload.
# Python 2 script (print statements; file_updating above is py2-only).
for bef in (bef1,bef2):
    # file to update: rudu.txt
    with open('rudu.txt','w') as jecr:
        jecr.write(bef)
    # updating data: renew_rudu.txt
    with open('renew_rudu.txt','w') as jecr:
        jecr.write('''[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]''')
    # show the target file before updating
    print 'BEFORE ---------------------------------'
    with open('rudu.txt') as lir:
        print lir.read()
    print '\nUPDATING DATA --------------------------'
    with open('renew_rudu.txt') as lir:
        print lir.read()
    # perform the in-place update of rudu.txt
    file_updating('renew_rudu.txt',one_extractor,'rudu.txt')
    print '\nAFTER ================================='
    with open('rudu.txt','r') as f:
        print f.read()
    print '\n\nX#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#\n'
Result:
>>>
BEFORE ---------------------------------
#comments
config =
{
#comments
'name': 'hello',
'arctic':01011101,
'summu': 456,
'see?': 'world',
'armorique': 'bretagne'
}
UPDATING DATA --------------------------
[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]
AFTER =================================
#comments,
config =,
{,
#comments,
'name': 'abc',
'arctic':01011101,
'summu': 'tumulus',
'see?': 'world',
'armorique': 'BRETAGNE',
'boloni': 7600,
'nuclear': 'apocalypse'
}
X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#
BEFORE ---------------------------------
#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}
UPDATING DATA --------------------------
[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]
AFTER =================================
#comments,
config =,
{,
#comments,
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
'armorique': 'BRETAGNE',
'boloni': 7600,
'summu': 'tumulus',
'nuclear': 'apocalypse'
}
X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#
>>>
.
EDIT:
I have improved the code because I was still unsatisfied. Now the "variable" front catches the blank characters ( ' ' or '\t' ) at the beginning of the data-containing lines in the file to be updated.
I had also forgot the instruction f.truncate() which is very important to not keep a tail of undesired characters.
I am satisfied to see that my code works well even with the following file, in which a value is a dictionary, as presented by Jagdev:
#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}
That confirms me in my choice to process line after line , and not trying to run through the entire file with a regex.
.
EDIT 2:
I again changed the code. The updating is performed by a function that takes as arguments :
the name of the updating file (the file containing the data used to udpdate another file)
and the function that is suited to extract the data from this particular updating file
Hence, it is possible to update a given file with data from various updating files. That makes the code more generic.
Very roughly (i.e. this hasn't been tested at all, and there are numerous improvements that could be made, such as using regexes and/or pretty-printing):
# Rough, explicitly untested Python 2 sketch (the author says so above).
# NOTE(review): eval/exec on file contents is unsafe on untrusted input.
dicts = []
with open('file1') as file1:
    try:
        file1content = file1.read()
        eval(file1content )
        # NOTE(review): these two strip() results are discarded -- str.strip
        # returns a new string; as written they do nothing
        file1content.strip(' ')
        file1content.strip('\t')
        for line in file1content.splitlines():
            if '={' in line:
                # NOTE(review): split() returns a list, which has no .strip();
                # this line raises AttributeError as written
                dicts.append(line.split('={').strip())
    except:
        # bare except: also hides the AttributeError above
        print 'file1 not valid'
with open('file2') as file2:
    filelines = file2.readlines()
    # indentation reconstructed from context -- TODO confirm against original
    while filelines:
        # skip until the opening [23] marker
        while filelines and '[23]' not in filelines[0]:
            filelines.pop(0)
        if filelines:
            filelines.pop(0)
            # section header like [config] -> 'config'
            dictname = filelines.pop(0).split('[')[1].split(']')[0]
            if dictname not in dicts:
                dicts.append(dictname)
                exec(dictname + ' = {}')
            while filelines and '[23]' not in filelines[0]:
                line = filelines.pop(0)
                [k,v] = line.split('=')
                # NOTE(review): results discarded again; k/v keep their whitespace
                k.strip()
                v.strip()
                if v:
                    exec(dictname + '[k] = v')
with open('file3', 'w') as file3:
    # backticks are Python 2 repr() syntax
    file3content = '\n'.join([`eval(dictname)` for dictname in dicts])
    file3.write(file3content)

python parse configuration file, containing list of filenames

I would like to parse a config file, containing list of filenames, divided in sections:
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
..
I tried ConfigParser, but it requires pairs of name-value. How can I parse such a file?
Likely you have to implement the parser on your own.
Blueprint:
def parse_sections(lines):
    """Parse "[section]" headers followed by value-less lines into
    {section_name: [lines]} -- the format ConfigParser rejects.

    Fixes the blueprint's bugs: the final section was never emitted (it
    only printed on seeing the NEXT header), lines kept their trailing
    newlines, and the Python 2 `print key, current` only displayed the
    data instead of collecting it.
    """
    sections = {}
    key = None
    current = []
    for raw in lines:
        line = raw.strip()
        if line.startswith('[') and line.endswith(']'):
            if key is not None:
                sections[key] = current  # flush the previous section
            key = line[1:-1]
            current = []
        elif line:
            current.append(line)
    if key is not None:
        sections[key] = current  # BUG FIX: the original dropped the last section
    return sections
Here is an iterator/generator solution:
data = """\
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
...""".splitlines()
def sections(it):
    """Yield ('[name]', [body lines]) pairs from an iterator of lines.

    The header line itself (brackets included) is the key, exactly as in
    the original.  Fixed for empty input: the original let StopIteration
    escape from the first next(it), which PEP 479 (Python 3.7+) turns
    into a RuntimeError; now an empty iterator just yields nothing.
    """
    try:
        nextkey = next(it)
    except StopIteration:
        return
    fin = False
    while not fin:
        key = nextkey
        body = ['']  # sentinel so body[-1].startswith is always safe
        try:
            while not body[-1].startswith('['):
                body.append(next(it))
        except StopIteration:
            fin = True  # input exhausted: emit the final section and stop
        else:
            nextkey = body.pop(-1)  # the next header we just read
        yield key, body[1:]
# Python 3 print function (the original used the py2 statement form and
# the removed py2 builtin file()).
print(dict(sections(iter(data))))
# if reading from a file, do: dict(sections(open('filename.dat')))

Categories

Resources