file reading in python - python

So my whole problem is that I have two files one with following format(for Python 2.6):
#comments
config = {
#comments
'name': 'hello',
'see?': 'world':'ABC',CLASS=3
}
This file has number of sections like this. Second file has format:
[23]
[config]
'name'='abc'
'see?'=
[23]
Now the requirement is that I need to compare both files and generate file as:
#comments
config = {
#comments
'name': 'abc',
'see?': 'world':'ABC',CLASS=3
}
So the result file will contain the values from the first file, unless the value for same attribute is there in second file, which will overwrite the value. Now my problem is how to manipulate these files using Python.
Thanks in advance and for your previous answers in short time ,I need to use python 2.6

Was unable to find a beautiful solution due to the comments. This is tested and works for me, but requires Python 3.1 or higher:
from collections import OrderedDict
indenting = '\t'
def almost_py_read(f):
sections = OrderedDict()
contents = None
active = sections
for line in f:
line = line.strip()
if line.startswith('#'):
active[line] = None
elif line.endswith('{'):
k = line.split('=')[0].strip()
contents = OrderedDict()
active = contents
sections[k] = contents
elif line.endswith('}'):
active = sections
else:
try:
k, v = line.split(':')
k = k.strip()
v = v.strip()
active[k] = v
except:
pass
return sections
def almost_ini_read(f):
sections = OrderedDict()
contents = None
for line in f:
line = line.strip()
try:
k, v = line.split('=')
k = k.strip()
v = v.strip()
if v:
contents[k] = v
except:
if line.startswith('[') and line.endswith(']'):
contents = OrderedDict()
sections[line[1:-1]] = contents
print(sections)
return sections
def compilefiles(pyname, ininame):
sections = almost_py_read(open(pyname, 'rt'))
override_sections = almost_ini_read(open(ininame, "rt"))
for section_key, section_value in override_sections.items():
if not sections.get(section_key):
sections[section_key] = OrderedDict()
for k, v in section_value.items():
sections[section_key][k] = v
return sections
def output(d, indent=''):
for k, v in d.items():
if v == None:
print(indent+k)
elif v:
if type(v) == str:
print(indent+k+': '+v+',')
else:
print(indent+k+' = {')
output(v, indent+indenting)
print(indent+'}')
d = compilefiles('a.txt', 'b.ini')
output(d)
Output:
#comments
config = {
#comments
'name': 'abc',
'see?': 'world',
}

I had a really long and hard time to manage to write the following code.
I had difficulties to manage with commas. I wanted the updated file to have after the updating the same format as the file to update before the updating : lines end with a comma, except for the last one.
This code is crafted for the particular problem as exposed by the questioner and can't be used as-is for another type of problem. I know. It's the problem of using a code based on regex and not on a parser, I'm fully aware of that. But I think that it is a canvas that can be relatively easily adapted to other cases, by changing the regexes, which is a relatively readily process thanks to the malleability of regexes.
def file_updating(updating_filename,updating_data_extractor,filename_to_update):
# function whose name is hold by updating_data_extractor parameter
# is a function that
# extracts data from the file whose name is hold by updating_filename parameter
# and must return a tuple:
# ( updating dictionary , compiled regex )
updating_dico,pat = updating_data_extractor( updating_filename )
with open(filename_to_update,'r+') as f:
lines = f.readlines()
def jiji(line,dico = updating_dico ):
mat = pat.search(line.rstrip())
if mat and mat.group(3) in dico:
return '%s: %s,' % (mat.group(1),dico.pop(mat.group(3)))
else:
return line.rstrip(',') + ','
li = [jiji(line) for line in lines[0:-1] ] # [0:-1] because last line is '}'
front = (mit.group(2) for mit in ( pat.search(line) for line in lines ) if mit).next()
li.extend(front + '%s: %s,' % item for item in updating_dico.iteritems() )
li[-1] = li[-1].rstrip(',')
li.append('}')
f.seek(0,0)
f.writelines( '\n'.join(li) )
f.truncate()
Exemplifying code:
import re
bef1 = '''#comments
config =
{
#comments
'name': 'hello',
'arctic':01011101,
'summu': 456,
'see?': 'world',
'armorique': 'bretagne'
}'''
bef2 = '''#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}'''
def one_extractor(data_containing_filename):
with open(data_containing_filename) as g:
contg = re.search('\[(\d+)\].+\[config\](.*?)\[(\\1)\]',g.read(),re.DOTALL)
if contg:
updtgen = ( re.match("([^=]+)=[ \f\t\v]*([^ \f\t\v].*|)",line.strip())
for line in contg.group(2).splitlines() )
updating_data = dict( mi.groups() for mi in updtgen if mi and mi.group(2))
else:
from sys import exit
exit(updating_filename + " isn't a valid file for updating")
pat = re.compile("(([ \t]*)([^:]+)):\s*(.+),?")
return (updating_data,pat)
for bef in (bef1,bef2):
# file to update: rudu.txt
with open('rudu.txt','w') as jecr:
jecr.write(bef)
# updating data: renew_rudu.txt
with open('renew_rudu.txt','w') as jecr:
jecr.write('''[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]''')
print 'BEFORE ---------------------------------'
with open('rudu.txt') as lir:
print lir.read()
print '\nUPDATING DATA --------------------------'
with open('renew_rudu.txt') as lir:
print lir.read()
file_updating('renew_rudu.txt',one_extractor,'rudu.txt')
print '\nAFTER ================================='
with open('rudu.txt','r') as f:
print f.read()
print '\n\nX#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#\n'
Result:
>>>
BEFORE ---------------------------------
#comments
config =
{
#comments
'name': 'hello',
'arctic':01011101,
'summu': 456,
'see?': 'world',
'armorique': 'bretagne'
}
UPDATING DATA --------------------------
[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]
AFTER =================================
#comments,
config =,
{,
#comments,
'name': 'abc',
'arctic':01011101,
'summu': 'tumulus',
'see?': 'world',
'armorique': 'BRETAGNE',
'boloni': 7600,
'nuclear': 'apocalypse'
}
X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#
BEFORE ---------------------------------
#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}
UPDATING DATA --------------------------
[23]
[config]
'nuclear'= 'apocalypse'
'name'='abc'
'armorique'= 'BRETAGNE'
'arctic'=
'boloni'=7600
'see?'=
'summu'='tumulus'
[23]
AFTER =================================
#comments,
config =,
{,
#comments,
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
'armorique': 'BRETAGNE',
'boloni': 7600,
'summu': 'tumulus',
'nuclear': 'apocalypse'
}
X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#X#
>>>
.
EDIT:
I have improved the code because I was still insatisfied. Now the "variable" front catches the blank characters ( ' ' or '\t' ) at the beginning of the data-containing lines in the file to be updated.
I had also forgot the instruction f.truncate() which is very important to not keep a tail of undesired characters.
I am satisfied to see that my code works well even with the following file in which a value is a dictionnary, as presented by Jagdev:
#comments
config =
{
#comments
'name': 'abc',
'see?': { 'world':'india':'jagdev'},
}
That confirms me in my choice to process line after line , and not trying to run through the entire file with a regex.
.
EDIT 2:
I again changed the code. The updating is performed by a function that takes as arguments :
the name of the updating file (the file containing the data used to udpdate another file)
and the function that is suited to extract the data from this particular updating file
Hence, it is possible to update a given file with data from various updating files. That makes the code more generic.

Very roughly (i.e. this hasn't been tested at all, and there are numerous imprvements that could be made such as the use of regex and/or pretty-printing):
dicts = []
with open('file1') as file1:
try:
file1content = file1.read()
eval(file1content )
file1content.strip(' ')
file1content.strip('\t')
for line in file1content.splitlines():
if '={' in line:
dicts.append(line.split('={').strip())
except:
print 'file1 not valid'
with open('file2') as file2:
filelines = file2.readlines()
while filelines:
while filelines and '[23]' not in filelines[0]:
filelines.pop(0)
if filelines:
filelines.pop(0)
dictname = filelines.pop(0).split('[')[1].split(']')[0]
if dictname not in dicts:
dicts.append(dictname)
exec(dictname + ' = {}')
while filelines and '[23]' not in filelines[0]:
line = filelines.pop(0)
[k,v] = line.split('=')
k.strip()
v.strip()
if v:
exec(dictname + '[k] = v')
with open('file3', 'w') as file3:
file3content = '\n'.join([`eval(dictname)` for dictname in dicts])
file3.write(file3content)

Related

How do I alter an existing text file to add new data after a specific line using Python

So I am looking to create a script to make a mod for a game using Python. The script will need to copy all files from a directory to another directory, then alter those files to add a new attribute after a specific line. The issue I am having is that this game uses custom coding based on json formatting in a txt file. I know how to do most of this, however, adding the new data is not something I can get to work.
My end goal will be to be able to do this to any file, so other mod authors can use it to add the data to their mods without needing to do it manually. I also want to try to make this script do more advanced things, but that is another goal that can wait till I get this bit working.
Sample data:
The line I need to add is position_priority = ###. The ### will be different based on what the building does (building categories).
Sample code I need to alter:
building_name_number = {
base_build_time = 60
base_cap_amount = 1
category = pop_assembly
<more code>
}
I need to put the new data just after building_name_number, however this exact name will be unique, the only thing that will always be the same is that it will start with building. So regex is what I have been trying to use, but I have never dealt with regex so I cant get it to work.
My Current code:
if testingenabled:
workingdir = R"E:/Illusives-Mods/Stellaris/Building Sorting"
pattern = "^building_"
Usortingindex = sortingindex["sorting_pop_assembly"]
print(f"Testing Perameters: Index: {Usortingindex}, Version: {__VERSION__}, Working DIR: {workingdir}")
# os.chdir(stellaris_buildings_path)
os.chdir(workingdir)
for file in os.listdir(workingdir):
if fnmatch.fnmatch(file, "*.txt"):
print("File found")
with open(file, "r+", encoding="utf-8") as openfiledata:
alllines = openfiledata.read()
for line in alllines:
if line == re.match(r'(^building_)', line, re.M):
print("found match")
# print(f"{sorting_attrib}{Usortingindex}")
# print("position_priority = 200")
openfiledata.write("\n" + sorting_attrib + Usortingindex + "\n")
break
I am not getting any errors with this code. But it doesnt work
I am using Python 3.9.6.
EDIT:
This code is before the script
allow = {
hidden_trigger = {
OR = {
owner = { is_ai = no }
NAND = {
free_district_slots = 0
free_building_slots <= 1
free_housing <= 0
free_jobs <= 0
}
}
}
}
This is after
allow = {
hidden_trigger = {
OR = {
owner = {
is_ai = false
}
NAND = {
free_district_slots = 0
free_building_slots = {
value = 1
operand = <=
}
free_housing = {
value = 0
operand = <=
}
free_jobs = {
value = 0
operand = <=
}
}
}
}
}
The output must be the same as the input, at least in terms of the operators
If you would keep it as JSON then you could read all to Python (to get ti as dictionary), search and add items in dictionary, and write back to JSON new dictionary.
text = '''{
"building_name_number": {
"base_build_time": 60,
"base_cap_amount": 1,
"category": "pop_assembly"
},
"building_other": {}
}'''
import json
data = json.loads(text)
for key in data.keys():
if key.startswith('building_'):
data[key]["position_priority"] = 'some_value'
print(json.dumps(data, indent=4))
Result:
{
"building_name_number": {
"base_build_time": 60,
"base_cap_amount": 1,
"category": "pop_assembly",
"position_priority": "some_value"
},
"building_other": {
"position_priority": "some_value"
}
}
I found module paradox-reader which can convert this file format to JSON file.
Using code from file paradoxReader.py I created example which can convert string to Python dictionary, add some value and convert to something similar to original file. But this may need to add more code in encode()
import json
import re
def decode(data):#, no_json):
data = re.sub(r'#.*', '', data) # Remove comments
data = re.sub(r'(?<=^[^\"\n])*(?<=[0-9\.\-a-zA-Z])+(\s)(?=[0-9\.\-a-zA-Z])+(?=[^\"\n]*$)', '\n', data, flags=re.MULTILINE) # Seperate one line lists
data = re.sub(r'[\t ]', '', data) # Remove tabs and spaces
definitions = re.findall(r'(#\w+)=(.+)', data) # replace #variables with value
if definitions:
for definition in definitions:
data = re.sub(r'^#.+', '', data, flags=re.MULTILINE)
data = re.sub(definition[0], definition[1], data)
data = re.sub(r'\n{2,}', '\n', data) # Remove excessive new lines
data = re.sub(r'\n', '', data, count=1) # Remove the first new line
data = re.sub(r'{(?=\w)', '{\n', data) # reformat one-liners
data = re.sub(r'(?<=\w)}', '\n}', data) # reformat one-liners
data = re.sub(r'^[\w-]+(?=[\=\n><])', r'"\g<0>"', data, flags=re.MULTILINE) # Add quotes around keys
data = re.sub(r'([^><])=', r'\1:', data) # Replace = with : but not >= or <=
data = re.sub(r'(?<=:)(?!-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?)(?!\".*\")[^{\n]+', r'"\g<0>"', data) # Add quotes around string values
data = re.sub(r':"yes"', ':true', data) # Replace yes with true
data = re.sub(r':"no"', ':false', data) # Replace no with false
data = re.sub(r'([<>]=?)(.+)', r':{"value":\g<2>,"operand":"\g<1>"}', data) # Handle < > >= <=
data = re.sub(r'(?<![:{])\n(?!}|$)', ',', data) # Add commas
data = re.sub(r'\s', '', data) # remove all white space
data = re.sub(r'{(("[a-zA-Z_]+")+)}', r'[\g<1>]', data) # make lists
data = re.sub(r'""', r'","', data) # Add commas to lists
data = re.sub(r'{("\w+"(,"\w+")*)}', r'[\g<1>]', data)
data = re.sub(r'((\"hsv\")({\d\.\d{1,3}(,\d\.\d{1,3}){2}})),', r'{\g<2>:\g<3>},', data) # fix hsv objects
data = re.sub(r':{([^}{:]*)}', r':[\1]', data) # if there's no : between list elements need to replace {} with []
data = re.sub(r'\[(\w+)\]', r'"\g<1>"', data)
data = re.sub(r'\",:{', '":{', data) # Fix user_empire_designs
data = '{' + data + '}'
return json.loads(data)
def encode(data):
text = json.dumps(data, indent=4)
text = text[2:-2]
text = text.replace('"', '').replace(':', ' =').replace(',', '')
return text
# ----------
text = '''building_name_number = {
base_build_time = 60
base_cap_amount = 1
category = pop_assembly
}'''
data = decode(text)
data['building_name_number']['new_item'] = 123
text = encode(data)
print(text)
Result:
building_name_number = {
base_build_time = 60
base_cap_amount = 1
category = pop_assembly
new_item = 123
}

How to Change dictionary values in python file from another file

I would like to change values in a Dict in another file. File1.py contains the code to edit the Dict, File2.py contains the Dict itself.
File1.py is generating a code to replace BTOK values only.
File1.py:
with open('file2.py', 'r') as file :
filedata = file.read()
print (filedata.str(BTK['btk1']))
for line in filedata:
line['btk1'] = BTok
with open('file2.py', 'w') as file:
file.write(line)
File2.py:
c = {
'id' : 'C80e3ce43c3ea3e8d1511ec',
'secret' : 'c10c371b4641010a750073925b0857'
}
rk = {
't1' : 'ZTkwMGE1MGEt',
}
BTK = {
'BTok' : '11eyJhbGc'
}
If you want to do this reliably, that is, so it works whether your strings are quoted with ', " or """, for whatever values they have and whatever newlines you want to put around values, then you may want to use ast to parse the source code and modify it. The only inconvenient with this is that module cannot, by itself, generate code, so you would need to install some additional dependency such as astor, for what is essentially a rather menial task. In any case, here is how you could do it that way:
import ast
import astor
# To read from file:
# with open('file2.py', 'r') as f: code = f.read()
code = """
c = {
'id' : 'C80e3ce43c3ea3e8d1511ec',
'secret' : 'c10c371b4641010a750073925b0857'
}
rk = {
't1' : 'ZTkwMGE1MGEt',
}
BTK = {
'BTok' : '11eyJhbGc'
}
"""
# Value to replace
KEY = 'BTok'
NEW_VALUE = 'new_btok'
# Parse code
m = ast.parse(code)
# Go through module statements
for stmt in m.body:
# Only look at assignments
if not isinstance(stmt, ast.Assign): continue
# Take right-hand side of the assignment
value = stmt.value
# Only look at dict values
if not isinstance(value, ast.Dict): continue
# Look for keys that match what we are looking for
replace_idx = [i for i, k in enumerate(value.keys)
if isinstance(k, ast.Str) and k.s == KEY]
# Replace corresponding values
for i in replace_idx:
value.values[i] = ast.Str(NEW_VALUE)
new_code = astor.to_source(m)
# To write to file:
# with open(`file2.py', 'w') as f: f.write(new_code)
print(new_code)
# c = {'id': 'C80e3ce43c3ea3e8d1511ec', 'secret':
# 'c10c371b4641010a750073925b0857'}
# rk = {'t1': 'ZTkwMGE1MGEt'}
# BTK = {'BTok': 'new_btok'}

Need help to write right parsing option

I need to parse text file which looks like this:
key : 123
anotherKey : qwer
oneMoreKey :
somestring,
somestring
There are a lot of this type of strings, they are generated automatically by server, so I don't know how much of them will receive parser
I have solved parsing of foo : bar like this:
def main():
data = {}
file = open('file.txt') # opening log file
for line in file:
if re.match(r'^\s*$', line):
pass
else:
line = line.split(':')
key = line[0].strip()
if len(line) == 2: # this is a workaround for lines like "foo :\n
value = line[1].strip()
else:
value = 'none'
if key in data:
pass
else:
data[key] = value
I need to get all data in json like
{
key : 123,
anotherKey : qwer,
oneMoreKey : [somestring, somestring]
}
Sth. like this?
import re
rx = re.compile(r'^(?P<key>\w+)\s:(?P<value>.+?)(?=^\w+\s*:|\Z)', re.S | re.M)
junk = """key : 123
anotherKey : qwer
foo : bar, zxc
oneMoreKey :
somestring,
somestring
"""
def match(m):
values = [val for value in re.split(r', *[\n\r]+', m) for val in [value.strip()] if val]
return values if len(values) > 1 else m.strip()
d = {m.group('key'): match(m.group('value')) for m in rx.finditer(junk)}
print(d)
This yields
{'key': '123', 'anotherKey': 'qwer', 'foo': 'bar, zxc', 'oneMoreKey': ['somestring', 'somestring']}
See a demo on regex101.com.

Python File String Replace Dict and Tuple

have a Dict with multiple values in a tuple.
newhost = {'newhost.com': ('1.oldhost.com',
'2.oldhost.com',
'3.oldhost.com',
'4.oldhost.com')
}
I wanna open a existing file and search for lines in this file that contains a value of the oldhosts. A file can have multiple Account Lines. In example
Account: 1.oldhost.com username
Account: someotherhost username
When the line with 1.oldhost.com or 2.oldhost.com or 3.oldhost.com and so on is found i wanna replace it with the key form the dict newhost.com.
Can anyone help me? Searched alot, but didnt find the right thing.
Regards
Maybe something like this could get you started
infile_name = 'some_file.txt'
# Open and read the incoming file
with open(infile_name, 'r') as infile:
text = infile.read()
# Cycle through the dictionary
for newhost, oldhost_list in host_dict.items():
# Cycle through each possible old host
for oldhost in oldhost_list:
text.replace(oldhost, newhost)
outfile_name = 'some_other_file.txt'
# Write to file
with open(outfile_name, 'w') as outfile:
outfile.write(text)
Not claiming this to be the best solution, but it should be a good start for you.
To easily find the new host for a given old host, you should convert your data structure:
# your current structure
new_hosts = {
'newhost-E.com': (
'1.oldhost-E.com',
'2.oldhost-E.com',
),
'newhost-A.com': (
'1.oldhost-A.com',
'2.oldhost-A.com',
'3.oldhost-A.com',
),
}
# my proposal
new_hosts_2 = {
v: k
for k, v_list in new_hosts.items()
for v in v_list}
print(new_hosts_2)
# {
# '1.oldhost-E.com': 'newhost-E.com',
# '2.oldhost-E.com': 'newhost-E.com',
# '1.oldhost-A.com': 'newhost-A.com',
# '2.oldhost-A.com': 'newhost-A.com',
# '3.oldhost-A.com': 'newhost-A.com',
# }
This does repeat the new host names (the values in new_hosts_2), but it will allow you to quickly look up given an old host name:
some_old_host = 'x.oldhost.com'
the corresponding_new_host = new_hosts_2[some_old_host]
Now you just need to:
read the lines of the file
find the old hostname in that line
lookup the corresponding new host in new_hosts_2
replace that value in the line
write the line to a new file
Maybe like this:
with open(file_name_1, 'r') as fr:
with open(file_name_2, 'w') as fw:
for line in fr:
line = line.strip()
if len(line) > 0:
# logic to find the start and end position of the old host
start_i = ?
end_i = ?
# get and replace, but only if its found in 'new_hosts_2'
old_host = line[start_i:end_i]
if old_host in new_hosts_2:
line = line[:start_i] + new_hosts_2[old_host] + line[end_i:]
fw.write(line + '\n')
Thank you for your tips. I came up with this now and it is working fine.
import fileinput
textfile = 'somefile.txt'
curhost = 'newhost.com'
hostlist = {curhost: ('1.oldhost.com',
'2.oldhost.com',
'3.oldhost.com')
}
new_hosts_2 = {
v: k
for k, v_list in hostlist.items()
for v in v_list}
for line in fileinput.input(textfile, inplace=True):
line = line.rstrip()
if not line:
continue
for f_key, f_value in new_hosts_2.items():
if f_key in line:
line = line.replace(f_key, f_value)
print line

Parse the file section wise in python

I have a text (abc.txt) file having the following entry in text file:
[General]
Local=C:\Work\July\
path=C:\Work\July\abc
[Field1]
BB0B2BA8--EFE4-4567-B8AE-0204D4BF9F60=
[CustDetails]
BB0B2BA8-EFE4-4567-B8AE-0204D4BF9F60=NOthing
[DirName]
8e27822e-5f46-4f41=TEST
[URLNAME]
8e27822e-5f46=https://
[DestURL]
8e27822e-5f46=some_URL
I want to parse the abc.txt file and take into variable. like in variable
MYpath = C:\Work\July\abc
custdetails= Nothing
dir_name = TEST
URL_Name = https://
DestURL = some_URL
Thanks,
Using ConfigParser:
import ConfigParser
config = ConfigParser.ConfigParser()
config.read('abc.txt')
dic = {}
for section in config.sections():
for option in config.options(section):
res = config.get(section, option)
if res == '':
continue
dic.update({section: res})
print dic
Output:
{'DestURL': 'some_URL', 'URLNAME': 'https://', 'CustDetails': 'NOthing', 'DirName': 'TEST', 'General': 'C:\\Work\\July\\abc'}
You can use a dict here:
>>> dic = {}
with open('abc.txt') as f:
data = f.read()
lines = data.split('\n\n')
for line in lines:
line = line.split('\n')
field = line[0].strip('[]')
val = line[-1].split('=')[1]
if val:
dic[field] = val
...
>>> dic
{'DestURL': 'some_URL',
'URLNAME': 'https://',
'CustDetails': 'NOthing',
'DirName': 'TEST',
'General': 'C:\\Work\\July\\abc'}

Categories

Resources