Index breaks in a set - python

I'm writing a python script that takes a text file like the following as input:
0 A (*) Begin
1 A
2 A
3 A
4 A
5 A (*) End
0 B (*) Begin
1 B
2 B
3 B
4 B
5 B
6 B (*) End
0 B (*) Begin
1 B
2 B (*) End
0 B (*) Begin
1 B
2 B
3 B
4 B (*) End
Now I'd like to get output like this:
0 A (*) Begin [0]
1 A
2 A
3 A
4 A
5 A (*) End [0]
0 B (*) Begin [0]
1 B
2 B
3 B
4 B
5 B
6 B (*) End [0]
0 B (*) Begin [1]
1 B
2 B (*) End [1]
0 B (*) Begin [2]
1 B
2 B
3 B
4 B (*) End [2]
that would be the translation of:
>>>A = [0,1,2,3,4,5]
>>>A
[0, 1, 2, 3, 4, 5]
>>>len(A)
6
>>>B = [(0,1,2,3,4,5,6), (0,1,2), (0,1,2,3,4)]
>>>B
[(0,1,2,3,4,5,6), (0,1,2), (0,1,2,3,4)]
>>>len(B[0])
7
>>>len(B[1])
3
>>>len(B[2])
5
This is what I have tried so far:
import sys, os
import os.path

txtdir = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(txtdir, 'txt/')
textfile = "%sfile.txt" % (path)

with open(textfile, "r") as f:
    # read a LIST of rows into "data"
    data = f.readlines()

previous_col = ''
current_col = ''
for row in data:
    match_1 = "(*) Begin"
    match_2 = "(*) End"
    if (match_1 in row or match_2 in row):
        col = row.split()
        print col
        print len(col)
        if (col[3] == "End"):
            previous_col = col[1]
        elif (col[3] == "Begin"):
            current_col = col[1]
        i = 0
        if current_col:
            if (col[3] == "Begin" and current_col == previous_col):
                i += 1  # increment
                col[3] = "Begin [%s]" % (i)
            else:
                col[3] = "Begin [0]"

# and write everything back
with open(textfile, 'w') as f:
    f.writelines(data)
# close f file
f.close()
But there is no change in "file.txt".
Can you suggest something?
Thanks a lot,
Riccardo

Apart from the obvious issue pointed out by Paul Lo (that you're not updating the data list at all), you are, in my opinion, over-complicating this. You can do it simply with collections.Counter:
from collections import Counter

c = Counter()
with open('file.txt') as f, open('outfile.txt', 'w') as out:
    for line in f:
        # In `if` and `elif`, use the current value of the counter
        # for the second column, default is 0.
        if '(*) Begin' in line:
            text = line.split(None, 2)[1]
            out.write('{}[{}]\n'.format(line.strip(), c[text]))
        elif '(*) End' in line:
            text = line.split(None, 2)[1]
            out.write('{}[{}]\n'.format(line.strip(), c[text]))
            # End found, so now increment the counter
            c[text] += 1
        else:
            out.write(line)
Also, to modify the same file in place, I would recommend the fileinput module:
from collections import Counter
import fileinput

c = Counter()
for line in fileinput.input('file1.txt', inplace=True):
    if '(*) Begin' in line:
        text = line.split(None, 2)[1]
        print '{}[{}]'.format(line.strip(), c[text])
    elif '(*) End' in line:
        text = line.split(None, 2)[1]
        print '{}[{}]'.format(line.strip(), c[text])
        # End found, so now increment the counter
        c[text] += 1
    else:
        print line,
Output:
0 A (*) Begin[0]
1 A
2 A
3 A
4 A
5 A (*) End[0]
0 B (*) Begin[0]
1 B
2 B
3 B
4 B
5 B
6 B (*) End[0]
0 B (*) Begin[1]
1 B
2 B (*) End[1]
0 B (*) Begin[2]
1 B
2 B
3 B
4 B (*) End[2]
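If you are on Python 3, the same fileinput approach works with print() as a function; a minimal sketch under the same assumptions (a file named file1.txt with the same Begin/End markers):
from collections import Counter
import fileinput

c = Counter()
for line in fileinput.input('file1.txt', inplace=True):
    if '(*) Begin' in line:
        text = line.split(None, 2)[1]
        print('{}[{}]'.format(line.strip(), c[text]))
    elif '(*) End' in line:
        text = line.split(None, 2)[1]
        print('{}[{}]'.format(line.strip(), c[text]))
        # End found, so now increment the counter
        c[text] += 1
    else:
        print(line, end='')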

You did a lot of work inside for row in data, but you never actually changed data (or row); you just wrote the original content back to the file, which is why it looks like nothing has changed. Likewise, your modification of col, which comes from col = row.split(), does not change row or data either.
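A quick illustration of that last point, using one of the lines from the sample file: row.split() returns a brand-new list, so changing that list never touches the original string or the data list.
row = "0 A (*) Begin\n"
col = row.split()          # a new list, independent of row
col[3] = "Begin [0]"       # modifies only the list
print(col)                 # ['0', 'A', '(*)', 'Begin [0]']
print(row)                 # '0 A (*) Begin' -- the string is unchanged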
Without changing your code too much, you can fix it by using another list to hold the updated lines:
textfile = "%sfile.txt" % (path)
with open(textfile,"r") as f:
# read a LIST of rows into "data"
data = f.readlines()
previous_col = ''
current_col = ''
lst = list() # for the new content you are going to override
for row in data:
match_1 = "(*) Begin"
match_2 = "(*) End"
if (match_1 in row or match_2 in row):
col = row.split()
if (col[3] == "End"):
previous_col = col[1]
elif (col[3] == "Begin"):
current_col = col[1]
i = 0
if current_col:
if (col[3] == "Begin" and current_col == previous_col):
i += 1 # increment
col[3] = "Begin [%s]" % (i)
else:
col[3] = "Begin [0]"
lst.append(' '.join(col)) # you can adjust it to the behavior you need
else: # append original line
lst.append(row)
# and write everything back
with open(textfile, 'w') as f:
    for line in lst:  # write each element of lst to the file
        f.write(line.rstrip('\n'))  # strip any newline the original row still carries
        f.write('\n')               # so every line ends with exactly one newline

Related

Python inner loop and outer loop with counter to iterate over list

I have a specific issue where I'm trying to run an inner loop three times and then continue the outer for loop to process the rest of the list:
strings = ['A','A','A','A','A','A','A','A','A','A','B','B','B','B','B','B','B','B','B','B',\
           'A','A','A','A','A','A','A','A','A','A','B','B','B','B','B','B','B','B','B','B']

i = 0
for string in strings:
    global i
    if string == 'A':
        while i < 3:
            print(string, i)
            i += 1
            if i == 3: continue
    elif string == 'B':
        while i < 3:
            print(string, i)
            i += 1
            if i == 3: continue
    # print(string)
Current result:
A 0
A 1
A 2
I expected it to continue over the list once the inner loop completed and then process the next items:
A 0
A 1
A 2
B 0
B 1
B 2
A 0
A 1
A 2
B 0
B 1
B 2
If I understand the logic correctly, you could use itertools.groupby to help you form the groups:
variant #1
from itertools import groupby

MAX = 3
for k, g in groupby(strings):
    for i in range(min(len(list(g)), MAX)):
        print(f'{k} {i}')
    print()
variant #2
from itertools import groupby

MAX = 3
for k, g in groupby(strings):
    for i, _ in enumerate(g):
        if i >= MAX:
            break
        print(f'{k} {i}')
    print()
output:
A 0
A 1
A 2
B 0
B 1
B 2
A 0
A 1
A 2
B 0
B 1
B 2
variant #3: without import
prev = None
count = 0
MAX = 3
for s in strings:
    if s == prev:
        if count < MAX:
            print(f'{s} {count}')
            count += 1
    elif prev:
        count = 0
        print()
    prev = s

Replace one column in a line in a text file using Python

I have a text file with the following contents:
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 0 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0
This is the input that I need to supply to a tool used in some research application. Now I need to replace a 0 on the first line that starts with Li, in the third column from the end. That is, each line starting with Li has four zeros towards the end; I need to replace the second of those zeros, so that the file will have the following contents:
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 1 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0
This has to be done a number of times, for zeros in various positions, and I have the following code. There are some more operations that I am doing in the same code.
import os
import shutil
import time


def edit_file(column, next_column):
    # Processing x.gin file
    file_name = './' + column + '.gin'
    file_handler = open(file_name, 'r')
    print("Processing " + file_name + " file")
    contents = file_handler.readlines()
    find_line = contents[14]
    find_line_array = find_line.split('\t')
    print(find_line_array)
    # change values according to the file name
    if column == 'x':
        find_line_array[8] = 1
    elif column == 'y':
        print(contents)
        print(find_line_array)
        find_line_array[9] = 1
    elif column == 'z':
        find_line_array[10] = 1
    elif column == 'xy':
        find_line_array[8] = 1
        find_line_array[9] = 1
    elif column == 'yz':
        find_line_array[9] = 1
        find_line_array[10] = 1
    elif column == 'xz':
        find_line_array[8] = 1
        find_line_array[10] = 1
    formatted = '\t'.join(map(str, find_line_array))
    contents[14] = formatted
    with open(file_name, 'w') as f:
        for item in contents:
            f.write("%s\n" % item)
    print("Formatting completed for " + file_name)
    print('Executing GULP command ----')
    gulp_command = 'gulp ' + column + '.gin > ' + column + '.gout'
    print(gulp_command)
    shutil.copy(file_name, next_column + '.gin')
    file_handler.close()
    os.system(gulp_command)
    while not os.path.exists('./Li.grs'):
        print('Waiting for output file')
        time.sleep(1)
    if os.path.isfile('./Li.grs'):
        print('renaming file')
        os.rename('./Li.grs', next_column + '.gin')
        os.rename('./Li.grs', column + '.grs')
    return True


if __name__ == '__main__':
    print('Starting Execution')
    column_list = ['x', 'y', 'xy', 'yz', 'xz']
    print(column_list)
    for index, column in enumerate(column_list):
        if column != 'xz':
            edit_file(column, column_list[index + 1])
        else:
            edit_file(column, 'xz')
    print('Execution completed')
I am replacing the value correctly and rewriting the file, but the rewritten file does not appear to be in the correct format, as it has additional new lines. Is it possible to rewrite just the single line, so that I can keep the file in exactly the same format?
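A likely cause of the extra blank lines: readlines() keeps each line's trailing newline, and f.write("%s\n" % item) then adds another one. A minimal sketch of a write loop that avoids doubling the newline, keeping the rest of edit_file unchanged:
with open(file_name, 'w') as f:
    for item in contents:
        # rstrip first, so each line is written with exactly one newline,
        # whether or not it still carries the one from readlines()
        f.write(item.rstrip('\n') + '\n')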
I created a function for that. Try this:
def replace(filename, row, column, value):
    columnspan = " "
    data = open(filename).read().split("\n")
    for i in range(len(data)):
        data[i] = data[i].split(columnspan)
    data[row][column] = value
    write = ""
    for i in range(len(data)):
        for x in range(len(data[i])):
            write += (str(data[i][x]) + columnspan)
        write += "\n"
    write = write.strip()  # strip() returns a new string, so keep the result
    file = open(filename, "w")
    file.write(write)
    file.close()
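For example, a hypothetical call for the sample file above, assuming the file is saved as input.txt with single-space-separated columns, the "Li core" line is row 9 counting from zero, and the flag to set is column 8 (the second of the four trailing zeros):
# Hypothetical usage; the filename, row and column indices are assumptions
# based on the sample file shown in the question.
replace('input.txt', 9, 8, 1)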
You can use regex to find and update text:
import re

with open('input.txt', 'r') as f1, open('output.txt', 'w') as f2:
    data = f1.read()
    match = re.findall(r'Li\s+\w+\s+\d+\.\d+\s+\d\.\d+\s+\d\.\d+\s+\d\s+\d\s+\d\s+\d', data)
    for m in match:
        data = data.replace(m, f'{m[:-1]}1')
    f2.write(data)
Output:
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 1 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0

Python to evaluate duplicate elements in csv files

I have 2 csv files:
csv 1:
CHANNEL
3
3
4
1
2
1
4
5
csv 2:
CHANNEL
1
2
2
3
4
4
4
5
I want to evaluate the state of each channel by finding duplicate channels. If the count of a channel is > 1, its state is 0; otherwise the state is 1.
output csv:
index channel 1 channel 2 channel 3 channel 4 channel 5
1 0 1 0 0 0
2 1 0 1 0 1
So far I have counted the duplicate channels, but only for one file. Now I don't know how to read the 2 csv files and create the output file.
import csv
import collections

with open("csvfile.csv") as f:
    csv_data = csv.reader(f, delimiter=",")
    next(csv_data)
    count = collections.Counter()
    for row in csv_data:
        channel = row[0]
        count[channel] += 1
for channel, nb in count.items():
    if nb > 1:
You can read each file into a list then check the channel counts of each list.
Try this code:
ss1 = '''
CHANNEL
3
3
4
1
2
1
4
5
'''.strip()

ss2 = '''
CHANNEL
1
2
2
3
4
4
4
5
'''.strip()

with open("csvfile1.csv", 'w') as f: f.write(ss1)  # write test file 1
with open("csvfile2.csv", 'w') as f: f.write(ss2)  # write test file 2

#############################

with open("csvfile1.csv") as f:
    lines1 = f.readlines()[1:]         # skip header
    lines1 = [int(x) for x in lines1]  # convert to ints

with open("csvfile2.csv") as f:
    lines2 = f.readlines()[1:]         # skip header
    lines2 = [int(x) for x in lines2]  # convert to ints

lines = [lines1, lines2]  # make list for iteration
state = [[0]*5, [0]*5]    # default zero for each state

for ci in [0, 1]:        # each file
    for ch in range(5):  # each channel
        state[ci][ch] = 0 if lines[ci].count(ch+1) > 1 else 1  # check channel count, set state

# write to terminal
print('Index', 'Channel 1', 'Channel 2', 'Channel 3', 'Channel 4', 'Channel 5', sep=' ')
print(' ', 1, ' ', ' '.join(str(c) for c in state[0]))
print(' ', 2, ' ', ' '.join(str(c) for c in state[1]))

# write to csv
with open('state.csv', 'w') as f:
    f.write('Index,Channel 1,Channel 2,Channel 3,Channel 4,Channel 5\n')
    f.write('1,' + ','.join(str(c) for c in state[0]) + '\n')
    f.write('2,' + ','.join(str(c) for c in state[1]) + '\n')
Output (terminal)
Index Channel 1 Channel 2 Channel 3 Channel 4 Channel 5
1 0 1 0 0 1
2 1 0 1 0 1
Output (state.csv)
Index,Channel 1,Channel 2,Channel 3,Channel 4,Channel 5
1,0,1,0,0,1
2,1,0,1,0,1
You can use collections.Counter
from collections import Counter

# read the two files
with open('file_0.csv', 'r') as source:
    zero = source.readlines()
with open('file_1.csv', 'r') as source:
    one = source.readlines()

# convert to integers
# if the last item is not '\n', you only need [1:]
zero = [int(item) for item in zero[1:-1]]
one = [int(item) for item in one[1:-1]]

# combine two lists
zero += one

# count the values with counter
channels_counts = Counter(zero)
unique_channels = sorted(set(channels_counts.keys()))
res = [0 if channels_counts[item] > 1 else 1 for item in unique_channels]

for ind, item in enumerate(res):
    print('channel %i' % ind, item)

select some lines as a group then give each line same label in same group in txt file

I want to extract the lines of a txt file between two patterns as separate groups, and give every line in a group the same number as a label (the group label goes in the last column). For example, given this data:
==g1
a 1 2 3 4
b 2 1 2 3
~~
==g2
c 2...
d 1...
...
I want to get output like
a 1 2 3 4 1
b 2 1 2 3 1
c 2 ... 2
d 1 ... 2
...
I am using Python 3.7:
with open('input.txt') as infile, open('output.txt', 'w') as outfile:
    copy = False
    for line in infile:
        # ng as the new label in each group
        ng = 0
        if line.strip() == "start":
            copy = True
            continue
        elif line.startswith('with'):
            copy = False
            continue
        elif copy:
            ng = ng + 1
            line = line.rstrip('\n') + '\t' + str(ng) + '\n'
            outfile.write(line)
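A minimal sketch of one way to do it, assuming (as in the sample) that each group starts at a line beginning with == and ends at a line beginning with ~~, and that the label is appended as a tab-separated last column:
# Assumptions: group headers start with '==', group terminators start with '~~',
# input is 'input.txt' and output is 'output.txt' as in the question.
with open('input.txt') as infile, open('output.txt', 'w') as outfile:
    group = 0           # current group label
    in_group = False
    for line in infile:
        if line.startswith('=='):    # header line: start a new group
            group += 1
            in_group = True
        elif line.startswith('~~'):  # terminator line: close the group
            in_group = False
        elif in_group:               # data line: append the current label
            outfile.write(line.rstrip('\n') + '\t' + str(group) + '\n')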

Count all +1's in the file python

I have the following data:
1 3 4 2 6 7 8 8 93 23 45 2 0 0 0 1
0 3 4 2 6 7 8 8 90 23 45 2 0 0 0 1
0 3 4 2 6 7 8 6 93 23 45 2 0 0 0 1
-1 3 4 2 6 7 8 8 21 23 45 2 0 0 0 1
-1 3 4 2 6 7 8 8 0 23 45 2 0 0 0 1
The above data is in a file. I want to count the number of 1s, 0s, and -1s, but only in the first column. I am reading the file from standard input, but the only way I could think of is this:
cnt = 0
cnt1 = 0
cnt2 = 0
for line in sys.stdin:
    (t1, <having 15 different variables as that many columns are in files>) = re.split("\s+", line.strip())
    if re.match("+1", t1):
        cnt = cnt + 1
    if re.match("-1", t1):
        cnt1 = cnt1 + 1
    if re.match("0", t1):
        cnt2 = cnt2 + 1
How can I make this better, especially the 15-different-variables part, since that is the only place where I would use those variables?
Use collections.Counter:
from collections import Counter

with open('abc.txt') as f:
    c = Counter(int(line.split(None, 1)[0]) for line in f)
print c
Output:
Counter({0: 2, -1: 2, 1: 1})
Here str.split(None, 1) splits the line just once:
>>> s = "1 3 4 2 6 7 8 8 93 23 45 2 0 0 0 1"
>>> s.split(None, 1)
['1', '3 4 2 6 7 8 8 93 23 45 2 0 0 0 1']
NumPy makes it even easier:
>>> import numpy as np
>>> from collections import Counter
>>> Counter(np.loadtxt('abc.txt', usecols=(0,), dtype=np.int))
Counter({0: 2, -1: 2, 1: 1})
If you only want the first column, then only split the first column. And use a dictionary to store the counts for each value.
count = dict()
for line in sys.stdin:
    (t1, rest) = line.split(' ', 1)
    try:
        count[t1] += 1
    except KeyError:
        count[t1] = 1

for item in count:
    print '%s occurs %i times' % (item, count[item])
Instead of using tuple unpacking, where you need a number of variables exactly equal to the number of parts returned by split(), you can just use the first element of those parts:
parts = re.split("\s+", line.strip())
t1 = parts[0]
or equivalently, simply
t1 = re.split("\s+", line.strip())[0]
import collections

def countFirstColum(fileName):
    res = collections.defaultdict(int)
    with open(fileName) as f:
        for line in f:
            key = line.split(" ")[0]
            res[key] += 1
    return res
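A hypothetical usage, assuming the data above is saved as abc.txt:
# 'abc.txt' is assumed to hold the sample rows shown in the question;
# the keys of the returned defaultdict are the first-column values as strings.
counts = countFirstColum('abc.txt')
for value, n in counts.items():
    print('%s occurs %i times' % (value, n))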
rows = []
for line in f:
    column = line.strip().split(" ")
    rows.append(column)
Then you get a 2-dimensional array.
1st column:
for row in rows:
    print row[0]
output:
1
0
0
-1
-1
This is from a script of mine that uses an input file; I checked, and it works with standard input as the infile:
dictionary = {}
for line in someInfile:
    line = line.strip('\n')  # if infile but you should
    f = line.split()         # do your standard input thing
    dictionary[f[0]] = 0

for line in someInfile:
    line = line.strip('\n')  # if infile but you should
    f = line.split()         # do your standard input thing
    dictionary[f[0]] += 1

print dictionary
