Replace one column in a line in a text file using Python - python

I have a text file and it has the following contents
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 0 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0
This is the input that I need to supply to a tool which is used in a research application. Now I need to replace the 0 in the third column from the end on the first line that starts with Li. That is, there are four zeros towards the end of each of the lines starting with Li; I need to replace the second of them with 1, so that the file will have the following contents:
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 1 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0
This has to be done a number of times for the zeros in various positions and I have the following code. There are some more operations that I am doing in the same code.
import os
import shutil
import time
def edit_file(column, next_column):
# Processing x.gin file
file_name = './' + column + '.gin'
file_handler = open(file_name, 'r')
print("Processing " + file_name + " file")
contents = file_handler.readlines()
find_line = contents[14]
find_line_array = find_line.split('\t')
print(find_line_array)
# change values according to the file name
if column == 'x':
find_line_array[8] = 1
elif column == 'y':
print(contents)
print(find_line_array)
find_line_array[9] = 1
elif column == 'z':
find_line_array[10] = 1
elif column == 'xy':
find_line_array[8] = 1
find_line_array[9] = 1
elif column == 'yz':
find_line_array[9] = 1
find_line_array[10] = 1
elif column == 'xz':
find_line_array[8] = 1
find_line_array[10] = 1
formatted = '\t'.join(map(str, find_line_array))
contents[14] = formatted
with open(file_name, 'w') as f:
for item in contents:
f.write("%s\n" % item)
print("Formatting completed for " + file_name)
print('Executing GULP command ----')
gulp_command = 'gulp ' + column + '.gin > ' + column + '.gout'
print(gulp_command)
shutil.copy(file_name, next_column+'.gin')
file_handler.close()
os.system(gulp_command)
while not os.path.exists('./Li.grs'):
print('Waiting for output file')
time.sleep(1)
if os.path.isfile('./Li.grs'):
print('renaming file')
os.rename('./Li.grs', next_column+'.gin')
os.rename('./Li.grs', column+'.grs')
return True
if __name__ == '__main__':
print('Starting Execution')
column_list = ['x', 'y', 'xy', 'yz', 'xz']
print(column_list)
for index, column in enumerate(column_list):
if column != 'xz':
edit_file(column, column_list[index + 1])
else:
edit_file(column, 'xz')
print('Execution completed')
I am replacing it correctly and rewriting the file. But the file doesn't appear to be in the correct format afterwards, as it has additional newlines. Is it possible to rewrite only the single line, so that the rest of the file keeps exactly the same format?

i created a function for that. try this
def replace(filename, row, column, value):
    """Overwrite one space-separated field of one line of a text file, in place.

    Only the targeted line is rebuilt; every other line, the spacing inside
    the targeted line, and the presence or absence of a final newline are
    written back exactly as they were read.

    Args:
        filename: Path of the text file to modify.
        row: Zero-based index of the line to change.
        column: Zero-based index of the field within that line, counting
            fields obtained by splitting the line on single spaces.
        value: New content for the field; converted with ``str``.

    Raises:
        IndexError: If ``row`` or ``column`` is out of range.
    """
    columnspan = " "
    with open(filename) as source:
        lines = source.read().split("\n")

    fields = lines[row].split(columnspan)
    fields[column] = str(value)
    lines[row] = columnspan.join(fields)

    # split("\n") / join("\n") round-trip exactly, so no separator or blank
    # line is added that was not in the file to begin with.
    with open(filename, "w") as target:
        target.write("\n".join(lines))

You can use regex to find and update text:
import re
with open('input.txt', 'r') as f1, open('output.txt', 'w') as f2:
    data = f1.read()
    # Raw string: \s, \w and \d must reach the regex engine unchanged; in a
    # plain string literal they are invalid escape sequences.
    # The pattern requires whitespace directly after "Li", so lines starting
    # with Li1, Li2, ... are not matched. It ends on the fourth integer column.
    match = re.findall(r'Li\s+\w+\s+\d+\.\d+\s+\d\.\d+\s+\d\.\d+\s+\d\s+\d\s+\d\s+\d', data)
    for m in match:
        # swap the last matched digit for 1, leaving the rest of the line alone
        data = data.replace(m, f'{m[:-1]}1')
    f2.write(data)
Output:
#
# Keywords:
#
LiFePO4
end
name Li5FeO4
cell
18.557309 18.316802 9.125725 90.047539 90.100646 90.060551 0 0 0 0 0 0
fractional 1
Li core 0.06001 0.059408 0.849507 1 1 0 1 0 0
Li1 core 0.025416 0.339078 0.128746 1 1 0 0 0 0
Li2 core 0.02517 0.838929 0.130747 1 1 0 0 0 0
Li3 core 0.525498 0.339179 0.127632 1 1 0 0 0 0
Li4 core 0.524753 0.841333 0.129329 1 1 0 0 0 0
Li5 core 0.179907 0.158182 0.634012 1 1 0 0 0 0
Li6 core 0.180817 0.666028 0.628327 1 1 0 0 0 0

Related

Python read from file that has multiple values

Ben
5 0 0 0 0 0 0 1 0 1 -3 5 0 0 0 5 5 0 0 0 0 5 0 0 0 0 0 0 0 0 1 3 0 1 0 -5 0 0 5 5 0 5 5 5 0 5 5 0 0 0 5 5 5 5 -5
Moose
5 5 0 0 0 0 3 0 0 1 0 5 3 0 5 0 3 3 5 0 0 0 0 0 5 0 0 0 0 0 3 5 0 0 0 0 0 5 -3 0 0 0 5 0 0 0 0 0 0 5 5 0 3 0 0
Reuven
I was wondering how to read multiple lines of this sort of file into a list or dictionary as I want the ratings which are the numbers to stay with the names of the person that corresponds with the rating
You could read the file in pairs of two lines and populate a dictionary.
path = ... # path to your file
out = {}
with open(path) as f:
    # iterate over lines in the file
    for line in f:
        # the first, 3rd, ... line contains the name
        name = line
        # the 2nd, 4th, ... line contains the ratings
        # by calling next here, we jump two lines per iteration;
        # next(f, '') works on Python 2 and 3 and yields an empty ratings
        # line when the file ends right after a name
        ratings = next(f, '')
        # write values to dictionary; split() without arguments discards
        # surrounding whitespace and never produces empty tokens
        out[name.strip()] = [int(rating) for rating in ratings.split()]
It could also be done with a while loop:
path = ... # path to your file
out = {}
with open(path) as f:
    while True:
        # read name and ratings, which are in consecutive lines
        name = f.readline()
        ratings = f.readline()
        # stop condition: end of file is reached
        if name == '':
            break
        # write values to dictionary:
        # use name as key and convert ratings to integers.
        # split() without arguments ignores surrounding whitespace and
        # returns [] for a missing ratings line, so a trailing name with
        # no ratings maps to an empty list instead of failing in int('')
        out[name.strip()] = [int(rating) for rating in ratings.split()]
You can use zip to combine lines by pairs to form the dictionary
with open("file.txt", "r") as f:
    lines = f.read().split("\n")
    # even-indexed lines are names, odd-indexed lines are their ratings;
    # zip stops at the shorter of the two, so an unpaired last line is dropped
    d = dict(zip(lines[::2], (list(map(int, r.split())) for r in lines[1::2])))

Looping over a pandas column and creating a new column if it meets conditions

I have a pandas dataframe and I want to loop over the last column "n" times based on a condition.
import random as random
import pandas as pd
# probability threshold used for every random draw
p = 0.5
df = pd.DataFrame()
# initial on/off state per row, stored as the strings "0" / "1"
start = []
for i in range(5):
    if random.random() < p:
        start.append("0")
    else:
        start.append("1")
df['start'] = start
print(df['start'])
Essentially, I want to loop over the final column "n" times and if the value is 0, change it to 1 with probability p so the results become the new final column. (I am simulating on-off every time unit with probability p).
e.g. after one iteration, the dataframe would look something like:
0 0
0 1
1 1
0 0
0 1
after two:
0 0 1
0 1 1
1 1 1
0 0 0
0 1 1
What is the best way to do this?
Sorry if I am asking this wrong, I have been trying to google for a solution for hours and coming up empty.
Like this. Append col with name 1, 2, ...
# continue from question code ...
# colname is 1, 2, ...
for col in range(1, 5):
    tmp = []
    for i in range(5):
        # check final col: scalar positional lookup of row i in the column
        # added last (avoids integer [] on a label-indexed Series)
        if df.iloc[i, col - 1] == "0":
            if random.random() < p:
                tmp.append("0")
            else:
                tmp.append("1")
        else: # == 1
            # once a row is "1" it stays "1"
            tmp.append("1")
    # append new col
    df[str(col)] = tmp
print(df)
# initial
s
0 0
1 1
2 0
3 0
4 0
# result
s 1 2 3 4
0 0 0 1 1 1
1 0 0 0 0 1
2 0 0 1 1 1
3 1 1 1 1 1
4 0 0 0 0 0

Find all the blocks

I am very new to python and coding. I have this homework that I have to do:
You will receive on the first line the number of rows of the matrix (n), and on the next n lines you will get each row of the matrix as a string (zeros and ones separated by a single space). You have to calculate how many blocks there are (ones connected horizontally or vertically — as the third example shows, diagonal contact does not join blocks). Here are examples:
Input:
5
1 1 0 0 0
1 1 0 0 0
0 0 0 0 0
0 0 0 1 1
0 0 0 1 1
Output:
2
Input:
6
1 1 0 1 0 1
0 1 1 1 1 1
0 1 0 0 0 0
0 1 1 0 0 0
0 1 1 1 1 0
0 0 0 1 1 0
Output:
1
Input:
4
0 1 0 1 1 0
1 0 1 1 0 1
1 0 0 0 0 0
0 0 0 1 0 0
Output:
5
the code I came up with for now is :
n = int(input())
blocks = 0
matrix = [[int(i) for i in input().split()] for j in range(n)]
#loop or something to find the blocks in the matrix
print(blocks)
Any help will be greatly appreciated.
def valid(y,x):
    """Return True when (y, x) lies inside the matrix, False otherwise.

    Reads the module-level bounds ``N`` (row count) and ``horizontal_len``
    (row length).
    """
    # always return a real bool rather than falling off the end with None
    return 0 <= y < N and 0 <= x < horizontal_len
def find_blocks(y, x):
    """Breadth-first flood fill of the block that contains cell (y, x).

    Every cell equal to 1 that is reachable from (y, x) through horizontal or
    vertical steps is overwritten with -1 in the module-level ``matrix`` so it
    is not counted again. The start cell itself is not modified here.

    Args:
        y: Row index of the start cell.
        x: Column index of the start cell.
    """
    from collections import deque

    # local FIFO of (row, col) pairs; deque gives O(1) pops from the left
    pending = deque([(y, x)])
    # search around 4 directions as (dy, dx): right, down, left, up
    steps = ((0, 1), (1, 0), (0, -1), (-1, 0))
    # if nothing is left in the queue then the block is fully explored
    while pending:
        y, x = pending.popleft()
        for dy, dx in steps:
            next_y = y + dy
            next_x = x + dx
            # a neighbour inside the matrix that is 1 (not 0) is part of the block
            if valid(next_y, next_x) and matrix[next_y][next_x] == 1:
                # mark before enqueueing so a cell is never queued twice
                matrix[next_y][next_x] = -1
                pending.append((next_y, next_x))
# first input line: number of rows; then one space-separated row per line
N = int(input())
matrix = [list(map(int, input().split())) for _ in range(N)]
# row length, taken from the first row
horizontal_len = len(matrix[0])
blocks = 0
# scan every cell in row-major order
for start_y, current_row in enumerate(matrix):
    for start_x in range(horizontal_len):
        # only an unvisited 1 starts a new block
        if current_row[start_x] != 1:
            continue
        # mark the seed as visited so it is not counted again
        current_row[start_x] = -1
        Q = []  # reset the module-level work queue before each fill
        find_blocks(start_y, start_x)
        blocks += 1
print(blocks)
I used the BFS algorithm to solve this question. The comments may not be enough to understand the logic.
If you have questions about this solution, let me know!

Python: Constructing & Printing matrices

I need to create a matrix that calculates the LCS and then print it out. This is my code, but I'm having trouble with the print function (don't know how to get the LCSmatrix values into the printing)
def compute_LCS(seqA, seqB):
for row in seqA:
for col in seqB:
if seqA[row] == seqB[col]:
if row==0 or col==0:
LCSmatrix(row,col) = 1
else:
LCSmatrix(row,col) = LCS(row-1,col-1) + 1
else:
LCSmatrix(row,col) = 0
return LCSmatrix
def printMatrix(parameters...):
print ' ',
for i in seqA:
print i,
print
for i, element in enumerate(LCSMatrix):
print i, ' '.join(element)
matrix = LCSmatrix
print printMatrix(compute_LCS(seqA,seqB))
Any help would be much appreciated.
Try this:
seqA='AACTGGCAG'
seqB='TACGCTGGA'
def compute_LCS(seqA, seqB):
    """Build the table of matching-run lengths between two sequences.

    Args:
        seqA: Sequence laid out along the columns.
        seqB: Sequence laid out along the rows.

    Returns:
        A list of ``len(seqB)`` rows, each with ``len(seqA)`` integers.
        Cell ``[row][col]`` is 0 when ``seqB[row] != seqA[col]``; otherwise it
        is one more than the diagonal predecessor ``[row-1][col-1]`` (or 1 on
        the first row or column), i.e. the length of the run of consecutive
        matches ending at that pair of positions.
    """
    # one row per seqB element and one column per seqA element, so the table
    # is also correct when the two sequences differ in length
    LCSmatrix = [[0] * len(seqA) for _ in seqB]
    for row, b_item in enumerate(seqB):
        for col, a_item in enumerate(seqA):
            if b_item != a_item:
                continue  # cell keeps its initial 0
            if row == 0 or col == 0:
                LCSmatrix[row][col] = 1
            else:
                LCSmatrix[row][col] = LCSmatrix[row - 1][col - 1] + 1
    return LCSmatrix
def printMatrix(seqA, seqB, LCSmatrix):
    """Print the matrix with seqA as column headers and seqB as row labels.

    Each header, label and value is right-aligned in a field of width 2 and
    fields are separated by single spaces. Every print call receives a single
    pre-formatted string, so the output is the same on Python 2 and Python 3.

    Args:
        seqA: Sequence whose characters label the columns.
        seqB: Sequence whose characters label the rows.
        LCSmatrix: Rows of integers, one row per element of seqB.
    """
    # the leading ' ' produces an empty header cell above the row labels
    print(' '.join('%2s' % x for x in ' ' + seqA))
    for i, element in enumerate(LCSmatrix):
        print('%2s %s' % (seqB[i], ' '.join('%2i' % x for x in element)))
matrix = compute_LCS(seqA, seqB)
printMatrix(seqA, seqB, matrix)
The above produces:
A A C T G G C A G
T 0 0 0 1 0 0 0 0 0
A 1 1 0 0 0 0 0 1 0
C 0 0 2 0 0 0 1 0 0
G 0 0 0 0 1 1 0 0 1
C 0 0 1 0 0 0 2 0 0
T 0 0 0 2 0 0 0 0 0
G 0 0 0 0 3 1 0 0 1
G 0 0 0 0 1 4 0 0 1
A 1 1 0 0 0 0 0 1 0

how to search and replace for multiple value of text file in python?

My data:
1 255 59 0 1 255 0 1 1 0 4 0 5
0 1 255 1 253 0 90 1 1 0 2 0 233
I'm new to Python and I need to replace all the values in a text file according to
these conditions:
If value in line = 255, Replace as 1
If value in line < 255 (254, 253, 252...), Replace as 0
So, My target data is:
1 1 0 0 1 1 0 1 1 0 0 0 0
0 1 1 1 0 0 0 1 1 0 0 0 0
How can I solve this problem?
Actually, I tried to fix it but doesn't work with my data.
This is my code:
f1 = open('20130103.txt','r')
f2 = open('20130103_2.txt','w')
count = 255
for line in f1:
line = line.replace('255','1')
count = count-1
line = line.replace('%d'%(count), '0')
f2.write(line)
f1.close()
f2.close()
And result is:
1 1 59 0 1 1 0 1 1 0 4 0 5
0 1 1 1 0 0 90 1 1 0 2 0 233
The eye catcher here is the number 255, for which you would want to display 1, which clearly indicates, you are only interested in the 8th bit.
The only case which looks strange is that your output does not conform to your requirement: it leaves the value 1 unchanged, in which case you have to exclude 1 from the transformation.
If I have to trust your requirement
>>> with open("test.in") as fin, open("test.out","w") as fout:
for line in fin:
line = (e >> 7 for e in map(int, line.split()))
fout.write(''.join(map(str, line)))
fout.write('\n')
If I have to trust your data
>>> with open("test.in") as fin, open("test.out","w") as fout:
for line in fin:
line = (e >> 7 if e != 1 else 1 for e in map(int, line.split()))
fout.write(''.join(map(str, line)))
fout.write('\n')
Another alternate view to this problem.
>>> with open("test.in") as fin, open("test.out","w") as fout:
for line in fin:
line = (e / 255 if e != 1 else 1 for e in map(int, line.split()))
fout.write(''.join(map(str, line)))
fout.write('\n')
Here's an example of how you can pass re.sub a function -- instead of a replace string -- to gain really fine control over how replacements are done:
import re
lines = ['1 255 59 0 1 255 0 1 1 0 4 0 5',
'0 1 255 1 253 0 90 1 1 0 2 0 233']
def do_replace(match):
    """Replacement callback for ``re.sub``: map one matched number to '0' or '1'.

    Args:
        match: Match object whose whole match (group 0) is a decimal integer.

    Returns:
        '1' when the number is 1 or 255, '0' for any other number up to 254.

    Raises:
        ValueError: If the number is greater than 255.
    """
    number = int(match.group(0))
    if number > 255:
        # name the offending value so a bad input line is easy to locate
        raise ValueError('value out of range 0-255: %d' % number)
    # 1 is kept as it is and 255 becomes 1; everything else, 0 included, is 0
    return '1' if number in (1, 255) else '0'
for line in lines:
    # call form with a single argument prints the same on Python 2 and 3;
    # do_replace is called once for every run of digits in the line
    print(re.sub(r'\d+', do_replace, line))
prints:
1 1 0 0 1 1 0 1 1 0 0 0 0
0 1 1 1 0 0 0 1 1 0 0 0 0
You could do something like this:
with open('20130103.txt', 'r') as f1, open('20130103_2.txt', 'w') as f2:
for line in f1:
values = line.split()
new_values = ' '.join('1' if value == '255' else '0' for value in values)
f2.write(new_values + '\n')
line.split() splits the line into chunks. 'a b'.split() == ['a', 'b']
'1' if value == '255' else '0' for value in values is a generator that just outputs '1' or '0', depending on the value of an item in your list of values.
' '.join() joins together the values in the list (or generator, in this case) with a space.

Categories

Resources