import fileinput
import sys

file_platform = 'karbarge.DAT'
Dir = 30
Hs = 4
Tp = 12
dummy = []
newval = [Dir, Hs, Tp]

def replace():
    search_chars = ['WaveDir', 'WaveHs', 'WaveTp']
    # First pass: record the keyword, the old value and the line number where it was found
    with open(file_platform) as f_input:
        for line_number, line in enumerate(f_input, start=1):
            for search in search_chars:
                if search in line:
                    first_non_space = line.strip().split(' ')[0]
                    x = search_chars.index(search)
                    dummy.append((search, first_non_space, line_number, x))
                    print(search, first_non_space, line_number, x)
    # Second pass: rewrite the file in place, swapping each old value for the new one
    i = 1
    j = 0
    for line in fileinput.input(file_platform, inplace=1):
        if j < len(dummy) and i == dummy[j][2]:
            line = line.replace(dummy[j][1], str(newval[dummy[j][3]]))
            j = j + 1
        sys.stdout.write(line)
        i = i + 1
    return

replace()
The aim is to select the first non-space value on lines matching certain keywords in a file and record it together with an index, then use that index to replace it in the same file with new values. I'm successful in picking out the existing values, but I couldn't replace them with the new values and save the result under the same filename. Here is the code I tried.
Could anyone suggest modifications to it, or a better approach that is simpler and clearer?
I honestly didn't understand 100% of your goal, but I would like to propose a cleaner solution that might suit your needs. The main idea is to read each line of the old file, replace the values as needed, store the resulting lines in a list, and finally write this list of lines to a new file.
# This dict associates the old values with the values to which they should be changed
replacing_dict = {'WaveDir': Dir, 'WaveHs': Hs, 'WaveTp': Tp}

new_lines = []
with open(file_platform) as f_input:
    for line in f_input:
        first_non_space = line.strip().split(' ')[0]
        if first_non_space in replacing_dict:
            line = line.replace(first_non_space, str(replacing_dict[first_non_space]))
        new_lines.append(line)
# Write new_lines into a file and, optionally, replace the old file with this new file
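For that final step, a minimal sketch of what it could look like, assuming you want to end up overwriting file_platform itself (the temporary name new_platform.DAT is just an example):

import os

tmp_name = 'new_platform.DAT'  # hypothetical temporary file name
with open(tmp_name, 'w') as f_output:
    f_output.writelines(new_lines)
# Replace the old file with the new one (os.replace needs Python 3.3+)
os.replace(tmp_name, file_platform)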
My problem is the following:
I have one text file containing more than 1000 rows, and I want to read it line by line.
I am trying this code, but I am not getting the expected output.
my source file:
uuid;UserGroup;Name;Description;Owner;Visibility;Members ----> header of the file
id:;group1;raji;xyzabc;ramya;public;
abc
def
geh
id:group2;raji;rtyui;ramya;private
cvb
nmh
poi
import csv

output = []
temp = []
fo = open('usergroups.csv', 'r')
for line in fo:
    #next(uuid)
    line = line.strip()
    if not line:
        continue  # ignore empty lines
    #temp.append(line)
    if not line.startswith('id:') and not None:
        temp.append(line)
        print(line)
    else:
        if temp:
            line += ";" + ",".join(temp)
            temp.clear()
        output.append(line)

print("\n".join(output))

with open('new.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(output)
I am getting this output:
id;group1;raji;xyzabc;ramya;public;uuid;UserGroup;Name;Description;Owner;Visibility;Members
id:group2;raji;rtyui;ramya;private;abc,def,geh
So whenever a line does not start with 'id' it should be appended to the previous line.
my desired output:
uuid;UserGroup;Name;Description;Owner;Visibility;Members ----> header of the file
id;group1;raji;xyzabc;ramya;public;abc,def,geh
id:group2;raji;rtyui;ramya;private;cvb,nmh,poi
There are a few mistakes. I'll only show the relevant corrections:
Use
if not line.startswith('id'):
No 'id:', since you also have a line starting with 'id;', plus you state yourself that a line has to start with "id" (no ":" there). The 'and not None' part is unnecessary, because it's always true.
The other part:
output.append(line.split(';'))
because writerows needs an iterable (list) of "row" objects, and a row object is a list of strings. So you need a list of lists, which the above is, thanks to the extra split.
(Of course, now the line print("\n".join(output)) fails, but writer.writerows(output) works.)
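For reference, a minimal sketch of your loop body with both corrections dropped in (everything else, including the header handling, stays as in your code):

if not line.startswith('id'):
    temp.append(line)
else:
    if temp:
        line += ";" + ",".join(temp)
        temp.clear()
    output.append(line.split(';'))  # list of lists, which is what writerows expects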
I don't know if it will help you, but with a regex this problem is solved in a very simple way. I leave the code here in case you are interested.
import regex as re
input_text = """uuid;UserGroup;Name;Description;Owner;Visibility;Members ----> header of the file
id;group1;raji;xyzabc;ramya;public;
abc
def
geh
id:group2;raji;rtyui;ramya;private
cvb
nmh
poi"""
formatted = re.sub(r"\n(?!(id|\n))", "", input_text)
print(formatted)
uuid;UserGroup;Name;Description;Owner;Visibility;Members ----> header of the file
id;group1;raji;xyzabc;ramya;public;abcdefgeh
id:group2;raji;rtyui;ramya;privatecvbnmhpoi
This code just replaces the regular expression \n(?!(id|\n)) with the empty string. This regular expression matches all line breaks that are not followed by the word "id" or by another line break (so we keep the break between the two id lines).
Writing to a file has not been included here, but the formatted string is available to work with, much as in your original code.
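If you do want to persist the result, a minimal sketch under the assumption that the formatted string should simply become the contents of a new file (new.csv is just an example name):

with open('new.csv', 'w') as f_output:
    f_output.write(formatted + '\n')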
Note: this is not really an answer to your question so much as a solution to your problem.
The structure is by and large the same, with a few changes for readability.
Readable code is easier to get right
import csv

output = []
temp = []
currIdLine = ""

with open('usergroups.csv', 'r') as f:
    for dirtyline in f.readlines():
        line = dirtyline.strip()
        if not line:
            print("Skipping empty line")
            continue
        if line.startswith('uuid'):  # append the header to the output
            output.append(line)
            continue
        if line.startswith('id'):
            if temp:
                print(temp)
                output.append(currIdLine + ";" + ','.join(temp))  # based on current input, there is a bug here where the output will contain two sequential ';' characters
                temp.clear()
            currIdLine = line
        else:
            temp.append(line)
    output.append(currIdLine + ";" + ','.join(temp))
print(output)
I've attempted to find this, but I keep ending up with code that replaces one specific word.
I've been using this code:
phrase = open(club, 'a')
for line in phrase:
    if line.contains(name):
        #what do i put here?
    else:
        pass
else:
    pass
So let's say I have a text file which contains "sam 10" and I want to replace this with "sam 5". If I were to do this with the above code, how would I? The name will stay the same but the number will not. Since the number is different for each name, I can't search for the number, which is why I'm searching for the name. I was thinking of using line.replace, but that only changes the one phrase, whereas I want the whole line to change.
Edit: This is under the assumption that the text file has multiple names of different people with different numbers. I want it to search for that specific name and replace the whole line.
Thanks!
You can check the lines one by one for what to modify, then overwrite the original file to save the changes:
f = open('sample.txt', 'r')
lines = f.readlines()
f.close()

name = 'ali'
for i in range(len(lines)):
    line = lines[i]
    # Find 'name' index (-1 if not found)
    index = line.find(name)
    # If the wanted name is found
    if index != -1:
        # Split line by spaces to get the name and number, starting from the name
        words = line[index:].split()
        # Name and number should be the first two elements in the words list
        name_number = words[0] + ' ' + '5'  # words[1] is the old number
        #
        # Do something with name_number
        #
        # the last '1' is to skip copying a space
        line = line[:index] + name_number + ' ' + line[index + len(name_number) + 1:]
        # Save result
        lines[i] = line

output = open('sample.txt', 'w')
for line in lines:
    output.write(line)
output.close()
In general, there are many ways, one of them would be:
Using built-in method:
with open(club, 'r') as in_file, open('new_file', 'w') as out_file:
    for line in in_file:
        if name in line:
            out_file.write(new_line)
        else:
            out_file.write(line)
But this has the effect of creating a new file.
EDIT:
If you want to do the replacement in place, then you can use the fileinput module, this way:
import fileinput

for line in fileinput.input(club, inplace=True):
    if name in line:
        line = 'NEW LINE'  # Construct your new line here
        print(line)  # This will print it into the file
    else:
        print(line, end='')  # No changes to be made; print the line into the file as it is (end='' avoids doubling its newline)
Quoting from docs:
class fileinput.FileInput(files=None, inplace=False, backup='', bufsize=0, mode='r', openhook=None)
...
Optional in-place filtering:
if the keyword argument inplace=True is passed to fileinput.input() or
to the FileInput constructor, the file is moved to a backup file and
standard output is directed to the input file (if a file of the same
name as the backup file already exists, it will be replaced silently).
This makes it possible to write a filter that rewrites its input file
in place.
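Applied to the "sam 10" example from the question, a minimal sketch could look like this (assuming club holds the filename; the replacement line "sam 5" is hard-coded purely for illustration):

import fileinput

name = 'sam'
for line in fileinput.input(club, inplace=True):
    if name in line:
        line = name + ' 5\n'  # replace the whole line
    print(line, end='')       # with inplace=True this goes into the file, not to the screen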
I have a CSV file that looks like this
a,b,c
d1,g4,4m
t,35,6y
mm,5,m
I'm trying to replace all the m's and y's preceded by a number with 'month' and 'year' respectively. I'm using the following script.
import re, csv

out = open("out.csv", "wb")
file = "in.csv"
with open(file, 'r') as f:
    reader = csv.reader(f)
    for ss in reader:
        s = str(ss)

month_pair = (re.compile('(\d\s*)m'), 'months')
year_pair = (re.compile('(\d\s*)y'), 'years')

def substitute(s, pairs):
    for (pattern, substitution) in pairs:
        match = pattern.search(s)
        if match:
            s = pattern.sub(match.group(1) + substitution, s)
    return s

pairs = [month_pair, year_pair]
print(substitute(s, pairs))
It does replace but it does that only on the last row, ignoring the ones before it. How can I have it iterate over all the rows and write to another csv file?
You can use a positive look-behind:
>>> re.sub(r'(?<=\d)m','months',s)
'a,b,c\nd1,g4,4months\nt,35,6y\nmm,5,m'
>>> re.sub(r'(?<=\d)y','years',s)
'a,b,c\nd1,g4,4m\nt,35,6years\nmm,5,m'
In this line
print (substitute(s, pairs))
your variable s holds only the last line of your file: you reassign s to the current line on every iteration of the reading loop, but substitute is only called once, after the loop has finished.
Solutions (choose one):
You could add another for-loop that iterates over all lines again.
Or move the substitution into the for-loop where you read the lines of the file, as sketched below. This is definitely the better solution!
You can easily look up how to write a new file or change the file you are working on.
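A minimal sketch of that second option, with the substitution moved into the reading loop and the rows written to out.csv via csv.writer (Python 3 shown; applying the substitution per cell rather than on the stringified row is an assumption on my part):

import re
import csv

month_pair = (re.compile(r'(\d\s*)m'), 'months')
year_pair = (re.compile(r'(\d\s*)y'), 'years')
pairs = [month_pair, year_pair]

def substitute(s, pairs):
    for (pattern, substitution) in pairs:
        match = pattern.search(s)
        if match:
            s = pattern.sub(match.group(1) + substitution, s)
    return s

with open('in.csv', newline='') as f, open('out.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    for row in csv.reader(f):
        writer.writerow([substitute(cell, pairs) for cell in row])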
I use the code below to combine all the CSV files; each file has 10,000 rows:
billing_report_2014-02-01.csv
billing_report_2014-02-02.csv
:
fout = open("out.csv", "a")
for num in range(1, 10):
    print num
    for line in open("billing_report_2014-02-0" + str(num) + ".csv"):
        fout.write(line)
for num in range(10, 29):
    print num
    for line in open("billing_report_2014-02-" + str(num) + ".csv"):
        fout.write(line)
fout.close()
but now I want to add a new date column to the out.csv file. How can I add a date column whose value is "2014-02-01" for every row appended from billing_report_2014-02-01, "2014-02-02" for every row appended from billing_report_2014-02-02, and so on?
List the filenames you want to work on, then iterate over them, building a generator over each input file that strips trailing newlines and adds a new field with the date, e.g.:
filenames = [
    'billing_report_2014-02-01.csv',
    'billing_report_2014-02-02.csv'
]

with open('out.csv', 'w') as fout:
    for filename in filenames:
        to_append = filename.rpartition('_')[2].partition('.')[0]
        with open(filename) as fin:
            fout.writelines('{},{}\n'.format(line.rstrip(), to_append) for line in fin)
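A variant of the same idea using the csv module (just a sketch, Python 3; mainly useful if some fields are quoted and contain commas themselves):

import csv

with open('out.csv', 'w', newline='') as fout:
    writer = csv.writer(fout)
    for filename in filenames:  # same filename list as above
        date_info = filename.rpartition('_')[2].partition('.')[0]
        with open(filename, newline='') as fin:
            for row in csv.reader(fin):
                writer.writerow(row + [date_info])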
I think you can just add the date at the end:
for line in open("billing_report_2014-02-0" + str(num) + ".csv"):
    fout.write(line + ',DATE INFORMATION')
I am presuming your CSV is really comma-separated; if it is tab-separated, the character should be \t.
you could also use an intermediate step by changing line:
line = line + ', DATE INFORMATION'
as you are trying to add the date from the file name, just build it from the loop variable:
line = line + ', 2014-02-' + str(num).zfill(2)
You could use the replace function if it is always the ",LLC" string expression; see the example below:
>>> string = "100, 90101, California, Example company,LLC, other data"
>>> string.replace(',LLC',';LLC')
'100, 90101, California, Example company;LLC, other data'
>>>
Putting it all together, and trying to bring in some of the inspiration from @Jon Clements as well (kudos!):
def combine_and_add_date(year, month, startday, endday, replace_dict):
    fout = open("out.csv", "a")
    for num in range(startday, endday + 1):
        daynum = str(num)
        if len(daynum) == 1:
            daynum = '0' + daynum
        date_info = year + '-' + month + '-' + daynum
        source_name = 'billing_report_' + date_info + '.csv'
        for line in open(source_name):
            for key in replace_dict:
                line = line.replace(key, replace_dict[key])
            fout.write(line.rstrip('\n') + ',' + date_info + '\n')
    fout.close()
I hope this works, and you would use it like this (hopefully; I am a newb...). Note that the dictionary is designed to let you make all kinds of replacements:
combine_and_add_date("2014","02",1,28, {',LLC': ';LLC', ',PLC':';PLC'})
fingers crossed
So, basically, I need a program that opens a .dat file, checks each line to see if it meets certain prerequisites, and if it does, copies it into a new CSV file.
The prerequisites are that the line must 1) contain "$W" or "$S" and 2) have its last value be one of a long list of acceptable terms. (I can simply make up a list of terms and hard-code them into a list.)
For example, if the CSV were a list of purchase information and the last item were what was purchased, I would only want to include fruit. In this case, the last item is an ID tag, and I only want to accept a handful of ID tags; there is a list of about 5 acceptable tags. The tags vary a lot in length, but they are always the last item on the line (and always the 4th item in the list).
Let me give a better example, again with the fruit.
My original .DAT might be:
DGH$G$H $2.53 London_Port Gyro
DGH.$WFFT$Q5632 $33.54 55n39 Barkdust
UYKJ$S.52UE $23.57 22#3 Apple
WSIAJSM_33$4.FJ4 $223.4 Ha25%ek Banana
Only the line "UYKJ$S.52UE $23.57 22#3 Apple" would be copied, because only it has both 1) a $W or $S (in this case a $S) and 2) a fruit as the last item. Once the .csv file is made, I will need to go back through it and replace all the spaces with commas, but that's not nearly as problematic for me as figuring out how to scan each line for the requirements and only copy the lines that are wanted.
I am making a few programs all very similar to this one: they open .dat files, check each line to see if it meets requirements, and then decide whether to copy it to the new file or not. But sadly, I have no idea what I am doing. They are all similar enough that once I figure out how to make one, the rest will be easy, though.
EDIT: The .DAT files are a few thousand lines long, if that matters at all.
EDIT 2: Some of my current code snippets
Right now, my current version is this:
def main():
    #NewFile_Loc = C:\Users\J18509\Documents
    OldFile_Loc = raw_input("Input File for MCLG:")
    OldFile = open(OldFile_Loc, "r")
    OldText = OldFile.read()
    # for i in range(0, len(OldText)):
    #     if (OldText[i] != " "):
    #         print OldText[i]
    i = split_line(OldText)
    if u'$S' in i:
        # $S is in the line
        print i

main()
But it's very choppy still. I'm just learning Python.
Brief update: the server I am working on is down, and might be for the next few hours, but here is my new code, which still has syntax errors in it. I'll update again once I get it working. Thanks a bunch, everyone!
import os

NewFilePath = "A:\test.txt"
Acceptable_Values = ('Apple','Banana')

#Main
def main():
    if os.path.isfile(NewFilePath):
        os.remove(NewFilePath)
    NewFile = open (NewFilePath, 'w')
    NewFile.write('Header 1,','Name Header,','Header 3,','Header 4)
    OldFile_Loc=raw_input("Input File for Program:")
    OldFile = open(OldFile_Loc,"r")
    for line in OldFile:
        LineParts = line.split()
        if (LineParts[0].find($W)) or (LineParts[0].find($S)):
            if LineParts[3] in Acceptable_Values:
                print(LineParts[1], ' is accepted')
                #This Line is acceptable!
                NewFile.write(LineParts[1],',',LineParts[0],',',LineParts[2],',',LineParts[3])
    OldFile.close()
    NewFile.close()

main()
There are two parts you need to implement: first, read the file line by line and write out the lines that meet specific criteria. This is done by
with open('file.dat') as f:
    for line in f:
        stripped = line.strip()  # remove '\n' from the end of the line
        if test_line(stripped):
            print stripped  # Write to stdout
The criteria you want to check are implemented in the function test_line. To check for the occurrence of "$W" or "$S", you can simply use the in operator, like
if not '$W' in line and not '$S' in line:
    return False
else:
    return True
To check if the last item in the line is contained in a fixed list, first split the line using split(), then take the last item using the index notation [-1] (negative indices count from the end of a sequence), and then use the in operator again against your fixed list. That looks like
items = line.split()  # items is an array of strings
last_item = items[-1]  # take the last element of the array
if last_item in ['Apple', 'Banana']:
    return True
else:
    return False
Now, you combine these two parts into the test_line function like
def test_line(line):
    if not '$W' in line and not '$S' in line:
        return False
    items = line.split()  # items is an array of strings
    last_item = items[-1]  # take the last element of the array
    if last_item in ['Apple', 'Banana']:
        return True
    else:
        return False
Note that the program writes its result to stdout, which you can easily redirect. If you want to write the output to a file instead, have a look at Correct way to write line to file in Python.
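To produce the comma-separated output file mentioned in the question instead of printing to stdout, a minimal sketch reusing test_line could be (output.csv is just an example name):

with open('file.dat') as f, open('output.csv', 'w') as out:
    for line in f:
        stripped = line.strip()
        if test_line(stripped):
            out.write(stripped.replace(' ', ',') + '\n')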
inlineRequirements = ['$W', '$S']
endlineRequirements = ['Apple', 'Banana']

inputFile = open(input_filename, 'rb')
outputFile = open(output_filename, 'wb')

for line in inputFile.readlines():
    line = line.strip()
    # trailing and leading whitespace has been removed
    if any(req in line for req in inlineRequirements):
        # passed inline requirement
        lastWord = line.split(' ')[-1]
        if lastWord in endlineRequirements:
            # passed endline requirement
            outputFile.write(line.replace(' ', ',') + '\n')
            # replaced spaces with commas and wrote the line to the file
inputFile.close()
outputFile.close()
tags = ['apple', 'banana']
match = ['$W', '$S']

OldFile_Loc = raw_input("Input File for MCLG:")
OldFile = open(OldFile_Loc, "r")

for line in OldFile.readlines():  # Loop through the file
    line = line.strip()  # Remove the newline and whitespace
    if line and not line.isspace():  # If the line isn't empty
        lparts = line.split()  # Split the line
        if any(tag.lower() == lparts[-1].lower() for tag in tags) and any(c in line for c in match):
            # $S or $W is in the line AND the last section is in tags (case insensitive)
            print line
import re

list_of_fruits = ["Apple", "Banana", ...]

with open('some.dat') as f:
    for line in f:
        if re.findall("\$[SW]", line) and line.split()[-1] in list_of_fruits:
            print "Found:%s" % line
import os

NewFilePath = "A:\\test.txt"
Acceptable_Values = ('Apple', 'Banana')

#Main
def main():
    if os.path.isfile(NewFilePath):
        os.remove(NewFilePath)
    NewFile = open(NewFilePath, 'w')
    NewFile.write('Header 1,Name Header,Header 3,Header 4\n')
    OldFile_Loc = raw_input("Input File for Program:")
    OldFile = open(OldFile_Loc, "r")
    for line in OldFile:
        LineParts = line.split()
        if ('$W' in LineParts[0]) or ('$S' in LineParts[0]):
            if LineParts[3] in Acceptable_Values:
                print(LineParts[1] + ' is accepted')
                #This line is acceptable!
                NewFile.write(LineParts[1] + ',' + LineParts[0] + ',' + LineParts[2] + ',' + LineParts[3] + '\n')
    OldFile.close()
    NewFile.close()

main()
This worked great, and has all the capabilities I needed. The other answers are good, but none of them do 100% of what I needed like this one does.