I have a python program where I pass in a file, I read this file and then split the line at a colon.
I then print both of these parts, do some checking on them, and pass them into a function which, if it finds a match, prints out the match and then returns. However, I cannot figure out how to then get the next line in my file; the program currently just keeps going over and over on that one line.
# Python 2 script: for every "left:right" record in `myfile`, feed each word
# of every .txt/.lst file named in `filenames` to ThenWord together with the
# right-hand part of the record.
with open(myfile, 'r') as record_file:
    for record in record_file:
        fields = record.split(":")
        part1 = fields[0].strip()
        part2 = fields[1].strip()
        print(part1)
        print(part2)
        print("**************")
        for name in filenames:
            print("Starting " + name)
            # Only word lists with a .txt or .lst marker in their name are scanned.
            if any(marker in name for marker in (".txt", ".lst")):
                name = os.path.join(mypath, name)
                with open(name, 'r') as word_file:
                    for text in word_file:
                        for word in text.split():
                            ThenWord(part2, word)
I have tried break, continue and else, along with next() but I can't seem to get it working, or it's in the wrong place.
How would I get the next line from the open file and then start the for loop again to split at the colon, line 3 and 4.
EDIT:
I have added two breaks, but now the loop over the files I try to match the word against (for file in filenames) only reads the first file and then moves on to the next line from myfile.
# Python 2 script, second attempt with two `break` statements added.
# NOTE(review): the original indentation was lost, so the nesting of the two
# `break`s below is a best guess chosen to match the described symptom (only
# the first file is read before moving to the next record) - confirm against
# the real script.
with open(myfile, 'r') as record_file:
    for record in record_file:
        fields = record.split(":")
        part1 = fields[0].strip()
        part2 = fields[1].strip()
        print(part1)
        print(part2)
        print("**************")
        for name in filenames:
            print("Starting " + name)
            if ".txt" in name or ".lst" in name:
                name = os.path.join(mypath, name)
                with open(name, 'r') as word_file:
                    for text in word_file:
                        for word in text.split():
                            ThenWord(part2, word)
                        # Leaves the per-line loop after the first line.
                        break
            # Leaves the per-file loop after the first entry of `filenames`.
            break
def ThenWord(salt, word):
    """Report whether ``md5(salt + word)`` equals the target digest.

    The target digest is read from the module-level name ``hash`` (set by the
    surrounding script; it shadows the builtin of the same name).

    Args:
        salt: Text prepended to ``word`` before hashing.
        word: Candidate word to test.

    Returns:
        True if the hex digest matches (the match is also printed),
        False otherwise, so callers can stop searching on the first hit.
    """
    salted = salt + word
    # hashlib only accepts bytes. A Python 2 ``str`` already is bytes; a
    # Python 3 ``str`` (or Python 2 ``unicode``) has to be encoded first.
    if not isinstance(salted, bytes):
        salted = salted.encode("utf-8")
    if hashlib.md5(salted).hexdigest() != hash:
        return False
    print("************ " + hash + " ************")
    print("******* Enough said - " + word + " ******* ")
    return True
I want it so that once it has found a match, it moves on to the next hash in the file (myfile) without scanning through every other file in filenames.
It finally appears that your problem is to exit a deeply nested loop. A possible solution is to raise an exception
class MatchFoundException(Exception):
    """Raised to leave the nested search loops as soon as a match is found."""
    pass
with open(myfile, 'r') as hf:
for ...
...
try:
for file in filenames:
...
for word in line.split():
if ThenWord(part2, word):
raise MatchFoundException(('Found', part2, word))
except MatchFoundException:
# do something
else:
# optionally do something
You need to change ThenWord to return True or False for example.
Related
I have a Python script that opens text files and then saves them as newly parsed files.
The code works: print(lines) on line 69 prints the parsed contents and the names of the files are also output if writing is successful.
However, if I remove or comment all of the code below line 70, print(lines) on 69 no longer works.
import os
import re
def parse_file(file_path, output_dir="D:\\Dev\\PAD\\pcbs-parsed-text"):
    """Clean up one raw text file and save the result as a new text file.

    The first line is tagged with ``"PART, "``, unit suffixes and bullet
    characters are stripped, ``":"`` becomes ``","`` and the last line is
    reduced to ``"Price, <digits>"``. The cleaned lines are printed and then
    written to ``<output_dir>/<file_name>.txt``.

    Args:
        file_path: Path of the raw text file to parse.
        output_dir: Directory the parsed file is written to. Defaults to the
            original hard-coded location.

    Returns:
        The output file name (without directory or ``.txt`` extension).

    Raises:
        ValueError: If the file has no usable content, or its last line
            contains no ``$<digits>`` amount.
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        contents = f.read()

    # Drop the end-of-file marker line(s) before anything else.
    lines = [line for line in contents.split('\n') if "END OF TEXT FILE" not in line]
    if not lines:
        raise ValueError(f"no content to parse in {file_path!r}")

    # Tag the first line; this happens BEFORE blank lines are removed, so an
    # empty first line still becomes the "PART, " header.
    lines[0] = "PART, " + lines[0]
    lines = [line for line in lines if line.strip()]

    # Remove asterisks and bullet characters.
    lines = [line.replace("*", "").replace("•", "") for line in lines]

    # Strip unit suffixes. The substitutions run one after another (not as a
    # single alternation) so the result matches the original pass-by-pass order.
    # NOTE(review): the last pattern is a lowercase "w", so it also removes
    # " w" at the start of ordinary words; presumably " W" (watts) was
    # intended - confirm against the real data before changing it.
    for unit_pattern in (r'\sGB', r'\sMHz', r'\smm', r'\sw'):
        lines = [re.sub(unit_pattern, '', line) for line in lines]

    # Merge the second line into the first unless it is a "VRAM"/"Lighting" field.
    if len(lines) > 1 and not lines[1].startswith(("VRAM", "Lighting")):
        lines[0] = lines[0] + " " + lines[1]
        lines.pop(1)

    lines = [line.replace(":", ",") for line in lines]

    # Reduce the last line to the digits that follow the dollar sign.
    dollar_amount_match = re.search(r'\$(\d+)', lines[-1])
    if dollar_amount_match is None:
        raise ValueError(f"no dollar amount found in last line: {lines[-1]!r}")
    lines[-1] = "Price, " + dollar_amount_match.group(1)

    lines = [line.strip() for line in lines]

    # CHECK OUTPUT
    print(lines)

    # WRITING TO FILES
    file_name = lines[0]
    if any("VRAM" in line for line in lines):
        file_name = "GPU - " + file_name
    if any("RAM Type" in line for line in lines) and any("Frequency" in line for line in lines):
        file_name = "RAM - " + file_name

    new_file_path = os.path.join(output_dir, file_name + ".txt")
    # Write with the same encoding the input was decoded with; the platform
    # default can fail on characters that UTF-8 input may contain.
    with open(new_file_path, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in lines)
    return file_name
# Parse every file in the raw-text folder whose name contains "output" and
# report which ones succeeded or failed.
raw_text_dir = "D:\\Dev\\PAD\\pcbs-raw-text"
files = [f for f in os.listdir(raw_text_dir) if "output" in f]
successes = []
errors = []
# parse each file
for file in files:
    file_path = os.path.join(raw_text_dir, file)
    try:
        file_name = parse_file(file_path)
    except Exception as e:
        # Report the SOURCE file name: `file_name` is only bound when
        # parse_file returns, so in this branch it is either undefined (first
        # file) or left over from the previous successful file.
        errors.append(file)
        print(f"Error parsing file {file}: {e}")
    else:
        successes.append(file_name)
# check for success and print success or error, listing each file created
if errors:
    print(f"Error parsing files: {errors}")
else:
    print(f"Successfully parsed and saved files: {successes}")
I expected the print(lines) call to print the contents of the variable to the console but nothing happens.
To the best of my ability I've checked that print(lines) is not inside a conditional or a returned function.
I thought that Python otherwise executes top down, so I'm not sure about this one.
Still learning so it's probably something silly! Thanks.
I've been trying to write my Python code, but it always outputs the file with a blank line at the end. Is there a way to modify my code so it doesn't write that last blank line?
def write_concordance(self, filename):
    """Write the concordance entries to the output file ``filename``.

    One line is written per word, in sorted order, formatted as
    ``word: n1 n2 ...``; every line (including the last) ends with a newline.

    Args:
        filename: Path of the file to create or overwrite.

    Raises:
        FileNotFoundError: If ``filename`` cannot be opened for writing
            because its directory does not exist.
    """
    try:
        file_out = open(filename, "w")
    except FileNotFoundError as err:
        raise FileNotFoundError("File Not Found") from err
    # `with` closes the file even if a table lookup below raises.
    with file_out:
        word_lst = self.concordance_table.get_all_keys()  # all the words
        word_lst.sort()
        for word in word_lst:
            line_numbers = self.concordance_table.get_value(word)
            ln_str = "".join(" " + str(c) for c in line_numbers)
            file_out.write(word + ":" + ln_str + "\n")
Output_file
Line 13 in this picture is what I need gone
Put in a check so that the new line is not added for the last element of the list:
def write_concordance(self, filename):
    """Write the concordance entries to the output file ``filename``.

    One entry is written per word, in sorted order, formatted as
    ``word: n1 n2 ...``. Entries are separated by newlines, so the file does
    NOT end with a trailing newline.

    Args:
        filename: Path of the file to create or overwrite.

    Raises:
        FileNotFoundError: If ``filename`` cannot be opened for writing
            because its directory does not exist.
    """
    try:
        file_out = open(filename, "w")
    except FileNotFoundError as err:
        raise FileNotFoundError("File Not Found") from err
    # `with` closes the file even if a table lookup below raises.
    with file_out:
        word_lst = self.concordance_table.get_all_keys()  # all the words
        word_lst.sort()
        entries = []
        for word in word_lst:
            line_numbers = self.concordance_table.get_value(word)
            entries.append(word + ":" + "".join(" " + str(c) for c in line_numbers))
        # Joining puts a newline BETWEEN entries only, which avoids having to
        # detect the last word by comparing values.
        file_out.write("\n".join(entries))
The issue is here:
file_out.write(i + ":" + ln_str + "\n")
The \n adds a new line.
The way to fix this is to rewrite it slightly:
# Build every "word: n1 n2 ..." entry first, then write them joined by
# newlines so no newline follows the last entry.
ln_strs = []
for i in word_lst:
    ln_num = self.concordance_table.get_value(i)  # line number list
    # Convert each entry with str() (as the original loop did): str.join()
    # raises TypeError on non-string items such as ints.
    ln_str = "".join(f" {c}" for c in ln_num)
    # Same "word: 1 2 3" layout the original write() produced.
    ln_strs.append(f"{i}:{ln_str}")
file_out.write('\n'.join(ln_strs))
By the way, you should not use file_out = open() and file_out.close(); use with open() as file_out: instead. That way the file is always closed, and an exception won't leave it hanging open.
I'm creating a program that should create a file (.txt) based on each line of 'clouds.txt'. This is my code:
def CreateFile():
    """Create (or truncate) the file named by the global ``file_name``.

    Writes the global ``list_email`` followed by ``":"`` into it.
    """
    # `with` closes the handle even if the write raises.
    with open(file_name, "w+") as f:
        f.write(list_email + ":")
def WriteInConfig():
    """Append the global ``list_name`` as a new line to ``config/config.txt``."""
    # `with` closes the handle even if the write raises.
    with open("config/config.txt", "a") as f:
        f.write(list_name + "\n")
# Reads '|'-separated records from clouds.txt, creates one config file per
# record and removes the first line of clouds.txt after each record.
with open("clouds.txt","r") as f:
    # Counts the lines through a second, separate handle; the result is not
    # used anywhere below in this snippet.
    list_lines = sum(1 for line in open('clouds.txt'))
    # readline() returns ONE line as a string (the first line of the file)...
    lines = f.readline()
    # ...so this loop runs once per CHARACTER of that string, not once per
    # line of the file.
    for line in lines:
        # Reads the next line from the same handle; returns '' once the file
        # is exhausted, which makes the split('|')[1] below raise IndexError.
        first_line = f.readline().strip()
        list_email = first_line.split('|')[1] #email
        print("Email: " + list_email)
        list_pass = first_line.split('|')[2] #pass
        print("Pass: " + list_pass)
        list_name = first_line.split('|')[3] #name
        print(list_name)
        global file_name
        file_name = "config/." + list_name + ".txt"
        # Rewrite clouds.txt without its first line while `f` is still open
        # on the same file.
        with open('clouds.txt', 'r') as fin:
            data = fin.read().splitlines(True)
        with open('clouds.txt', 'w') as fout:
            fout.writelines(data[1:])
        CreateFile()
        WriteInConfig()
The clouds.txt file looks like this:
>|clouds.n1c0+mega01#gmail.com|cwSHklDIybllCD1OD4M|Mega01|15|39.91|FdUkLiW0ThDeDkSlqRThMQ| |x
|clouds.n1c0+mega02#gmail.com|tNFVlux4ALC|Mega02|50|49.05|lq1cTyp13Bh9-hc6cZp1RQ|xxx|x
|clouds.n1c0+mega03#gmail.com|7fe4196A4CUT3V|Mega03|50|49.94|BzW7NOGmfhQ01cy9dAdlmg|xxx|xxx >
Everything works fine until 'Mega48'. There I get "IndexError: list index out of range"
>|clouds.n1c0+mega47#gmail.com|bd61t9zxcuC1Yx|Mega47|50|10|Xjff6C8mzEqpa3VcaalUuA|xxx|x
|clouds.n1c0+mega48#gmail.com|kBdnyB6i0PUyUb|Mega48|50|0|R6YfuGP2hvE-uds0ylbQtQ|xxx|x
|clouds.n1c0+mega49#gmail.com|OcAdgpS4tmSLTO|Mega49|50|28.65|xxx| >
I checked and there are no spaces/other characters. As you could see, after creating the file, the program deletes the line. After the error, if I'm starting the program again (and starts from 'Mega47') it doesn't show the error, and everything works as planned.
Any ideas how to fix this?
I see many mistakes in your code. First, what do you want with this list_lines = sum(1 for line in open('clouds.txt'))?
You have a problem in your for loop: because you did lines = f.readline(), lines is just the first line (a string). When you then do for line in lines, line is each character of that first line, and there are more characters in the first line than there are lines left in your file to read, so f.readline() eventually returns an empty string.
[edited]
you don't need to know the number of lines in the file to do a for loop. You can just do for line in f:, then you don't need to read the line again with readline it is already in the variable line
def deleteEmployee(self,code,name):
    """Look for a line equal to ``code`` in employee.data and try to blank it.

    ``name`` is accepted but not used in the body.
    """
    with open("employee.data","r+") as file:
        # data=file.readlines()
        for num, i in enumerate(file,1):
            print(i)
            # Drops the final character of the line (the newline, when present).
            a=i[:len(i)-1]
            if str(a)==str(code):
                print("found at",num)
                # NOTE(review): seek() takes a stream position, while `num` is
                # the 1-based line count from enumerate - so this does not
                # move to the start of the matching line.
                file.seek(num)
                file.write("\n")
        # Redundant: the `with` block already closes the file.
        file.close()
I just want to write some file-handling code. Here I define a delete function that should delete a particular code if it exists inside the file, but it's not working.
This code should achieve what you want:
def deleteEmployee(self, code, name, path="employee.data"):
    """Blank out every line of the employee file whose text equals ``code``.

    Matching lines are replaced by an empty line (a lone newline); all other
    lines are kept unchanged. The file is rewritten in place.

    Args:
        code: Employee code to remove; compared as ``str(code)``.
        name: Unused; kept for interface compatibility.
        path: File to edit. Defaults to ``"employee.data"``.
    """
    kept = []
    with open(path, "r+") as file:
        for num, line in enumerate(file, 1):
            print(line)
            # Strip only the newline: slicing off the last character would
            # chop a real character from a final line with no newline.
            if line.rstrip("\n") == str(code):
                print("found at ", num)
                kept.append("\n")  # blank line in place of the removed entry
            else:
                kept.append(line)
        file.seek(0)               # back to the start of the file
        file.write("".join(kept))  # write the cleaned content
        file.truncate()            # drop whatever old content remains after it
I am trying to replace text in a text file by reading each line, testing it, then writing if it needs to be updated. I DO NOT want to save as a new file, as my script already backs up the files first and operates on the backups.
Here is what I have so far... I get fpath from os.walk() and I guarantee that the pathmatch var returns correctly:
# Python 2 fragment: rewrite href paths in the file at `fpath`, in place.
fpath = os.path.join(thisdir, filename)
with open(fpath, 'r+') as f:
    # readlines() consumes the whole file before the loop body runs.
    for line in f.readlines():
        if '<a href="' in line:
            # `filelist` is iterated for its keys and indexed with them below.
            for test in filelist:
                pathmatch = file_match(line, test)
                if pathmatch is not None:
                    repstring = filelist[test] + pathmatch
                    print 'old line:', line
                    line = line.replace(test, repstring)
                    print 'new line:', line
                    # Only lines that matched are written, and nothing rewinds
                    # or truncates the file first.
                    # NOTE(review): presumably the writes therefore land after
                    # the content already read - confirm.
                    f.write(line)
But what ends up happening is that I only get a few lines (updated correctly, mind you, but repeated from earlier in the file) corrected. I think this is a scoping issue, afaict.
*Also: I would like to know how to only replace the text upon the first instance of the match, for ex., I don't want to match the display text, only the underlying href.
First, you want to write the line whether it matches the pattern or not. Otherwise, you're writing out only the matched lines.
Second, between reading the lines and writing the results, you'll need to either truncate the file (can f.seek(0) then f.truncate()), or close the original and reopen. Picking the former, I'd end up with something like:
# Read every line, empty the file, then write each (possibly updated) line
# back so that unmatched lines are preserved too.
fpath = os.path.join(thisdir, filename)
with open(fpath, 'r+') as f:
    lines = f.readlines()
    # Rewind and clear the file before writing the new content.
    f.seek(0)
    f.truncate()
    for text in lines:
        if '<a href="' in text:
            for old_path in filelist:
                suffix = file_match(text, old_path)
                if suffix is None:
                    continue
                text = text.replace(old_path, filelist[old_path] + suffix)
        # Written whether or not the line was changed.
        f.write(text)
Open the file for read and copy all of the lines into memory. Close the file.
Apply your transformations on the lines in memory.
Open the file for write and write out all the lines of text in memory.
# 1) Read all lines into memory (trailing whitespace removed) and close the file.
with open(filename, "r") as source:
    stripped = [raw.rstrip() for raw in source]
# 2) Transform the lines that match the pattern; keep the others as they are.
altered_lines = [some_func(text) if regex.match(text) else text for text in stripped]
# 3) Reopen for writing and write everything back, newline-terminated.
with open(filename, "w") as target:
    target.write('\n'.join(altered_lines) + '\n')
A (relatively) safe way to replace a line in a file.
#!/usr/bin/python
# defensive programming style
# function to replace a line in a file
# and not destroy data in case of error
def replace_line(filepath, oldline, newline):
    """Replace every line equal to ``oldline`` with ``newline`` in a file.

    The new content is first staged in a temporary file and only copied over
    the original once staging succeeded, so a failure while building the new
    content leaves the original untouched.

    Lines are compared whole, including their line terminator, so ``oldline``
    normally has to end with ``"\\n"`` (and ``newline`` should too).

    Args:
        filepath: Path of an existing text file to edit in place.
        oldline: Exact line to look for (non-empty).
        newline: Text written in place of each matching line (non-empty).

    Returns:
        True if at least one line was replaced and the file was rewritten;
        False if ``oldline`` was not found or an I/O error occurred.

    Raises:
        AssertionError: If ``filepath`` does not exist or either line is empty.
    """
    import os
    from tempfile import NamedTemporaryFile

    # quick parameter checks
    assert os.path.exists(filepath)
    assert (oldline and str(oldline))  # is not empty
    assert (newline and str(newline))

    replaced = False
    written = False
    try:
        with open(filepath, 'r+') as f:
            lines = f.readlines()
            if oldline in lines:
                # Text mode: the default 'w+b' would reject str on Python 3.
                with NamedTemporaryFile(mode='w+') as tmpfile:
                    for line in lines:
                        if line == oldline:
                            tmpfile.write(newline)
                            replaced = True
                        else:
                            tmpfile.write(line)  # keep other lines unchanged
                    # Rewind the staging file, otherwise nothing is read back.
                    tmpfile.seek(0)
                    f.seek(0)
                    f.truncate()  # empty the original before copying back
                    for staged in tmpfile:
                        f.write(staged)
                    written = True
    except IOError as ioe:
        # Both files are closed by their `with` blocks.
        print("ERROR %s" % ioe)
        return False
    return replaced and written
(note: this replaces entire lines only , and all of the lines that match in the file)