Using this Python code, the lines of the file are printed in UPPERCASE, but the file itself remains unchanged (lowercase).
def open_f():
    """Prompt for a filename until one opens; return the handle opened in "r+" mode.

    Typing "done" instead of a filename ends the program (SystemExit).
    """
    try:
        read_input = raw_input  # Python 2
    except NameError:
        read_input = input  # Python 3
    while True:
        fname = read_input("Enter filename:")
        if fname == "done":
            raise SystemExit
        try:
            # "r+" opens an existing file for both reading and writing.
            return open(fname, "r+")
        except IOError:
            # Only a failed open is reported; a bare except would also have
            # swallowed Ctrl-C and programming errors.
            print("WRONG!!!")
# Open the chosen file, then print every line upper-cased and stripped of
# surrounding whitespace.
fhand=open_f()
for line in fhand:
ss=line.upper().strip()
print ss
# NOTE(review): the write goes through the same handle the loop reads from,
# with no seek in between, and strip() has already removed the newline.
# Presumably this line sits inside the loop (indentation was lost) - confirm.
fhand.write(ss)
fhand.close()
Can you please suggest why the file remains unaffected?
Code:
def file_reader(read_from_file):
    """Return the entire text content of the file at ``read_from_file``."""
    # The with-statement closes the file as soon as the content is read.
    with open(read_from_file, 'r') as f:
        return f.read()
def file_writer(read_from_file, write_to_file):
    """Copy the text of ``read_from_file`` into ``write_to_file``.

    ``write_to_file`` is created if missing and overwritten otherwise.
    """
    # Read the source before opening the destination: mode 'w' truncates on
    # open, so the old order emptied the destination even when the read then
    # failed, and lost the data when both names referred to the same file.
    with open(read_from_file, 'r') as source:
        content = source.read()
    with open(write_to_file, 'w') as f:
        f.write(content)
Usage:
Create a file named example.txt with the following content:
Hi my name is Dmitrii Gangan.
Create an empty file called file_to_be_written_to.txt
Add file_writer("example.txt", "file_to_be_written_to.txt") as the last line of your .py Python file.
Run python <your_python_script.py> from the terminal.
NOTE: They all must be in the same folder.
Result:
file_to_be_written_to.txt:
Hi my name is Dmitrii Gangan.
This program should do as you requested and allows for modifying the file as it is being read. Each line is read, converted to uppercase, and then written back to the source file. Since it runs on a line-by-line basis, the most extra memory it should need would be related to the length of the longest line.
Example 1
def main():
    """Upper-case a user-chosen text file in place, one line at a time."""
    with get_file('Enter filename: ') as file:
        while True:
            position = file.tell()      # remember beginning of line
            line = file.readline()      # get the next available line
            if not line:                # check if at end of the file
                break                   # program is finished at EOF
            file.seek(position)         # go back to the line's start
            # NOTE(review): overwriting in place assumes upper() leaves the
            # encoded length of the line unchanged - confirm for non-ASCII text.
            file.write(line.upper())    # write the line in uppercase
def get_file(prompt):
    """Ask for a filename with ``prompt`` until one opens; return the handle.

    The file is opened in 'r+t' mode (read, write, text).

    Raises:
        SystemExit: when input ends (``input`` raised EOFError).
    """
    while True:
        try:                                    # run and catch any error
            return open(input(prompt), 'r+t')   # r+t = read, write, text
        except EOFError:                        # see if user is finished
            raise SystemExit()                  # exit the program if so
        except OSError as error:                # check for file problems
            print(error)                        # report operation errors


if __name__ == '__main__':
    main()
The following is similar to what you see up above but works in binary mode instead of text mode. Instead of operating on lines, it processes the file in chunks based on the given BUFFER_SIZE and can operate more efficiently. The code under the main loop may replace the code in the loop if you wish for the program to check that it is operating correctly. The assert statements check some assumptions.
Example 2
BUFFER_SIZE = 1 << 20  # amount read and rewritten per iteration (2**20)


def main():
    """Upper-case a user-chosen file in place, BUFFER_SIZE bytes at a time."""
    with get_file('Enter filename: ') as file:
        while True:
            position = file.tell()
            buffer = file.read(BUFFER_SIZE)
            if not buffer:
                # An empty read means end of file: the whole job is done.
                return
            file.seek(position)
            file.write(buffer.upper())
        # The following code will not run but can replace the code in the loop.
        start = file.tell()
        buffer = file.read(BUFFER_SIZE)
        if not buffer:
            return
        stop = file.tell()
        assert file.seek(start) == start
        assert file.write(buffer.upper()) == len(buffer)
        assert file.tell() == stop
def get_file(prompt):
    """Ask for a filename with ``prompt`` until one opens; return the handle.

    The file is opened in 'r+b' mode (read, write, binary).

    Raises:
        SystemExit: when input ends (``input`` raised EOFError).
    """
    while True:
        try:
            return open(input(prompt), 'r+b')
        except EOFError:
            # No more input: leave the program instead of asking again.
            raise SystemExit()
        except OSError as error:
            # Report the problem and ask for another filename.
            print(error)


if __name__ == '__main__':
    main()
I suggest the following approach:
1) Read/close the file, return the filename and content
2) Create a new file with above filename, and content with UPPERCASE
def open_f():
    """Prompt until a file can be read; return ``(filename, content)``.

    The file is closed again before returning. Typing "done" instead of a
    filename ends the program (SystemExit).
    """
    try:
        read_input = raw_input  # Python 2
    except NameError:
        read_input = input  # Python 3
    while True:
        fname = read_input("Enter filename:")
        if fname == "done":
            raise SystemExit
        try:
            with open(fname, "r+") as fhand:
                return fname, fhand.read()
        except (IOError, UnicodeError):
            # The file could not be opened or decoded: ask again. Other
            # exceptions (e.g. Ctrl-C) are no longer swallowed.
            print("WRONG!!!")
fname, ss = open_f()
# Re-open the same name in "w+" mode, which truncates the file, then write the
# upper-cased text back.
with open(fname, "w+") as fhand:
    fhand.write(ss.upper())
Like already alluded to in comments, you cannot successively read from and write to the same file -- the first write will truncate the file, so you cannot read anything more from the handle at that point.
Fortunately, the fileinput module offers a convenient inplace mode which works exactly like you want.
import fileinput
# With inplace=True, fileinput redirects standard output into the file being
# read, so each print() call replaces the line that was just read.
for line in fileinput.input(somefilename, inplace=True):
    print(line.upper().strip())
Related
I want to convert the corpus hu.txt.xz (15GB, which becomes around 60GB after unpacking) into smaller text files, each less than 1GB or 100000 lines.
The expected output:
| split_1.txt
| split_2.txt
| split_3.txt
.....
| split_n.txt
I have this script on a local machine, but it doesn't work: it just keeps loading without processing anything, I think because the data is so big:
import fun
import sys
import os
import shutil
# //-----------------------
# Retrieve and return output file max lines from input
def how_many_lines_per_file():
    """Ask for the per-file line limit and return it as a positive int.

    Prints an error and exits with status 1 when the reply is not a whole
    number of at least 1.
    """
    try:
        max_lines = int(input("Max lines per output file: "))
        if max_lines < 1:
            # A limit below one line per file cannot be honoured.
            raise ValueError(max_lines)
    except ValueError:
        print("Error: Please use a valid number.")
        sys.exit(1)
    return max_lines
# //-----------------------
# Retrieve input filename and return file pointer
def file_dir():
    """Ask for the input filename and return that file opened for reading.

    Prints an error and exits with status 1 when the file does not exist.
    """
    filename = input("Input filename: ")
    try:
        return open(filename, 'r')
    except FileNotFoundError:
        print("Error: File not found.")
        sys.exit(1)
# //-----------------------
# Create output file
def create_output_file_dir(num, filename):
    """Open ``./data/output_<filename>/split_<num>.txt`` in append mode and return it."""
    return open(f"./data/output_{filename}/split_{num}.txt", "a")
# //-----------------------
# Create output directory
def create_output_directory(filename):
    """(Re)create the empty directory ``./data/output_<filename>``.

    Prints an error and exits with status 1 when it cannot be created.
    """
    output_path = f"./data/output_{filename}"
    try:
        if os.path.exists(output_path):  # Remove directory if exists
            shutil.rmtree(output_path)
        # makedirs also creates ./data when it is missing; os.mkdir failed there.
        os.makedirs(output_path)
    except OSError:
        print("Error: Failed to create output directory.")
        sys.exit(1)
def ch_dir():
    """Change into ``./data``, printing the working directory before and after."""
    # Print the current working directory
    print(f"Current working directory: {os.getcwd()}")
    # Change the current working directory
    os.chdir('./data')
    # Print the current working directory
    print(f"Current working directory: {os.getcwd()}")
# //-----------------------
def split_file():
# Splits a user-chosen input file into numbered output files, starting a new
# output file whenever the per-file line counter passes the user-chosen limit.
try:
line_count = 0
split_count = 1
max_lines = how_many_lines_per_file()
# ch_dir()
input_file = fun.file_dir()
# NOTE(review): readlines() loads every line of the input into one list
# before any output is written.
input_lines = input_file.readlines()
create_output_directory(input_file.name)
output_file = create_output_file_dir(split_count, input_file.name)
for line in input_lines:
output_file.write(line)
line_count += 1
# Create new output file if current output file's line count is greater than max line count
if line_count > max_lines:
split_count += 1
line_count = 0
output_file.close()
# Prevent creation of an empty file after splitting is finished
# NOTE(review): neither len(input_lines) nor max_lines changes inside the
# loop, so this test gives the same answer on every iteration.
if not len(input_lines) == max_lines:
output_file = create_output_file_dir(split_count, input_file.name)
# Handle errors
except Exception as e:
print(f"An unknown error occurred: {e}")
# Success message
else:
print(f"Successfully split {input_file.name} into {split_count} output files!")
# //-----------------------
if __name__ == "__main__":
split_file()
Is there any Python script or deep learning tool to split the corpus so that I can use the pieces in the next task?
By calling readlines() on the input file handle, you are reading (or trying to) the whole file into memory at the same time. You can do this instead to process the file one line at a time, never having more than a single line in memory:
input_file = fun.file_dir()
...
for line in input_file:
...
Another issue to be aware of is that this line:
if not len(input_lines) == max_lines:
output_file = create_output_file_dir(split_count, input_file.name)
is likely not doing what you think it is. Neither input_lines nor max_lines will ever change inside the loop, so this will either always create a new file or never will. Unless you happen to process a file with exactly max_lines lines in it, this will always be true. This is not a big deal, but I think as your code is now you're going to end up with an extra empty file. You need to change the logic anyway, so you'll have to rethink how to make this work.
UPDATE:
Here's how I would modify the logic to do the right thing regarding opening each of the output files:
input_file = fun.file_dir()
# output_file = create_output_file_dir(split_count, input_file.name)
output_file = None
...
for line in input_file:
    # Open a new output file if we don't have one open
    if output_file is None:
        output_file = create_output_file_dir(split_count, input_file.name)
    output_file.write(line)
    line_count += 1
    # Close the current output file if the line count has reached its max
    # (">=": with ">" every file would receive max_lines + 1 lines)
    if line_count >= max_lines:
        split_count += 1
        line_count = 0
        output_file.close()
        output_file = None
The key idea here is that you can't know if you need a new output file until you have tried to read the next line after closing the current output file. This logic only opens an output file when it has a line to write out and there is no open output file.
You're trying to allocate a big file into memory which is not possible.
Instead of reading all the content at once just read line by line and process it.
I've fixed the bug spotted by @CryptoFool.
import fun
import sys
import os
import shutil
# //-----------------------
# Retrieve and return output file max lines from input
def how_many_lines_per_file():
    """Ask for the per-file line limit and return it as a positive int.

    Prints an error and exits with status 1 when the reply is not a whole
    number of at least 1.
    """
    try:
        max_lines = int(input("Max lines per output file: "))
        if max_lines < 1:
            # A limit below one line per file cannot be honoured.
            raise ValueError(max_lines)
    except ValueError:
        print("Error: Please use a valid number.")
        sys.exit(1)
    return max_lines
# //-----------------------
# Retrieve input filename and return file pointer
def file_dir():
    """Ask for the input filename and return that file opened for reading.

    Prints an error and exits with status 1 when the file does not exist.
    """
    filename = input("Input filename: ")
    try:
        return open(filename, 'r')
    except FileNotFoundError:
        print("Error: File not found.")
        sys.exit(1)
# //-----------------------
# Create output file
def create_output_file_dir(num, filename):
    """Open ``./data/output_<filename>/split_<num>.txt`` in append mode and return it."""
    return open(f"./data/output_{filename}/split_{num}.txt", "a")
# //-----------------------
# Create output directory
def create_output_directory(filename):
    """(Re)create the empty directory ``./data/output_<filename>``.

    Prints an error and exits with status 1 when it cannot be created.
    """
    output_path = f"./data/output_{filename}"
    try:
        if os.path.exists(output_path):  # Remove directory if exists
            shutil.rmtree(output_path)
        # makedirs also creates ./data when it is missing; os.mkdir failed there.
        os.makedirs(output_path)
    except OSError:
        print("Error: Failed to create output directory.")
        sys.exit(1)
def ch_dir():
    """Change into ``./data``, printing the working directory before and after."""
    # Print the current working directory
    print(f"Current working directory: {os.getcwd()}")
    # Change the current working directory
    os.chdir('./data')
    # Print the current working directory
    print(f"Current working directory: {os.getcwd()}")
# //-----------------------
def split_file():
    """Split a user-chosen text file into numbered files of at most ``max_lines`` lines.

    The input is streamed one line at a time. Errors raised while splitting
    are reported on standard output rather than propagated.
    """
    try:
        split_count = 0  # number of output files opened so far
        line_count = 0   # lines written to the current output file
        max_lines = how_many_lines_per_file()
        # ch_dir()
        input_file = fun.file_dir()
        output_file = None
        try:
            create_output_directory(input_file.name)
            for line in input_file:
                # Open the next output file only when there is a line to put
                # in it, so no empty trailing file is ever created.
                if output_file is None:
                    split_count += 1
                    line_count = 0
                    output_file = create_output_file_dir(split_count, input_file.name)
                output_file.write(line)
                line_count += 1
                # ">=" closes the file at exactly max_lines lines; ">" let
                # every file grow to max_lines + 1.
                if line_count >= max_lines:
                    output_file.close()
                    output_file = None
        finally:
            # Release both handles even when the loop stops on an error.
            if output_file is not None:
                output_file.close()
            input_file.close()
    # Handle errors
    except Exception as e:
        print(f"An unknown error occurred: {e}")
    # Success message
    else:
        print(f"Successfully split {input_file.name} into {split_count} output files!")


# //-----------------------
if __name__ == "__main__":
    split_file()
I have the following python code whose purpose is to remove blank lines from an input text file. It should return an output file with all blank lines removed but it doesn't. What's the bug? Thank you!
import sys
def main():
# Reads the file named by argv[1] line by line and writes each line back out
# with its newline characters removed.
inputFileName = sys.argv[1]
outputFileName = sys.argv[2]
inputFile = open(inputFileName, "r")
# NOTE(review): this opens inputFileName, not outputFileName, for writing;
# outputFileName is never used below.
outputFile = open(inputFileName, "w")
for line in inputFile:
# NOTE(review): this is true for every line that ends with a newline, not
# only for blank lines.
if "\n" in line:
removeBlank = line.replace("\n", "")
outputFile.write(removeBlank)
else:
outputFile.write(line)
inputFile.close()
outputFile.close()
main()
You have a lot of problems with your code, especially the condition you use to check for an empty line. People have rightly pointed out some of the problems.
Here is a solution that should work and generate the output file with no empty lines.
import sys
def main():
    """Copy the file named by argv[1] to the file named by argv[2], omitting blank lines.

    A line counts as blank when it is empty after stripping whitespace.
    """
    inputFileName = sys.argv[1]
    outputFileName = sys.argv[2]
    # Write to the *output* name: opening the input name with "w" truncated
    # the file before a single line could be read from it.
    with open(inputFileName) as inputFile, open(outputFileName, "w") as outputFile:
        for line in inputFile:  # stream the lines instead of readlines()
            if line.strip() != '':
                outputFile.write(line)


if __name__ == '__main__':
    main()
At present your code appears to truncate its input file immediately after opening it. At best this might give differing results on different platforms. On some platforms the file might be empty. I presume that opening the input file for writing was a typo.
A better way to approach this problem is to use a generator. Also, the correct test for an empty line is line == '\n', not '\n' in line, which will be true for all returned lines except perhaps the last.
def noblanks(file):
    """Yield each line of ``file`` except those consisting of a lone newline."""
    for line in file:
        # Only an exact '\n' counts as blank; lines holding spaces are kept.
        if line != '\n':
            yield line
You can use this like so:
# outputFileName is spelled as in the scripts above (the snippet had
# "outputFilename", which is not defined anywhere).
with open(inputFileName, "r") as inf, open(outputFileName, 'w') as outf:
    for line in noblanks(inf):
        outf.write(line)
The context managers in the with statement will ensure that your files are properly closed without further action on your part.
I am running Python 3.5.1
I have a text file that I'm trying to search through and replace or overwrite text if it matches a predefined variable. Below is a simple example:
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Outdated line of information that has no comment above - message_label
The last line in this example needs to be overwritten so the new file looks like below:
test2.txt after script
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
The function I have written idealAppend does not work as intended and subsequent executions create a bit of a mess. My workaround has been to separate the two lines into single line variables but this doesn't scale well. I want to use this function throughout my script with the ability to handle any number of lines. (if that makes sense)
Script
#!/usr/bin/env python3
import sys, fileinput, os
def main():
    """Pass test2.txt and the replacement text block to idealAppend."""
    file = 'test2.txt'
    # The raw string starts and ends with a newline, so the block is written
    # on lines of its own.
    fullData = r'''
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
'''
    idealAppend(file, fullData)
def idealAppend(filename, data):
# Rewrites filename in place through fileinput (keeping a .bak backup), then
# appends data to the end when no single line of the file contains it.
label = data.split()[-1] # Grab last word of the Append String
for line in fileinput.input(filename, inplace=1, backup='.bak'):
# NOTE(review): line is one line of the file, while the data passed by main
# spans several lines, so line != data holds for every line here.
if line.strip().endswith(label) and line != data: # If a line 2 exists that matches the last word (label)
line = data # Overwrite with new line, comment, new line, and append data.
sys.stdout.write(line) # Write changes to current line
with open(filename, 'r+') as file: # Open File with rw permissions
# NOTE(review): each item tested is a single line, so a multi-line data
# string is never found inside one of them.
line_found = any(data in line for line in file) # Search if Append exists in file
if not line_found: # If data does NOT exist
file.seek(0, os.SEEK_END) # Goes to last line of the file
file.write(data) # Write data to the end of the file
if __name__ == "__main__": main()
Workaround Script
This seems to work perfectly as long as I only need to write exactly two lines. I'd love this to be more dynamic when it comes to number of lines so I can reuse the function easily.
#!/usr/bin/env python3
import sys, fileinput, os
def main():
    """Pass test2.txt, the comment line and the data line to appendFile."""
    file = 'test2.txt'
    comment = r'# This is an important line that needs to be copied'
    append = r'Very Important Line of information that the above line is a comment for - message_label'
    appendFile(file, comment, append)
def appendFile(filename, comment, append):
    """Make ``filename`` contain ``comment`` followed by ``append``.

    A line ending with the last word of ``append`` (the label) that differs
    from ``append`` is replaced by a blank line, ``comment`` and ``append``.
    When no line contains ``append`` afterwards, the same text is added at the
    end of the file. The original file is kept as ``filename + '.bak'``.
    """
    label = append.split()[-1]  # Grab last word of the Append String
    replacement = '\n' + comment + '\n' + append
    with fileinput.input(filename, inplace=1, backup='.bak') as source:
        for line in source:
            # Compare without the line ending: a line read from the file keeps
            # its '\n', so comparing it to `append` directly never matched and
            # every run inserted the comment again.
            text = line.rstrip('\n')
            if text.strip().endswith(label) and text != append:
                # Re-attach the original line ending so that a following line
                # is not glued onto the new text.
                line = replacement + line[len(text):]
            sys.stdout.write(line)  # inplace mode redirects stdout into the file
    with open(filename, 'r+') as file:  # Open File with rw permissions
        line_found = any(append in line for line in file)  # Search if Append exists in file
        if not line_found:  # If data does NOT exist
            file.seek(0, os.SEEK_END)  # Goes to the end of the file
            file.write(replacement)  # Write data to the end of the file


if __name__ == "__main__":
    main()
I am very new to Python so I'm hoping there is a simple solution that I overlooked. I thought it might make sense to try and split the fullData variable at the new line characters into a list or tuple, filter the label from the last item in the list, then output all entries but this is starting to move beyond what I've learned so far.
If I understand your issue correctly, you can just open the input and output files, then check whether the line contains old information and ends with the label and write the appropriate content accordingly.
# The output file must be opened for writing ('w'); in 'r' mode every
# output.write() call fails.
with open('in.txt') as f, open('out.txt', 'w') as output:
    for line in f:
        # Lines read from a file keep their '\n', so test the label against
        # the text without it.
        text = line.rstrip('\n')
        if text.endswith(label) and not text.startswith(new_info):
            output.write(replacement_text)
        else:
            output.write(line)
If you want to update the original file instead of creating a second one, it's easiest to just delete the original and rename the new one instead of trying to modify it in place.
Is this what you are looking for ? It's looking for a label and then replaces the whole line with whatever you want.
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Here is to be replaced - to_replace
script.py
#!/usr/bin/env python3
def main():
    """Replace the line labelled "to_replace" in test2.txt with a multi-line text."""
    file = 'test2.txt'
    label_to_modify = "to_replace"
    replace_with = "# Blabla\nMultiline\nHello"
    # Alternative: raw string stored in a file
    # file_replace_with = 'replace_with.txt'
    # with open(file_replace_with, 'r') as f:
    #     replace_with = f.read()
    appendFile(file, label_to_modify, replace_with)
def appendFile(filename, label_to_modify, replace_with):
    """Write a copy of ``filename`` to ``filename + ".bak"`` with labelled lines replaced.

    A line is replaced by ``replace_with`` when its last whitespace-separated
    word equals ``label_to_modify``; all other lines are copied unchanged.
    """
    new_file = []
    with open(filename, 'r') as f:
        for line in f:
            words = line.split()
            if words and words[-1] == label_to_modify:
                new_file.append(replace_with)
                # Keep the replaced line's newline so the next line is not
                # glued onto the replacement text.
                if line.endswith('\n') and not replace_with.endswith('\n'):
                    new_file.append('\n')
            else:
                new_file.append(line)
    with open(filename + ".bak", 'w') as f:
        f.write(''.join(new_file))


if __name__ == "__main__":
    main()
test2.txt.bak
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# Blabla
Multiline
Hello
Reading over both answers I've come up with the following as the best solution i can get to work. It seems to do everything I need. Thanks Everyone.
#!/usr/bin/env python3
def main():
    """Pass the test file, the two-line data block and the label to testFormatAppend."""
    testConfFile = 'test2.txt'  # /etc/apache2/apache2.conf
    testConfLabel = 'timed_combined'
    testConfData = r'''###This is an important line that needs to be copied - ##-#-####
Very Important Line of information that the above line is a \"r\" comment for - message_label'''
    testFormatAppend(testConfFile, testConfData, testConfLabel)  # Add new test format
def testFormatAppend(filename, data, label):
dataSplit = data.splitlines()
fileDataStr = ''
with open(filename, 'r') as file:
fileData = stringToDictByLine(file)
for key, val in fileData.items():
for row in dataSplit:
if val.strip().endswith(row.strip().split()[-1]):
fileData[key] = ''
fileLen = len(fileData)
if fileData[fileLen] == '':
fileLen += 1
fileData[fileLen] = data
else:
fileLen += 1
fileData[fileLen] = '\n' + data
for key, val in fileData.items():
fileDataStr += val
with open(filename, 'w') as file:
file.writelines(str(fileDataStr))
def stringToDictByLine(data):
    """Return a dict mapping 1-based position to each item yielded by ``data``."""
    # enumerate(start=1) replaces the hand-maintained counter.
    return dict(enumerate(data, start=1))


if __name__ == "__main__":
    main()
I am stuck on why the full grid from words.txt is not being displayed. Below are the tasks I must carry out:
write code to prompt the user for a filename, and attempt to open the file whose name is supplied. If the file cannot be opened the user should be asked to supply another filename; this should continue until a file has been successfully opened.
The file will contain on each line a row from the words grid. Write code to read, in turn, each line of the file, remove the newline character and append the resulting string to a list of strings.After the input is complete the grid should be displayed on the screen.
Below is the code I have written so far; any help would be appreciated:
# Ask once for a filename, read its lines with surrounding whitespace
# stripped into a list, and print.
file = input("Enter a filename: ")
try:
# NOTE(review): this first handle is rebound by the with-statement below and
# is never explicitly closed.
a = open(file)
with open(file) as a:
x = [line.strip() for line in a]
# NOTE(review): this prints the file object a, not the list x built above.
print (a)
except IOError as e:
print ("File Does Not Exist")
Note: Always avoid variable names like file or list, as they shadow Python built-ins.
try:
    read_input = raw_input  # Python 2
except NameError:
    read_input = input  # Python 3

# Keep asking until a file can be opened, then show its stripped lines.
while True:
    filename = read_input(' filename: ')
    try:
        # The with-statement closes the file once the lines are collected.
        with open(filename) as grid_file:
            lines = [line.strip() for line in grid_file]
    except IOError:
        print('No file found')
        continue
    print(lines)
    break
The below implementation should work:
try:
    read_input = raw_input  # Python 2
except NameError:
    read_input = input  # Python 3

# loop
while True:
    # don't use name 'file', it's a data type
    the_file = read_input("Enter a filename: ")
    try:
        with open(the_file) as a:
            x = [line.strip() for line in a]
    except IOError:
        print("File Does Not Exist")
        continue
    # I think you meant to print x, not a
    print(x)
    break
You need a while loop?
# Keep asking until a file can be opened, then show the grid.
while True:
    filename = input("Enter a filename: ")
    try:
        # A single open inside the with-statement; the extra bare open()
        # leaked a file handle.
        with open(filename) as a:
            x = [line.strip() for line in a]
    except IOError:
        # That name could not be opened: ask again.
        continue
    print(x)  # the list of rows, not the file object
    break
This will keep asking until a valid file is provided.
try:
    with open("/tmp/out") as out_file:
        content = out_file.read()
except (EnvironmentError, UnicodeError):
    # Missing, unreadable or undecodable file: fall back to an empty string.
    content = ""
Can I make this any shorter or more elegant? I have to do it for more than one file, so I want something shorter.
Is writing a function the only way to make it shorter?
What I actually want is this but I want to concat "" if there is any exception
# Read both log files completely, concatenate their text, and split the
# result on newlines.
lines = (open("/var/log/log.1").read() + open("/var/log/log").read()).split("\n")
Yes, you'll have to write something like
def get_contents(filename):
    """Return the text of ``filename``, or '' when it cannot be opened or read."""
    try:
        with open(filename) as f:
            return f.read()
    except EnvironmentError:
        # Best effort by design: an unavailable file counts as empty.
        return ''
lines = (get_contents('/var/log/log.1')
+ get_contents('/var/log/log')).split('\n')
NlightNFotis raises a valid point, if the files are big, you don't want to do this. Maybe you'd write a line generator that accepts a list of filenames:
def get_lines(filenames):
    """Yield the lines of each file in ``filenames`` in order, skipping unavailable files."""
    for fname in filenames:
        try:
            with open(fname) as f:
                for line in f:
                    yield line
        except EnvironmentError:
            # This file could not be opened or read: move on to the next one.
            continue
...
for line in get_lines(["/var/log/log.1", "/var/log/log"]):
do_stuff(line)
Another way is to use the standard fileinput.FileInput class (thanks, J.F. Sebastian):
import fileinput
def eat_errors(f, mode):
    """Open hook for fileinput: open ``f`` in ``mode``, or an empty stand-in on failure."""
    import os  # local import: this snippet only imports fileinput at the top

    try:
        return open(f, mode)
    except IOError:
        # os.devnull reads as an empty file, so an unavailable input simply
        # contributes no lines. The requested mode is kept so text and binary
        # callers get the type they asked for.
        return open(os.devnull, mode)
# Files that cannot be opened are replaced by an empty stand-in by the hook,
# so they contribute no lines to the loop.
for line in fileinput.FileInput(["/var/log/log.1", "/var/log/log"], openhook=eat_errors):
    do_stuff(line)
This code will monkey patch open with a replacement that creates a FakeFile, which always returns an "empty" string, whenever open throws an `IOError`.
Whilst it's more code than you'd really want to write for the problem at hand, it does mean that you have a reusable context manager for faking open if the need arises again (probably twice in the next decade)
with monkey_patched_open():
...
Actual code.
#!/usr/bin/env python
from contextlib import contextmanager
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
################################################################################
class FakeFile(StringIO):
    """Stand-in for a file that could not be opened.

    Every way of reading returns a fixed marker string; the markers differ
    per method only to show which one was called.
    """

    def __init__(self):
        StringIO.__init__(self)
        self.count = 0  # number of items already produced by iteration

    def read(self, n=-1):
        """Return the read marker, whatever ``n`` is."""
        return "<empty#1>"

    def readlines(self, sizehint=0):
        """Return a one-item list holding the readlines marker."""
        return ["<empty#2>"]

    def next(self):
        """Produce the iteration marker once, then stop."""
        if self.count == 0:
            self.count += 1
            return "<empty#3>"
        else:
            raise StopIteration

    # Python 3 iterates through __next__; without this alias a for-loop would
    # bypass next() and yield nothing.
    __next__ = next
################################################################################
@contextmanager
def monkey_patched_open():
    """Context manager that swaps this module's ``open`` for a forgiving one.

    Inside the with-block, ``open`` returns a FakeFile instead of raising
    IOError; on exit the previous ``open`` is put back.
    """
    global open
    old_open = open

    def new_fake_open(filename, mode="r"):
        try:
            fh = old_open(filename, mode)
        except IOError:
            fh = FakeFile()
        return fh

    open = new_fake_open
    try:
        yield
    finally:
        # Restore the original even when the with-block raised.
        open = old_open
################################################################################
# %-formatting inside print(...) produces the same output on Python 2 and 3.
with monkey_patched_open():
    for line in open("NOSUCHFILE"):
        print("NOSUCHFILE-> %s" % line)
    print("Other %s" % open("MISSING").read())
    print("OK %s" % open(__file__).read()[:30])
Running the above gives:
NOSUCHFILE-> <empty#3>
Other <empty#1>
OK #!/usr/bin/env python
from co
I left in the "empty" strings just to show what was happening.
StringIO would have sufficed just to read it once but I thought the OP was looking to keep reading from file, hence the need for FakeFile - unless someone knows of a better mechanism.
I know some see monkey patching as the act of a scoundrel.
You could try the following, but it's probably not the best:
import os
def chk_file(filename):
    """Return "" for a zero-byte file, otherwise the list of its lines.

    Raises:
        OSError: if ``filename`` does not exist or cannot be read.
    """
    if os.stat(filename).st_size == 0:
        return ""
    with open(filename) as f:
        return f.readlines()
if __name__ == "__main__":
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(chk_file("foobar.txt"))  # populated file
    print(chk_file("bar.txt"))     # empty file
    print(chk_file("spock.txt"))   # populated
It works. You can wrap it with your try-except, if you want.
You could define a function to catch errors:
from itertools import chain
def readlines(filename):
    """Return the lines of ``filename`` as a list, or [] when it cannot be opened or read."""
    try:
        with open(filename) as file:
            return file.readlines()  # or just `file` to return an iterator
    except EnvironmentError:
        # Best effort by design: an unavailable file contributes no lines.
        return []
files = (readlines(name) for name in ["/var/log/1", "/var/log/2"])
lines = list(chain.from_iterable(files))