Parse updated text from a .txt file - python

I'm attempting to read and parse a .txt file that is continually being updated throughout the day. I want to parse only lines that have not already been consumed. These are then to be sent to a Telegram group.
At present, every time I run the script it parses everything.
import urllib.parse

selections = []
msgList = []
urr = ""
name = ""
ourLines = len(selections)

while True:
    file1 = open(r'C:\\urlt\log.txt', 'r')
    Lines = file1.readlines()
    file1.close()
    try:
        while True:
            if ourLines == len(Lines):
                break
            else:
                txt = Lines[ourLines].strip()
                tlist = txt.split("&")
                ourLines = ourLines + 1
                for subtxt in tlist:
                    if "eventurl=" in subtxt:
                        a = subtxt[9:len(subtxt) - 3]
                        url = "www.beefandtuna.com/%23" + a.replace("%23", "/").strip('(')
                        #print(url)
                        urr = url
                    elif "bet=" in subtxt:
                        name = urllib.parse.unquote(subtxt[4:len(subtxt)])
                        #print(name)
                        selections.append(url + name)
                        msg = url + " " '\n' "Name: " + name
                        if msg not in msgList:
                            post_to_telegram(msg)  # post_to_telegram() is defined elsewhere in my script
                            msgList.append(msg)
                            #time.sleep(0.5)
    except:
        pass

Assuming the new content is appended to the end of the file: after you finish reading the file, make a copy of it.
The next time you read the file, seek to the position equal to the length of that copy.
import os
from shutil import copyfile

in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
backup_file_loc = in_file_loc + ".bak"

while True:
    try:
        file_backup_size = os.stat(backup_file_loc).st_size
    except OSError:
        # no backup yet, so start from the beginning of the file
        file_backup_size = 0

    file1 = open(in_file_loc, 'r')
    # move file position to the end of the old file
    file1.seek(file_backup_size)
    # Read all lines in the file after the position we seek-ed to
    Lines = file1.readlines()
    file1.close()

    # copy current version of file to backup
    copyfile(in_file_loc, backup_file_loc)

    # Then do whatever you want to do with Lines
This is probably not the best way to do it because, as rici pointed out in a comment:
"make a copy" is not an atomic operation, and as the file grows copying will be successively slower. Any data appended to the log file during the copy will never be reported. Furthermore, the copy might happen to include a partial entry, in which case the next scan will start in the middle of an entry.
An alternative is to save the size of the current file in a separate file:
in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
size_file_loc = in_file_loc + ".lastsize"

while True:
    # read old size from file
    try:
        with open(size_file_loc, 'r') as f:
            file_size = int(f.read())
    except (OSError, ValueError):
        # if error, file size is zero
        file_size = 0

    file1 = open(in_file_loc, 'r')
    file1.seek(file_size)
    Lines = file1.readlines()
    new_file_size = file1.tell()  # Get the location of the current file marker
    file1.close()

    # write new size to file
    with open(size_file_loc, 'w') as f:
        f.write(str(new_file_size))

    # Then do whatever you want to do with Lines
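Whichever variant you use, the consuming side stays the same as in your original script: walk the freshly returned Lines, build the message, and post it. Below is a minimal sketch of that step (process_new_lines is just a hypothetical helper name; it reuses your existing post_to_telegram() and adds a short sleep so the loop does not spin):
import time
import urllib.parse

def process_new_lines(lines):
    # same parsing idea as the original script, applied only to the unread lines
    for txt in (l.strip() for l in lines):
        url = name = None
        for subtxt in txt.split("&"):
            if "eventurl=" in subtxt:
                a = subtxt[9:len(subtxt) - 3]
                url = "www.beefandtuna.com/%23" + a.replace("%23", "/").strip('(')
            elif "bet=" in subtxt:
                name = urllib.parse.unquote(subtxt[4:])
        if url and name:
            post_to_telegram(url + "\nName: " + name)  # post_to_telegram() comes from the original script

# inside the while True loop, right after readlines():
#     process_new_lines(Lines)
#     time.sleep(0.5)  # small pause so the loop does not hammer the disk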

Related

Python - define a function to manage files

I need to define a function that will, in short:
Open and grab the content from an existing file
Transform that content
Create a new file
Write that new content in this new file
Print the content of the new file
I'm a complete beginner, but this is what I have so far. How can I improve it?
def text():
    #open the existing file
    text_file = open('music.txt', 'r')
    #reads the file
    reading = text_file.read()
    #this turns everything to lower case, counts the words and displays the list vertically
    from collections import Counter
    new_text = reading.lower()
    list_words = Counter(new_text.split())
    ordered_list = sorted(list_words.items())
    #creates a new file and writes the content there
    with open('finheiro_saida.txt', 'x') as final_file:
        for i in ordem:
            finheiro_saida.write(str(i) + '\n')
    #not sure how to open this new file and print its content, when I tried it says the new file doesn't exist in the directory - tried everything.
    final = open('C:/Users/maria/OneDrive/Documents/SD_DTM/ficheiro_saida.txt', 'r')
    read_file = final.read()
    print(read_file)
You can open the new file and print its content the same way you read and wrote to it!
# ...After all your previous code...
with open('finheiro_saida.txt', 'r') as final_file:
    final_file_content = final_file.read()
    print(final_file_content)
Fixed some syntax errors in your code.
You can display the file the same way you read it.
Also, move all imports to the start of the file.
You can also read all the lines of a file into a list using file.readlines() (see the short sketch after the code below).
from collections import Counter

def text():
    # open the existing file
    text_file = open("music.txt", "r")
    # reads the file
    reading = text_file.read()
    # this turns everything to lower case, counts the words and displays the list vertically
    new_text = reading.lower()
    list_words = Counter(new_text.split())
    ordered_list = sorted(list_words.items())
    # creates a new file and writes the content there
    file_name = "finheiro_saida.txt"
    with open("finheiro_saida.txt", "x") as final_file:
        for i in ordered_list:
            final_file.write(str(i) + "\n")
    return file_name

def display(final_file_name):
    with open(final_file_name) as file:
        print(file.read())

final_file_name = text()
display(final_file_name)
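For the readlines() note above, here is a tiny sketch that reads the same music.txt as a list of lines instead of one string:
with open("music.txt", "r") as text_file:
    lines = text_file.readlines()  # list of lines, e.g. ['first line\n', 'second line\n']
    words = [word for line in lines for word in line.lower().split()]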

Append multiple lines in multiple files

I need to create a certain number of files that always have the same lines inside them.
With this script, I can create the "schede" folder in which a certain number of *.tex files are created.
The LaTeX strings are written only to the last file, while the others remain blank. How can I get all of the files filled in?
import os
import subprocess

work_path = os.path.abspath(os.path.dirname(__file__))

if not os.path.exists("schede"):
    os.mkdir("schede")
os.chdir(os.path.expanduser(work_path + "/schede"))

n = 5  # put the number as you wish
for i in range(n):
    file_name = "S" + str(i).zfill(1) + ".tex"
    subprocess.call(['touch', file_name])  # creates 34 s.tex files

def append_new_line(file_name, text_to_append):
    """Append given text as a new line at the end of file"""
    # Open the file in append & read mode ('a+')
    with open(file_name, "a+") as file_object:
        # Move read cursor to the start of file.
        file_object.seek(0)
        # If file is not empty then append '\n'
        data = file_object.read(100)
        if len(data) > 0:
            file_object.write("\n")
        # Append text at the end of file
        file_object.write(text_to_append)

def append_multiple_lines(file_name, lines_to_append):
    # Open the file in append & read mode ('a+')
    with open(file_name, "a+") as file_object:
        appendEOL = False
        # Move read cursor to the start of file.
        file_object.seek(0)
        # Check if file is not empty
        data = file_object.read(100)
        if len(data) > 0:
            appendEOL = True
        # Iterate over each string in the list
        for line in lines_to_append:
            # If file is not empty then append '\n' before the first line;
            # for other lines always append '\n' before appending the line
            if appendEOL == True:
                file_object.write("\n")
            else:
                appendEOL = True
            # Append element at the end of file
            file_object.write(line)

def main():
    append_new_line(file_name, 'This is second line')
    print('Append multiple lines to a file in Python')
    list_of_lines = [
        '\ecvtitle{}{}',
        '\ecvitem{\ecvhighlight{Organizzato da:}}{\\textbf{}}',
        '\ecvitem{\ecvhighlight{}}{}',
        '\ecvitem{}{}',
        '\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
        '\smallskip',
        '\ecvitem{}{',
        '\\begin{ecvitemize}',
        '\item scrivere un\'item',
        '\\end{ecvitemize}',
        '}']
    # Append strings in list as separate new lines at the end of the file
    append_multiple_lines(file_name, list_of_lines)

if __name__ == '__main__':
    main()
As @MisterMiyagi said, file_name is a single variable, so every time you create a file it gets overwritten and only the last file name survives.
I suggest making a list called files = [] and, right after file_name = "S"+str(i).zfill(1)+".tex", adding files.append(file_name) inside the for loop where you create the files (a sketch of that loop is shown after the code below).
Then change main to run the same instructions for each file:
def main():
    for file_name in files:
        append_new_line(file_name, 'This is second line')
        print('Append multiple lines to a file in Python')
        list_of_lines = [
            '\ecvtitle{}{}',
            '\ecvitem{\ecvhighlight{Organizzato da:}}{\\textbf{}}',
            '\ecvitem{\ecvhighlight{}}{}',
            '\ecvitem{}{}',
            '\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
            '\smallskip',
            '\ecvitem{}{',
            '\\begin{ecvitemize}',
            '\item scrivere un\'item',
            '\\end{ecvitemize}',
            '}']
        # Append strings in list as separate new lines at the end of each file
        append_multiple_lines(file_name, list_of_lines)
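A minimal sketch of the modified creation loop described above; the names are taken from the question, and it assumes the same imports as the original script:
files = []
n = 5
for i in range(n):
    file_name = "S" + str(i).zfill(1) + ".tex"
    subprocess.call(['touch', file_name])  # create the empty .tex file, as in the question
    files.append(file_name)                # remember every file name, not just the last one
main() can then loop over files as shown in the code above.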

Retrieving information from text file

I would like to read the user's input into the variable "name", then load account.zk (a normal text file) into a list, and then check whether the input is already in that list. If it is not, I would like to add it; otherwise I would like to skip it and move on.
I've written this code myself, but I didn't succeed! Can someone see why?
# Start Downloading System
name = input(Fore.WHITE+"Enter Profile Name To Download: ")

# Save Account (used for check the updates!)
file_object = open("account.zk", "r")
for line in file_object:
    stripped_line = line.strip()
    line_list = stripped_line.split()
    list_of_lists.append(line_list)

if (len(list_of_lists) != len(set(name))):
    print("\n\nAlready in List!\n\n")
    file_object.close
    time.sleep(5)
else:
    file_object.close
    file_object = open("account.zk", "w")
    file_object.write(name + "\n")
    file_object.close
I think what you want to do is the following:
# Start Downloading System
name = input(Fore.WHITE + "Enter Profile Name To Download: ")

# Save Account (used to check for updates!)
file_object = open("account.zk", "r")
list_of_lists = []
for line in file_object:
    stripped_line = line.strip()
    line_list = stripped_line.split()
    list_of_lists.extend(line_list)  # add the elements of the line to the list

if name in list_of_lists:  # check if the name is already in the file
    print("\n\nAlready in List!\n\n")
    file_object.close()
    time.sleep(5)
else:  # if not, add it
    file_object.close()
    file_object = open("account.zk", "a")  # open in append mode so the existing names are kept
    file_object.write(name + "\n")
    file_object.close()
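For what it's worth, a slightly tighter sketch of the same idea, assuming each line of account.zk holds exactly one name (and the same Fore and time imports as the question):
name = input(Fore.WHITE + "Enter Profile Name To Download: ")

with open("account.zk", "r") as f:
    known = {line.strip() for line in f}   # one name per line

if name in known:
    print("\n\nAlready in List!\n\n")
    time.sleep(5)
else:
    with open("account.zk", "a") as f:     # append, so existing names survive
        f.write(name + "\n")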

Capture the last timestamp, without reading the complete file using Python

I am fairly new to Python and I am trying to capture the last line of a syslog file, but I have been unable to do so. It is a huge log file, so I want to avoid loading the complete file into memory. I just want to read the last line of the file and capture its timestamp for further analysis.
I have the code below, which captures all the timestamps into a Python dict; it takes a really long time to reach the last timestamp. Once it completed, my plan was to reverse the list and capture the object at index [0]:
The lastFile function uses the glob module and gives me the name of the most recent log file, which is fed into recentEdit in the main function.
Is there a better way of doing this?
Script1:
#!/usr/bin/python
import glob
import os
import re

def main():
    syslogDir = (r'Location/*')
    listOfFiles = glob.glob(syslogDir)
    recentEdit = lastFile(syslogDir)
    print(recentEdit)

    astack = []
    with open(recentEdit, "r") as f:
        for line in f:
            result = [re.findall(r'\d{4}.\d{2}.\d{2}T\d{2}.\d{2}.\d{2}.\d+.\d{2}.\d{2}', line)]
            print(result)

def lastFile(i):
    listOfFiles = glob.glob(i)
    latestFile = max(listOfFiles, key=os.path.getctime)
    return(latestFile)

if __name__ == '__main__': main()
Script2:
###############################################################################
#The readline() gives me the first line of the log file which is also not what I am looking for:
#!/usr/bin/python
import glob
import os
import re

def main():
    syslogDir = (r'Location/*')
    listOfFiles = glob.glob(syslogDir)
    recentEdit = lastFile(syslogDir)
    print(recentEdit)

    with open(recentEdit, "r") as f:
        fLastLine = f.readline()
        print(fLastLine)

    # astack=[]
    # with open(recentEdit, "r") as f:
    #     for line in f:
    #         result = [re.findall(r'\d{4}.\d{2}.\d{2}T\d{2}.\d{2}.\d{2}.\d+.\d{2}.\d{2}', line)]
    #         print(result)

def lastFile(i):
    listOfFiles = glob.glob(i)
    latestFile = max(listOfFiles, key=os.path.getctime)
    return(latestFile)

if __name__ == '__main__': main()
I really appreciate your help!!
Sincerely.
If you want to go directly to the end of the file, follow these steps:
1. Every time your program runs, persist (store) the index of the last '\n'.
2. If you have persisted the index of the last '\n', you can seek directly to it using file.seek(yourpersistedindex).
3. After this, when you call file.readline() you will get the lines starting from yourpersistedindex.
4. Store this index every time you run your script.
For example, say your file log.txt has content like:
timestamp1 \n
timestamp2 \n
timestamp3 \n
import pickle

lastNewLineIndex = None
# here trying to read the lastNewLineIndex
try:
    rfile = open('pickledfile', 'rb')
    lastNewLineIndex = pickle.load(rfile)
    rfile.close()
except:
    pass

logfile = open('log.txt', 'r')

newLastNewLineIndex = None
if lastNewLineIndex:
    # seek(index) will take the file pointer to the index
    logfile.seek(lastNewLineIndex)
    # will read the line starting from the index we provided in the seek function
    lastLine = logfile.readline()
    print(lastLine)
    # tell() gives you the current index
    newLastNewLineIndex = logfile.tell()
    logfile.close()
else:
    counter = 0
    text = logfile.read()
    for c in text:
        if c == '\n':
            newLastNewLineIndex = counter
        counter += 1

# here saving the new LastNewLineIndex
wfile = open('pickledfile', 'wb')
pickle.dump(newLastNewLineIndex, wfile)
wfile.close()
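Since the question really only needs the very last line, another option is to seek near the end of the file and read backwards until a newline turns up. This is just a sketch, independent of the pickle approach above:
import os

def last_line(path, block=1024):
    """Return the last line of a file without reading the whole file."""
    with open(path, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        data = b''
        # read fixed-size blocks from the end until the last full line is covered
        while size > 0 and data.count(b'\n') < 2:
            step = min(block, size)
            size -= step
            f.seek(size)
            data = f.read(step) + data
        lines = data.splitlines()
        return lines[-1].decode() if lines else ''

# the timestamp regex from the question can then be applied to just that one line:
# re.findall(r'\d{4}.\d{2}.\d{2}T\d{2}.\d{2}.\d{2}.\d+.\d{2}.\d{2}', last_line('log.txt'))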

How do I search a file for a string and replace it with multiple lines in Python?

I am running Python 3.5.1
I have a text file that I'm trying to search through and replace or overwrite text if it matches a predefined variable. Below is a simple example:
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Outdated line of information that has no comment above - message_label
The last line in this example needs to be overwritten so the new file looks like below:
test2.txt after script
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
The function I have written idealAppend does not work as intended and subsequent executions create a bit of a mess. My workaround has been to separate the two lines into single line variables but this doesn't scale well. I want to use this function throughout my script with the ability to handle any number of lines. (if that makes sense)
Script
#!/usr/bin/env python3
import sys, fileinput, os

def main():
    file = 'test2.txt'
    fullData = r'''
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
'''
    idealAppend(file, fullData)

def idealAppend(filename, data):
    label = data.split()[-1]  # Grab last word of the Append String
    for line in fileinput.input(filename, inplace=1, backup='.bak'):
        if line.strip().endswith(label) and line != data:  # If a line 2 exists that matches the last word (label)
            line = data  # Overwrite with new line, comment, new line, and append data.
        sys.stdout.write(line)  # Write changes to current line
    with open(filename, 'r+') as file:  # Open File with rw permissions
        line_found = any(data in line for line in file)  # Search if Append exists in file
        if not line_found:  # If data does NOT exist
            file.seek(0, os.SEEK_END)  # Goes to last line of the file
            file.write(data)  # Write data to the end of the file

if __name__ == "__main__": main()
Workaround Script
This seems to work perfectly as long as I only need to write exactly two lines. I'd love this to be more dynamic when it comes to number of lines so I can reuse the function easily.
#!/usr/bin/env python3
import sys, fileinput, os

def main():
    file = 'test2.txt'
    comment = r'# This is an important line that needs to be copied'
    append = r'Very Important Line of information that the above line is a comment for - message_label'
    appendFile(file, comment, append)

def appendFile(filename, comment, append):
    label = append.split()[-1]  # Grab last word of the Append String
    for line in fileinput.input(filename, inplace=1, backup='.bak'):
        if line.strip().endswith(label) and line != append:  # If a line 2 exists that matches the last word (label)
            line = '\n' + comment + '\n' + append  # Overwrite with new line, comment, new line, and append data.
        sys.stdout.write(line)  # Write changes to current line
    with open(filename, 'r+') as file:  # Open File with rw permissions
        line_found = any(append in line for line in file)  # Search if Append exists in file
        if not line_found:  # If data does NOT exist
            file.seek(0, os.SEEK_END)  # Goes to last line of the file
            file.write('\n' + comment + '\n' + append)  # Write data to the end of the file

if __name__ == "__main__": main()
I am very new to Python so I'm hoping there is a simple solution that I overlooked. I thought it might make sense to try and split the fullData variable at the new line characters into a list or tuple, filter the label from the last item in the list, then output all entries but this is starting to move beyond what I've learned so far.
If I understand your issue correctly, you can just open the input and output files, then check whether the line contains old information and ends with the label and write the appropriate content accordingly.
with open('in.txt') as f, open('out.txt', 'w') as output:  # open the output for writing, not reading
    for line in f:
        if line.endswith(label) and not line.startswith(new_info):
            output.write(replacement_text)
        else:
            output.write(line)
If you want to update the original file instead of creating a second one, it's easiest to just delete the original and rename the new one instead of trying to modify it in place.
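A short sketch of that delete-and-rename step; the temporary file name is just an example, and os.replace() swaps the rewritten copy over the original in one step:
import os

tmp_path = 'test2.txt.tmp'  # hypothetical temporary output name
with open('test2.txt') as f, open(tmp_path, 'w') as output:
    for line in f:
        output.write(line)  # or write the replacement text, as in the snippet above
os.replace(tmp_path, 'test2.txt')  # replace the original with the rewritten copy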
Is this what you are looking for? It looks for a label and then replaces the whole line with whatever you want.
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Here is to be replaced - to_replace
script.py
#!/usr/bin/env python3

def main():
    file = 'test2.txt'
    label_to_modify = "to_replace"
    replace_with = "# Blabla\nMultiline\nHello"
    """
    # Raw string stored in a file
    file_replace_with = 'replace_with.txt'
    with open(file_replace_with, 'r') as f:
        replace_with = f.read()
    """
    appendFile(file, label_to_modify, replace_with)

def appendFile(filename, label_to_modify, replace_with):
    new_file = []
    with open(filename, 'r') as f:
        for line in f:
            if len(line.split()) > 0 and line.split()[-1] == label_to_modify:
                new_file.append(replace_with)
            else:
                new_file.append(line)
    with open(filename + ".bak", 'w') as f:
        f.write(''.join(new_file))

if __name__ == "__main__": main()
test2.txt.bak
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# Blabla
Multiline
Hello
Reading over both answers, I've come up with the following as the best solution I can get to work. It seems to do everything I need. Thanks, everyone.
#!/usr/bin/env python3

def main():
    testConfFile = 'test2.txt'  # /etc/apache2/apache2.conf
    testConfLabel = 'timed_combined'
    testConfData = r'''###This is an important line that needs to be copied - ##-#-####
Very Important Line of information that the above line is a \"r\" comment for - message_label'''
    testFormatAppend(testConfFile, testConfData, testConfLabel)  # Add new test format

def testFormatAppend(filename, data, label):
    dataSplit = data.splitlines()
    fileDataStr = ''
    with open(filename, 'r') as file:
        fileData = stringToDictByLine(file)
    for key, val in fileData.items():
        for row in dataSplit:
            if val.strip().endswith(row.strip().split()[-1]):
                fileData[key] = ''
    fileLen = len(fileData)
    if fileData[fileLen] == '':
        fileLen += 1
        fileData[fileLen] = data
    else:
        fileLen += 1
        fileData[fileLen] = '\n' + data
    for key, val in fileData.items():
        fileDataStr += val
    with open(filename, 'w') as file:
        file.writelines(str(fileDataStr))

def stringToDictByLine(data):
    fileData = {}
    i = 1
    for line in data:
        fileData[i] = line
        i += 1
    return fileData

if __name__ == "__main__": main()
