Extract date from file name in python, not fixed name - python

I need to get the date from a file name in Python code. I found many solutions, but they all assume a fixed name and date. I don't know in advance what the file will be called, because the date in the name changes. How can I do that?
I have a code which is working for known file name (current date), file is called micro20230125.txt
import re
import os
from datetime import datetime
header = """#SANR0000013003;*;#CNR0010;*;#RINVAL-777.0;*;"""
current_timestamp = datetime.today().strftime('%Y%m%d')
input_file = "micro" + current_timestamp + ".txt"
output_file = os.path.splitext(input_file)[0] + ".zrxp"
with open(input_file, "r") as f:
first_line = f.readline().strip('\n')
text = re.search('(\d{6})', first_line).group(1)
text = header + "\n" + text + "\n"
with open(output_file, "w") as f:
f.write(text)
print(text)
`
But I don't need the current date. I will receive a file with some arbitrary date in its name, so how can I extract an unknown date from the file name? How should I change the variable current_timestamp?
I tried to use a regex, but I messed something up.
EDIT: DIFF CODE, SIMILAR PROBLEM:
I was working on this code and then realized: Python doesn't know what the numbers in the name represent, so why treat them as a date and complicate things? They are just numbers. As a matter of fact, I need those numbers as well as the full file name. So I came up with different code.
import re
import os
def get_numbers_from_filename(filename):
return re.search(r'\d+', filename).group(0) #returns only numbers
for filename in os.listdir("my path"):
print (get_numbers_from_filename(filename))
def get_numbers_from_filename(filename):
return re.search(r"(.)+", filename).group(0) #returns all name
for filename in os.listdir("my path"):
print(get_numbers_from_filename(filename))
file was: micro20230104.txt
and result is:
result
Now I want to use that result, not just print it.
No matter how I try, I get an error.
import re
import os
def get_numbers_from_filename(filename):
return re.search(r"(.)+", filename).group(0)
for filename in os.listdir("my path"):
print(get_numbers_from_filename(filename))
m = get_numbers_from_filename(filename)
output_file = os.path.splitext(m)[0] + ".zrxp"
with open(m, "r") as f:
first_line = f.readline().strip('\n')
text = re.search('(\d{6})', first_line).group(1)
text = header + "\n" + text + "\n"
with open(output_file, "w") as f:
f.write(text)
print(text)
but it raises an error:
error: there is no such file
What should I do? What am I doing wrong?

Well, in case all the files have the format 'micro[YearMonthDay].txt', you can try this solution:
import os
import re
from datetime import datetime

header = """#SANR0000013003;*;#CNR0010;*;#RINVAL-777.0;*;"""
# Change the variable folder_path for your actual directory path.
folder_path = "\\path_files\\"


def parse_date(filename):
    """Return the date embedded in a 'micro<YYYYMMDD>.txt' file name.

    Returns None when the name does not follow that format, so unrelated
    files in the folder are skipped instead of aborting the whole run.
    """
    try:
        # Second solution (use it instead of the two lines below):
        # return datetime.strptime(filename, "micro%Y%m%d.txt")
        # First solution: strip the fixed parts and parse what is left.
        digits = filename.replace('micro', '').replace('.txt', '')
        return datetime.strptime(digits, "%Y%m%d")
    except ValueError:
        return None


def convert_file(folder, filename):
    """Write the .zrxp companion of one input file and return its text.

    The first run of six digits on the first line of the input file is
    placed under the header. The output is written next to the input file,
    with the same base name and the extension '.zrxp'.

    Raises:
        ValueError: if the first line contains no run of six digits.
    """
    # Build the path from the real file name; formatting the date with
    # slashes ("%Y/%m/%d") would point at a file that does not exist.
    input_file = os.path.join(folder, filename)
    output_file = os.path.splitext(input_file)[0] + ".zrxp"
    with open(input_file, "r") as f:
        first_line = f.readline().strip('\n')
    match = re.search(r'(\d{6})', first_line)
    if match is None:
        raise ValueError("no 6-digit value on the first line of " + input_file)
    text = header + "\n" + match.group(1) + "\n"
    with open(output_file, "w") as f:
        f.write(text)
    return text


def main():
    """Convert every 'micro<YYYYMMDD>.txt' file found in folder_path."""
    for filename in sorted(os.listdir(folder_path)):
        # Only regular files are candidates; sub-directories are ignored.
        if not os.path.isfile(os.path.join(folder_path, filename)):
            continue
        date = parse_date(filename)
        if date is None:
            continue
        print(date.strftime("%Y/%m/%d"))
        print(convert_file(folder_path, filename))


if __name__ == "__main__":
    main()
Use the solution you prefer and comment out the other one.
Testing:
Text files for test
Code
Result
I hope I could help! :D

Related

Incrementing a file name in python

I am writing code which generates a new text file named with today's date each time it is run. For example, today's file name would be 2020-10-05. I would like to increment it so that if the program is run more than once on the same day, the name becomes 2020-10-05_1, _2, etc.
I have this code that I found in another question and I've tried tinkering with it, but I'm still stuck. The problem is that it converts the file name to an int (1, 2, 3); that works, but it isn't the result I want.
def incrementfile():
todayday = datetime.datetime.today().date()
output_folder = "//10.2.30.61/c$/Qlikview_Tropal/Raport/"
highest_num = 0
for f in os.listdir(output_folder):
if os.path.isfile(os.path.join(output_folder, f)):
file_name = os.path.splitext(f)[0]
try:
file_num = int(file_name)
if file_num > highest_num:
highest_num = file_num
except ValueError:
print("The file name %s is not an integer. Skipping" % file_name)
output_file = os.path.join(output_folder, str(highest_num + 1) + f"{todayday}" + ".txt")
return output_file
How can I modify this code so that the output I get in the end is something like 2020-10-05_0, _1, _2 etc.. ?
Thanks !
I strongly recommend using pathlib instead of os.path.join; it is more convenient.
import datetime
import pathlib


def incrementfile(output_folder="/tmp"):
    """Return the next free '<today>_<n>.txt' path inside output_folder.

    Args:
        output_folder: directory that holds the dated files; set it to
            your own output folder instead of /tmp.

    Returns:
        A pathlib.Path such as /tmp/2020-10-05_1.txt. The file itself is
        not created.
    """
    td = datetime.date.today()
    path = pathlib.Path(output_folder)
    # Start from the number of files that already carry today's date.
    inc = len(list(path.glob(f"{td}*"))) + 1
    outfile = path / f"{td}_{inc}.txt"
    # Counting alone repeats a number once an earlier file has been
    # deleted, so step past any name that is already taken.
    while outfile.exists():
        inc += 1
        outfile = path / f"{td}_{inc}.txt"
    return outfile
Not a direct answer to your question, but instead of using _1, _2 etc, you could use a full timestamp with date and current time, which would avoid duplication, EG:
from datetime import datetime

# Date and time joined by "_" with the colons of the time part turned into
# "-", e.g. 2020-10-05_13-06-53.825870.
t = datetime.now().isoformat(sep="_").replace(":", "-")
print(t)
Example output:
2020-10-05_13-06-53.825870
I think this will work-
import os
import datetime


# assuming files will be .txt format
def incrementfile(output_folder="//10.2.30.61/c$/Qlikview_Tropal/Raport/"):
    """Return the first unused '<YYYY-MM-DD>_<n>.txt' name in output_folder.

    Numbering starts at 0 and the suffix is replaced on every attempt, so
    the results are ..._0.txt, ..._1.txt, ..._2.txt (never ..._0_1.txt).

    Args:
        output_folder: directory whose existing file names are checked.

    Returns:
        The bare file name (not joined with output_folder).
    """
    # A set makes each "is this name taken?" check a constant-time lookup.
    files = set(os.listdir(output_folder))
    today = datetime.date.today().strftime('%Y-%m-%d')
    current_num = 0
    while f"{today}_{current_num}.txt" in files:
        current_num += 1
    return f"{today}_{current_num}.txt"

Python Validate Filename is of specified format or not with date part

I am trying to validate file names that have a date part in their name. I want to check each file name against a specified format in which only the date part differs from file to file. If the file name doesn't match, it should give us the message "file not found".
Filename e.g =abc_sales_2020-09-01_exp.csv, abc_sales_2020-09-02_exp.csv,abc_sales_2020-09-03_exp.csv. Only the datepart changes rest remains the same.
from datetime import date
def get_filename_datetime():
return "ak_sales_" + str(date.today()) + "_abc"+".csv"
name = get_filename_datetime() print("NAME", name) path = "aks/" +
name print("PATH", path);
with open(path, "r") as f:
f.read()
You can use regex to filter specific files and then open them with pandas:
import glob
import re
from datetime import datetime

import pandas as pd

# Exactly four digits, two digits, two digits, separated by dashes.
DATE_PART = re.compile(r"\d{4}-\d{2}-\d{2}")


def is_valid_filename(name, prefix="abc_sales_", suffix="_exp.csv"):
    """Return True if name is '<prefix><YYYY-MM-DD><suffix>' with a real date.

    Args:
        name: bare file name to check (no directory part).
        prefix: fixed text expected before the date.
        suffix: fixed text expected after the date, extension included.
    """
    if not (name.startswith(prefix) and name.endswith(suffix)):
        return False
    date_part = name[len(prefix):len(name) - len(suffix)]
    # The regex pins the layout; strptime alone would also accept "2020-9-1".
    if DATE_PART.fullmatch(date_part) is None:
        return False
    try:
        # Rejects impossible dates such as month 13 or day 32.
        datetime.strptime(date_part, "%Y-%m-%d")
    except ValueError:
        return False
    return True


if __name__ == "__main__":
    for file in glob.glob('*.csv'):
        if is_valid_filename(file):
            df = pd.read_csv(file)
        else:
            print(f"{file}: file not found (name does not match the expected format)")

Trying to create a text file with the date

I am trying to merge files and after create a new file and name it with the day date.
import datetime
import os
filename = datetime.datetime.now()
file1 = open("A1.txt", 'r+')
file2 = open("A2.txt", 'r+')
file3 = open("A3.txt", 'r+')
d1 = file1.read()
d2 = file2.read()
d3 = file3.read()
datac = [d1, d2, d3]
def file_w():
with open((filename.strftime("%D") + ".txt" ,'w+')) as file:
file.write()
for i in datac:
file.write(i)
file_w()
It looks like your error is in the open statement (the misplaced closing parenthesis near the end):
with open((filename.strftime("%D") + ".txt" ,'w+')) as file:
should be:
with open((filename.strftime("%D") + ".txt") ,'w+') as file:
but you might have a problem naming a file with "/" in it, such as generated by the "%D" in the strftime method. Instead you could try:
with open((filename.strftime("%m_%d_%Y") + ".txt") ,'w+') as file:

Python, open a file when the name is not fully known

I have a list of files with names such as these:
20140911_085234.csv
20140912_040056.csv
What is known is the first part which is the date (the second is a random number). How can I open the correct file if I know the date?
Update: There is one file per day.
As @isedev says, you could use the fnmatch function to find all the files that match the "date" pattern. The code could look like this:
from fnmatch import fnmatch
import os


def find_files_by_date(folder_path, date):
    """Return the sorted full paths of files in folder_path named '<date>*'.

    Args:
        folder_path: directory to search (not recursive).
        date: leading part of the file name, e.g. '20140911'.
    """
    pattern = date + '*'
    return sorted(
        os.path.join(folder_path, file_name)
        for file_name in os.listdir(folder_path)
        if fnmatch(file_name, pattern)
    )


if __name__ == "__main__":
    folder_path = '/home/Desktop/project'
    content_file = 'Hello World'
    _date = '20140911'
    for path in find_files_by_date(folder_path, _date):
        # Text mode, because content_file is a str: a 'wb' handle rejects
        # str on Python 3. NOTE: 'w' replaces the file's content; open
        # with 'r' instead if you only want to read it.
        with open(path, 'w') as f:
            f.write(content_file)
I hope it helps you!
Using glob :
import time
import glob
import os


def open_file_by_date(date, path="/path/to/file"):
    """Open every '<date>_*.csv' file in path and return the matched paths.

    Args:
        date: leading part of the file name, e.g. '20140911'.
        path: directory that holds the files.

    Returns:
        Sorted list of the full paths that were opened (empty if none).
    """
    # glob.escape keeps special characters in the directory name literal;
    # only the part after the date is a wildcard.
    pattern = os.path.join(glob.escape(path), date + "_*.csv")
    matches = sorted(glob.glob(pattern))
    for file in matches:
        # Read-only: a 'wb' handle would empty the file as soon as it opens.
        with open(file, 'rb') as f:
            # do your stuff with file
            f.read()
    return matches


if __name__ == "__main__":
    today = time.strftime("%Y%m%d")
    open_file_by_date(today)

Python - extract and modify part of a specific line of text with a function for all files in folder

I'm looking to extract and modify a specific line of text in many files within a folder but I am having some trouble.
For instance, the first file might read:
To: Bob
From: Bill
<Message> The eagle flies at midnight. <End Message>
The second message is different, but same format, and so on. I'd like to extract the third line, pass 'The eagle flies at midnight.' through a function (like base64), and then put it back on the line between 'Message' and 'End Message'. Such that the final output would read:
To: Bob
From: Bill
<Message> VGhlIGVhZ2xlIGZsaWVzIGF0IG1pZG5pZ2h0Lg== <End Message>
This is what I am trying (and adjusting) so far.
import base64
import os
import io
#ask user where his stuff is / is going
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
#get that stuff
myfilepath = os.path.join(directory, '*.txt')
with open('*.txt', 'r') as file:
data = file.readlines()
#Go to line 3 and take out non encoded text.
data[3] = X
X.strip("<Message>")
X.strip("<End Message>")
coded_string = X
#Encode line 3
base64.b64encode(coded_string)
data[3] = '<Message> %s <End Message>' % (coded_string)
# and write everything back
with open('*.txt', 'w') as file:
file.writelines(data)
I'm sure there are numerous problems, particularly with how I am opening and writing back. Bonus points: 99% of the messages in this folder are in this exact format, but there are 1% junk messages (they dont need to be encoded, and line 3 for them is something different). I'm not too worried about them, but if they could be unharmed in the process that'd be nifty. Maybe line 3 should be line 2 if the count starts at 0 ...
Edit: Trying
import re, base64
import os
folder = 'C:/Users/xxx/Desktop/input'
matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")
for filename in os.listdir(folder):
infilename = os.path.join(folder, filename)
if not os.path.isfile(infilename): continue
base, extension = os.path.splitext(filename)
filein = open(infilename, 'r')
fileout = open(os.path.join(folder, '{}_edit.{}'.format(base, extension)), 'w')
for line in filein:
match = matcher.search(line)
if match:
fileout.write("<message> " + base64.b64encode(match.group('text').strip()) + " <End message>\n")
else:
fileout.write(line)
filein.close()
fileout.close()
Ultimately this gives me a bunch of blank files, except for the last one, which is translated properly.
You can use regular expression to make it easier as:
import base64
import re

matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")


def encode_line(line):
    """Return line with the text between the message tags base64-encoded.

    Lines without a '<Message>...<End Message>' pair are returned
    unchanged, so junk lines pass through untouched.
    """
    def _encode(match):
        text = match.group('text').strip()
        # b64encode works on bytes and returns bytes: encode the text
        # first, then decode the result so it can be joined with str.
        encoded = base64.b64encode(text.encode('utf-8')).decode('ascii')
        return "<Message> " + encoded + " <End Message>"

    return matcher.sub(_encode, line)


if __name__ == "__main__":
    # 'with' closes both files even if an error interrupts the loop.
    with open("examplein.txt", 'r') as filein, open("exampleout.txt", 'w') as fileout:
        for line in filein:
            fileout.write(encode_line(line))
This code works for just one file; you should adapt it to work with all the files in your directory:
import base64
import os
import re

folder = '/home/user/Public'
matcher = re.compile("<Message>(?P<text>[^<]*)<End Message>")


def _encode_match(match):
    """Return the matched message with its text base64-encoded."""
    text = match.group('text').strip()
    # b64encode works on bytes and returns bytes: encode the text first,
    # then decode the result so it can be joined with str.
    encoded = base64.b64encode(text.encode('utf-8')).decode('ascii')
    return "<Message> " + encoded + " <End Message>"


def encode_folder(folder):
    """Write an encoded '<name>_edit<ext>' copy of every file in folder.

    Sub-directories and files already named '*_edit' are skipped, so the
    function can be run again on the same folder without re-encoding its
    own output.

    Returns:
        Sorted list of the output paths that were written.
    """
    written = []
    for filename in sorted(os.listdir(folder)):
        infilename = os.path.join(folder, filename)
        if not os.path.isfile(infilename):
            continue
        base, extension = os.path.splitext(filename)
        if base.endswith('_edit'):
            continue
        # splitext keeps the leading dot in extension, so no extra '.' here.
        outfilename = os.path.join(folder, '{}_edit{}'.format(base, extension))
        # Each pair of files is opened and closed inside its own iteration.
        with open(infilename, 'r') as filein, open(outfilename, 'w') as fileout:
            for line in filein:
                fileout.write(matcher.sub(_encode_match, line))
        written.append(outfilename)
    return written


if __name__ == "__main__":
    encode_folder(folder)
This code works in my pc

Categories

Resources