Getting error parsing date string in Russian language in python 3 - python

I am trying to convert date string to date object in python 3 as follows:
from datetime import datetime
import locale
date_string = "Добавлено: суббота, 26 декабря 2015 г. в 11:01:59"
locale.setlocale(locale.LC_TIME, "rus")
ru_date_object = datetime.strptime(date_string , 'Добавлено: %A, %d %B %Y г. в %H:%M:%S')
print(ru_date_object)
I get following error while executing the above code:
Traceback (most recent call last):
File "F:\Users***\Desktop\DateParser\20 nov.py", line 54, in
ruDate = datetime.strptime(dateStr6, '\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043e: %A, %d %B %Y \u0433. \u0432 %H:%M:%S')
File "F:\Users***\AppData\Local\Programs\Python\Python35-32\lib_strptime.py", line 510, in _strptime_datetime
tt, fraction = _strptime(data_string, format)
File "F:\Users***\AppData\Local\Programs\Python\Python35-32\lib_strptime.py", line 343, in _strptime
(data_string, format))
ValueError: time data '\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043e: \u0441\u0443\u0431\u0431\u043e\u0442\u0430, 26 \u0434\u0435\u043a\u0430\u0431\u0440\u044f 2015 \u0433. \u0432 11:01:59' does not match format '\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043e: %A, %d %B %Y \u0433. \u0432 %H:%M:%S'

The problem is in word 'декабря' which is not nominative. To parse this string into date you should to replace 'декабря' to 'декабрь'. You can do it this way:
from datetime import datetime
import locale
date_string = "Добавлено: суббота, 26 декабря 2015 г. в 11:01:59"
locale.setlocale(locale.LC_TIME, "rus")
d = {'января': 'январь', 'декабря': 'декабрь'}
for k, v in d.items():
date_string = date_string.replace(k, v)
ru_date_object = datetime.strptime(date_string , 'Добавлено: %A, %d %B %Y г. в %H:%M:%S')
print(ru_date_object)

Related

Unable to convert datetime string with timezone to datetime in UTC using datetime python module

I am having issues converting a datetime string of this format "%d %b %Y %X %Z" to "%Y-%m-%dT%X%z". The timezone information is stripped out. For example:
>> import datetime
>> datetime_string_raw = "18 Nov 2022 08:57:04 EST"
>> datetime_utc = datetime.datetime.strptime(datetime_string_raw, "%d %b %Y %X %Z").strftime("%Y-%m-%dT%X%z")
>> print(datetime_utc)
2022-11-18T08:57:04
How can I get it to print the UTC offset? Why doesn't the %Z and %z have any effect? Thanks!
Using dateutil's parser and a definition which abbreviated names should resemble which time zone:
import datetime
import dateutil # pip install python-dateutil
tzinfos = {"EST": dateutil.tz.gettz("America/New_York"),
"EDT": dateutil.tz.gettz("America/New_York")}
datetime_string_raw = "18 Nov 2022 08:57:04 EST"
datetime_ny = dateutil.parser.parse(datetime_string_raw, tzinfos=tzinfos)
print(datetime_ny)
# 2022-11-18 08:57:04-05:00
datetime_utc = datetime_ny.astimezone(datetime.timezone.utc)
print(datetime_utc)
# 2022-11-18 13:57:04+00:00
You can do basically the same using only the standard library, but it requires some pre-processing of the date/time string. Ex:
import datetime
import zoneinfo # Python >= 3.9
def parse_dt_with_tz(dt_string: str, fmt: str, tzinfos: dict) -> datetime.datetime:
"""Parse date/time string with abbreviated time zone name to aware datetime."""
parts = dt_string.split(" ")
tz = tzinfos.get(parts[-1]) # last element is the tz name
if not tz:
raise ValueError(f"no entry found for {parts[-1]} in tzinfos")
return datetime.datetime.strptime(" ".join(parts[:-1]), fmt).replace(tzinfo=tz)
# usage
tzinfos = {"EST": zoneinfo.ZoneInfo("America/New_York"),
"EDT": zoneinfo.ZoneInfo("America/New_York")}
s = "18 Nov 2022 08:57:04 EST"
dt = parse_dt_with_tz(s, "%d %b %Y %H:%M:%S", tzinfos)
print(dt, repr(dt))
# 2022-11-18 08:57:04-05:00 datetime.datetime(2022, 11, 18, 8, 57, 4, tzinfo=zoneinfo.ZoneInfo(key='America/New_York'))

How to convert datetime.datetime to GMT format python?

I am having an input as:
test_date = 2019-06-15 10:16:55-06:00
the code to convert is:
new_value= datetime.strptime(test_date, '%a, %d %b %Y %H:%M:%S GMT')
After getting the value from new_value> I need to convert it to:
new_date = new_value.strftime("%m/%d/%Y %I:%M:%S %p")
But I am getting an error as below:
TypeError: strptime() argument 1 must be str, not datetime.datetime
When I try to convert test_date as string like str(test_date). It causes
ValueError: time data '2019-06-15 10:16:55-06:00' does not match format '%a, %d %b %Y %H:%M:%S GMT'
How can I achieve this?
%a refers to weekday like Sun, Mon, ..., etc, but it does not appear in your test_date input. Therefore it raises an error. -06:00 means Central Standard Time, e.g. in United states, Chicago. Try the following instead.
from datetime import datetime
test_date = '2019-06-15 10:16:55-06:00'
new_value = datetime.strptime(test_date, '%Y-%m-%d %H:%M:%S%z')
new_value = new_value.timestamp()
new_value = datetime.utcfromtimestamp(new_value) #change from CST to GMT
new_date = new_value.strftime("%m/%d/%Y %I:%M:%S %p")
print(new_date)
06/15/2019 04:16:55 PM
You need to specify you date format at strptime to parse date correctly.
Then you need to convert date to GMT timezone like this
from datetime import datetime
test_date = '2019-06-15 10:16:55-06:00'
new_value = datetime.strptime(test_date, '%Y-%m-%d %H:%M:%S%z')
new_value_timestamp = new_value.timestamp()
gmt_date = datetime.utcfromtimestamp(new_value_timestamp)
new_date = gmt_date.strftime("%m/%d/%Y %I:%M:%S %p")
print(new_date)
Output
06/15/2019 04:16:55 PM
from datetime import datetime
date_string = "21-June-2018"
date_string1 = "2019-06-21 10:16:55-0600"
print("date_string =", date_string)
print("type of date_string =", type(date_string))
date_object = datetime.strptime(date_string, "%d-%B-%Y")
date_object1 = datetime.strptime(date_string1, "%Y-%m-%d %H:%M:%S%z")
print("date_object =", date_object)
print("date_object1 =", date_object1)
print("type of date_object =", type(date_object))
new_value= test_date.strftime("%d %b %Y %H:%M:%S GMT")
new_date = datetime.strptime(new_value,'%d %b %Y %H:%M:%S %Z')
new_date will give you the output.

Python datetime converter

I'm trying to write a Python function to standardize incoming timestamps to yyyy-mm-ddThh:mm+/-tz offset.
Example:
def format_ts(ts):
beg_format = [
'%H:%M%a, %b %d, %Y %Z',
'%a, %d %b %Y %H:%M:%S %z',
]
end_format = '%Y-%m-%dT%H:%M %z'
try:
for f in beg_format:
if datetime.strptime(ts, f):
ts_fmt = datetime.strptime(ts, f)
ts_fmt = ts_fmt.strftime(end_format)
return ts_fmt
except:
pass
ts = [
'08:27Sun, Dec 19, 2021 IST',
'Sun, 19 Dec 2021 02:28:56 +0000'
]
for t in ts:
formatted_ts = format_ts(t)
print(formatted_ts)
Issue:
IIRC, I shouldn't rely a failure (pass), but should rather catch the exception and handle
The above function iterates all timestamps through all formats (slow)
Unless I'm missing something, IST is not read by %Z
For some reason, '%a, %d %b %Y %H:%M:%S %z' is not for the correct format for ts[1]
Question: How are others handling this type of issue in Python?
Your timezone doesn't appear to be supported in the string representation for strptime. You can use dateutil parser with a timezone to overcome this.
from dateutil import parser, tz
ts = [
'08:27Sun, Dec 19, 2021 IST',
'Sun, 19 Dec 2021 02:28:56 +0000'
]
def format_ts(ts):
return [parser.parse(t, tzinfos={'IST':tz.gettz('Asia/Calcutta')}) for t in ts]
format_ts(ts)
Output
[datetime.datetime(2021, 12, 19, 8, 27, tzinfo=tzfile('Asia/Calcutta')),
datetime.datetime(2021, 12, 19, 2, 28, 56, tzinfo=tzutc())]

Trouble Converting string to datetime?

I have this strings with the format "09-FEB-21 05.19.32.871000000 AM".
import datetime
dt_string = "09-FEB-21 05.19.32.871000000 AM"
format = "%d-%m-%y %H.%M.%S.%f %p"
dt_object = datetime.datetime.strptime(dt_string, format)
The above code causes ValueError("unconverted data remains: %s" %
time data '09-FEB-21 05.19.32.871000000 AM' does not match format '%d-%m-%y %H.%M.%S.%f %p'
Any help?
Use %b instead of %m (doc):
dt_string = "09-FEB-21 05.19.32.871000000 AM"
print(pd.to_datetime(dt_string, format="%d-%b-%y %H.%M.%S.%f %p"))
Prints:
2021-02-09 05:19:32.871000
EDIT: Using only datetime: Remove last 000 from the fraction:
import re
import datetime
dt_string = "09-FEB-21 05.19.32.871000000 AM"
dt_string = re.sub(r"000(?=\s)", "", dt_string)
format = "%d-%b-%y %H.%M.%S.%f %p"
dt_object = datetime.datetime.strptime(dt_string, format)
print(dt_object)
Prints:
2021-02-09 05:19:32.871000

Date String to Time tuple

I am trying to pass a string into as function and need to convert it into a time tuple:
def sim(startdate, enddate):
# need to convert the date from string to integer time tuple:
dt_start = dt.date(startdate)
print 'Start Date: ', dt_start
dt_end = dt.date(enddate)
print 'End Date: ', dt_end
# in String format
sim('Jan 1, 2011', 'Dec 31, 2011')
# in interger in string format
sim('2011,1,1', '2011,12,31')
Another way would be to use strptime(). Have a time format defined for both of your formats and use them accordingly. This is what I mean:
import datetime as dt
def sim(startdate, enddate):
time_format_one = "%b %d, %Y"
time_format_two = "%Y,%m,%d"
try:
dt_start = dt.datetime.strptime(startdate, time_format_one)
dt_end = dt.datetime.strptime(enddate, time_format_one)
except ValueError:
dt_start = dt.datetime.strptime(startdate, time_format_two)
dt_end = dt.datetime.strptime(enddate, time_format_two)
print 'Start Date: ', dt_start.date()
print 'End Date: ', dt_end.date()
# in String format
sim('Jan 1, 2011', 'Dec 31, 2011')
# in interger in string format
sim('2011,1,1', '2011,12,31')
prints:
Start Date: 2011-01-01
End Date: 2011-12-31
Start Date: 2011-01-01
End Date: 2011-12-31
You could use timetuple() on dt_start and dt_end if you need time tuple.
I assume you want to convert date ('Jan 1, 2011', 'Dec 31, 2011') and ('2011,1,1', '2011,12,31') into timetuple
from datetime import datetime
date_str = "Jan 1, 2011"
fmt = "%b %d, %Y"
# Construct a datetime object
date_obj = datetime.strptime(date_str, fmt)
# Convert it to any string format you want
new_fmt = "%Y, %m, %d"
print date_obj.strftime(new_fmt)
# Prints'2011, 01, 01'
# If you want python timetuple then
t_tuple = date_obj.timetuple()
What you're probably trying to do is the following:
import datetime as dt
year,month,day = map(int, '2011,1,1'.split(','))
dt_start = dt.date(year,month,day)
print dt_start # prints 2011-01-01
The error is because of the use of a string '2011,1,1' instead of integers: 2011,1,1 as an input to: datetime.date()

Categories

Resources