The format of my csv file is this
Zeitstempel;Iteration;lag
"2022-01-26T22:28:11.347Z","1","2"
"2022-01-26T22:28:11.348Z","2","1"
and my python code is this
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
csv_data = 'lag.log'
df = pd.read_csv(csv_data, encoding='latin1', sep=',', header=1,
names=['Zeitstempel', 'Iteration', 'Lag'])
df_cleaned = df.dropna()
x = df_cleaned['Zeitstempel'].values
y = df_cleaned['Lag'].values
#2022-01-26T21:59:30.810Z
dates = [dt.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.{0}Z").date() for date in x]
plt.plot_date(dates, y)
plt.show()
and the console output is this
Traceback (most recent call last):
File "chart.py", line 19, in <module>
dates = [dt.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.{0}Z").date() for date in x]
File "chart.py", line 19, in <listcomp>
dates = [dt.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.{0}Z").date() for date in x]
File "/usr/lib/python3.8/_strptime.py", line 568, in _strptime_datetime
tt, fraction, gmtoff_fraction = _strptime(data_string, format)
File "/usr/lib/python3.8/_strptime.py", line 349, in _strptime
raise ValueError("time data %r does not match format %r" %
ValueError: time data '2022-01-26T22:28:11.348Z' does not match format '%Y-%m-%dT%H:%M:%S.{0}Z'
Is there a way to convert the ISO string to a datetime?
Before Python 3.11, datetime.fromisoformat does not accept the 'Z' suffix, so you'll need to strip or replace it first.
from datetime import datetime
def datetime_from_js_isoformat(string: str) -> datetime:
    """Create a datetime object from a JavaScript ISO format string.

    JavaScript ISO strings end with ``Z`` to mark the value as UTC.
    ``datetime.fromisoformat`` rejects that suffix before Python 3.11, so it
    is rewritten as the equivalent explicit offset ``+00:00``.

    Args:
        string: ISO 8601 text, e.g. ``"2022-01-26T22:28:11.348Z"``.

    Returns:
        A timezone-aware UTC datetime when ``string`` ends in ``Z``;
        otherwise whatever ``datetime.fromisoformat`` yields for ``string``.

    Raises:
        ValueError: If the text is not in a format that
            ``datetime.fromisoformat`` accepts.
    """
    if string.endswith('Z'):
        # Merely dropping the 'Z' would produce a naive datetime and silently
        # lose the fact that the timestamp is expressed in UTC.
        string = string[:-1] + '+00:00'
    return datetime.fromisoformat(string)
Related
I have a date string that I am trying to format using python datetime.
# show the raw value and its type
print(datestr, type(datestr))
2006-06-09T00:00:00 <class 'str'>
from datetime import datetime
import numpy as np
lastDate = datetime.strptime(datestr, '%Y-%m-%d')
# Calculate Months since
mosLast = (pd.to_datetime('today') - lastDate)/np.timedelta64(1, 'M')
Traceback:
lastDate = datetime.strptime(datestr, '%Y-%m-%d')
File "/Applications/Anaconda/anaconda3/lib/python3.9/_strptime.py", line 568, in _strptime_datetime
tt, fraction, gmtoff_fraction = _strptime(data_string, format)
File "/Applications/Anaconda/anaconda3/lib/python3.9/_strptime.py", line 352, in _strptime
raise ValueError("unconverted data remains: %s" %
ValueError: unconverted data remains: T00:00:00
I resolved this with:
# Format date: parse the full ISO timestamp, then keep only its date part
lastDate = datetime.strptime(datestr, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d')
# Re-parse the trimmed string; datestr itself still carries the 'T00:00:00'
# suffix and would raise "unconverted data remains" again
lastDate = datetime.strptime(lastDate, '%Y-%m-%d')
# Calculate Months since last sale date
mosLastSale = (pd.to_datetime('today') - lastDate)/np.timedelta64(1, 'M')
I need to import a csv file using pandas that has a date field in the format 'year.day-of-year', such as '1980.042', which corresponds to '11/02/1980' in 'DD/MM/YYYY' format.
File sample:
data
1980.042
1980.125
1980.208
1980.292
1980.375
1980.458
1980.542
1980.625
1980.708
Using pd.to_datetime I can transform it like this:
d = '1980.042'
print(pd.to_datetime(d, format = '%Y.%j'))
Output:
1980-02-11 00:00:00
My first attempt was to read the file and convert the dataframe column:
import pandas as pd
df = pd.read_csv('datas.csv')  # 'data' is inferred as float64 (see the printed dtypes)
print(df.dtypes, '\n\n', df.head())
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 4, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
The second attempt was to transform the column into a str and then a date:
import pandas as pd
df = pd.read_csv('datas.csv')
print(df.dtypes, '\n\n', df.head())
df['data'] = df['data'].astype(str)
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 6, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
Then I realized that, because of some internal floating point issue, the data was getting more than three decimal places. So I rounded it to just three decimal places before converting:
import pandas as pd
df = pd.read_csv('datas.csv')
print(df.dtypes, '\n\n', df.head())
df['data'] = df['data'].round(3).astype(str)
print(df.dtypes, '\n\n', df.head())
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
Output:
data float64
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
data object
dtype: object
data
0 1980.042
1 1980.125
2 1980.208
3 1980.292
4 1980.375
Traceback (most recent call last):
File "datas.py", line 8, in <module>
df['data'] = pd.to_datetime(df['data'], '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 451, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 368, in _convert_listlike
require_iso8601=require_iso8601
File "pandas/_libs/tslib.pyx", line 492, in pandas._libs.tslib.array_to_datetime
File "pandas/_libs/tslib.pyx", line 513, in pandas._libs.tslib.array_to_datetime
AssertionError
Finally, I saw in the pandas documentation and in some forums that I could define the data type when reading the file and also apply a lambda function:
import pandas as pd
date_parser = lambda col: pd.to_datetime(str(col), format = '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df.dtypes, '\n\n', df.head())
Output:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 377, in _convert_listlike
values, tz = conversion.datetime_to_datetime64(arg)
File "pandas/_libs/tslibs/conversion.pyx", line 188, in pandas._libs.tslibs.conversion.datetime_to_datetime64
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "datas.py", line 5, in <module>
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 446, in _read
data = parser.read(nrows)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1036, in read
ret = self._engine.read(nrows)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1921, in read
names, data = self._do_date_conversions(names, data)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 1675, in _do_date_conversions
self.index_names, names, keep_date_col=self.keep_date_col)
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 3066, in _process_date_conversion
data_dict[colspec] = converter(data_dict[colspec])
File "/usr/lib/python3/dist-packages/pandas/io/parsers.py", line 3033, in converter
return generic_parser(date_parser, *date_cols)
File "/usr/lib/python3/dist-packages/pandas/io/date_converters.py", line 39, in generic_parser
results[i] = parse_func(*args)
File "datas.py", line 3, in <lambda>
date_parser = lambda col: pd.to_datetime(str(col), format = '%Y.%j')
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 469, in to_datetime
result = _convert_listlike(np.array([arg]), box, format)[0]
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 380, in _convert_listlike
raise e
File "/usr/lib/python3/dist-packages/pandas/core/tools/datetimes.py", line 347, in _convert_listlike
errors=errors)
File "pandas/_libs/tslibs/strptime.pyx", line 163, in pandas._libs.tslibs.strptime.array_strptime
ValueError: unconverted data remains: 5
Anyway, nothing works, has anyone been there? Any suggestions for doing the file reading with the correct data type or for converting the column on the dataframe?
I really hadn't realized the problem with the data.
Removing those with decimal parts greater than 365, I tested Tuhin Sharma's idea.
Unfortunately, it returns the value of the first line for all dataframe lines.
But I used the datetime module, as suggested by Tuhin Sharma, in a lambda function when reading the file as follows:
Sample file:
data
1980.042
1980.125
1980.208
1980.292
Code:
import pandas as pd
import datetime
date_parser = lambda col: datetime.datetime.strptime(col, '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df)
Output:
data
0 1980-02-11
1 1980-05-04
2 1980-07-26
3 1980-10-18
You could try using datetime module. You can try the following code:-
import pandas as pd
import numpy as np
import datetime
import pandas as pd
def _parse_year_day(text):
    """Parse 'YYYY.DDD' text (year, then day of the year) into a datetime."""
    return datetime.datetime.strptime(text, '%Y.%j')


# Read every column as text so the three decimal digits survive untouched,
# then convert the 'data' column value by value.
df = pd.read_csv('datas.csv', dtype=str)
df["data"] = df["data"].map(_parse_year_day)
However, this code will fail because your data has a problem.
1980.375
1980.458
1980.542
1980.625
1980.708
In these values the day-of-year part (the three decimal places) is greater than 366, which is not a valid day of the year, and that's why it will throw an error.
Hope this helps!!
You can try the following code as well which is a lot cleaner:-
import pandas as pd
import datetime
date_parser = lambda x: datetime.datetime.strptime(x, '%Y.%j')
df = pd.read_csv('datas.csv', parse_dates = ['data'], date_parser = date_parser)
print(df)
how must I change an element from a list that is in this form: 05/06/2020, to get a date object like:
date_object = datetime.strptime(listelement, '%m/%d/%y') ?
Here is my Code:
daten = {}
with open("Observed_Monthly_Rain_Gauge_Accumulations_-_Oct_2002_to_May_2017.csv", 'r') as csvfile:
    regen_csv = csv.reader(csvfile)
    # skip the first row (presumably the header)
    next(regen_csv, None)
    for rows in regen_csv:
        # the first column holds the date text, the remaining columns the values
        keys = rows[:1]
        strt = str(keys[0])
        # NOTE: this is the call that raises the ValueError quoted below
        date_object = datetime.strptime(strt, '%m/%d/%y')
        values = rows[1:]
        daten[strt] = values
print(daten)
Traceback (most recent call last):
File *directory*, line 16, in <module>
date_object = datetime.strptime(strt, '%m/%d/%y')
File "C:\Program Files\Python37\lib\_strptime.py", line 577, in _strptime_datetime
tt, fraction, gmtoff_fraction = _strptime(data_string, format)
File "C:\Program Files\Python37\lib\_strptime.py", line 362, in _strptime
data_string[found.end():])
ValueError: unconverted data remains: 02
Here's a link to the list with the data:
https://data.seattle.gov/api/views/rdtp-hzy3/rows.csv?accessType=DOWNLOAD
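You need to use %Y for four-digit years such as 2020 (%y only matches a two-digit year). A full list of format codes can be found in the Python documentation for datetime (strftime() and strptime() format codes).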
date_object = datetime.strptime(strt, '%m/%d/%Y')
I'm trying to dynamically convert a string seconds like below to a string date.
'1545239561 +0100'
The problem is the timezone appended at the end; I can't find any Python time method that accepts a format matching this string and returns the date.
My tries :
>>>seconds = '1545239561 +0100'
>>>time.strftime('%y%m%d-%H%M%S-%f', datetime.datetime.fromtimestamp(seconds)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: an integer is required (got type str)
>>>time.strptime(seconds)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/python/3.6.4_3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py", line 559, in _strptime_time
tt = _strptime(data_string, format)[0]
File "/usr/local/Cellar/python/3.6.4_3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py", line 362, in _strptime
(data_string, format))
ValueError: time data '1545239561 +0100' does not match format '%a %b %d %H:%M:%S %Y'
>>>time.strptime(seconds, "%S +%Z")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/python/3.6.4_3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py", line 559, in _strptime_time
tt = _strptime(data_string, format)[0]
File "/usr/local/Cellar/python/3.6.4_3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/_strptime.py", line 362, in _strptime
(data_string, format))
ValueError: time data '1545239561 +0100' does not match format '%S +%Z'
I would try to process those two values separately and merge them into a single datetime:
>>>from datetime import datetime
>>>s = '1545239561 +0100'
>>>seconds, offset = s.split()
>>>datetime.fromtimestamp(int(seconds)).replace(tzinfo=datetime.strptime(offset, "%z").tzinfo)
datetime.datetime(2018, 12, 19, 17, 12, 41, tzinfo=datetime.timezone(datetime.timedelta(0, 3600)))
Yes #mfrackwiak...
I did this
>>> epoch = "1545239561 +0100"
>>> seconds, offset = epoch.split()
>>> datetime.fromtimestamp(int(seconds)).replace(tzinfo=datetime.strptime(offset, "%z").tzinfo).strftime('%Y-%m-%d %H:%M:%S-%Z')
'2018-12-19 18:12:41-UTC+01:00'
>>>
You can try the below example :
from datetime import datetime, timedelta, timezone
# split the time value and the offset value
ss = '1545239561 +0100'.split()
# the offset looks like "+HHMM" or "-HHMM": character 0 is the sign, so the
# hour digits are [1:3] and the minute digits are [3:5]
sign = -1 if ss[1][0] == '-' else 1
hour = int(ss[1][1:3])
minute = int(ss[1][3:5])
# interpret the epoch seconds as UTC (not the machine's local zone) so the
# result is the same everywhere; tzinfo is dropped to keep a naive value
time_from_sec = datetime.fromtimestamp(int(ss[0]), tz=timezone.utc).replace(tzinfo=None)
# add the signed offset to get the wall-clock time at that offset
time_with_delta_added = time_from_sec + sign * timedelta(hours=hour, minutes=minute)
print(time_with_delta_added)  # 2018-12-19 18:12:41
Output :
2018-12-19 12:22:41
I keep getting an error using the numpy loadtxt converter.
Your help is greatly appreciated
import numpy as np
import time
import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from matplotlib.finance import candlestick
from matplotlib.dates import strpdate2num
import urllib2
## global variables
eachStock = 'AAPL','GOOG','MSFT','AMZN','CMG'
for stock in eachStock:
stockFile = stock+'.txt'
date, closep, highp, lowp, openp, volume = np.loadtxt(eachStock, delimiter=',', unpack=True,
converters={ 0: mdates.strpdate2num('%Y%m%d')})
dFrame = Series(closep)
here is the first line in my text file
20040322,13.5200,13.6800,12.6100,12.6850,15850720
here is the error I keep getting
Traceback (most recent call last):
File "C:\Users\antoniozeus\Desktop\BuyAndHold.py", line 27, in <module>
converters={ 0: mdates.strpdate2num('%Y%m%d')})
File "C:\Python27\lib\site-packages\numpy\lib\npyio.py", line 796, in loadtxt
items = [conv(val) for (conv, val) in zip(converters, vals)]
File "C:\Python27\lib\site-packages\matplotlib\dates.py", line 233, in __call__
return date2num(datetime.datetime(*time.strptime(s, self.fmt)[:6]))
File "C:\Python27\lib\_strptime.py", line 454, in _strptime_time
return _strptime(data_string, format)[0]
File "C:\Python27\lib\_strptime.py", line 325, in _strptime
(data_string, format))
ValueError: time data 'AAPL' does not match format '%Y%m%d'
It seems like you mistyped stockFile (filename) as eachStock.
date, closep, highp, lowp, openp, volume = np.loadtxt(
stockFile, delimiter=',', unpack=True,
converters={ 0: mdates.strpdate2num('%Y%m%d')})