I am inexperienced with Python and am trying to parse all timestamps of the following CSV as datetime objects in order to then perform operations on them (e.g. find timestamp differences etc.).
However, I can parse single lines but not the whole timestamp column. I am getting `KeyError: '2010-12-30 14:32:00'` for the first date of the timestamp column when reaching the line below my 'Not working' comment.
Thanks in advance.
from datetime import datetime, timedelta
import pandas as pd
from dateutil.parser import parse
# Load the CSV and keep only the first 19 characters of each timestamp
# (the length of a 'YYYY-MM-DD HH:MM:SS' string).
csvFile = pd.read_csv('runningComplete.csv')
column = csvFile['timestamp'].str.slice(0, 19, 1)
print(column)

# Parse two individual rows and show the difference between them.
dt1 = datetime.strptime(column[1], '%Y-%m-%d %H:%M:%S')
print(dt1)
dt2 = datetime.strptime(column[2], '%Y-%m-%d %H:%M:%S')
print(dt2)  # previously printed dt1 a second time
dt3 = dt1 - dt2
print(dt3)

# Iterating a Series yields its values, one timestamp string per pass.
for row in column:
    print(row)
Not working:
# Iterating a Series yields its values, so `row` is already the timestamp
# string; column[row] then looks that string up as an index label, which is
# what produces the reported KeyError.
for row in column:
timestamp = datetime.strptime(column[row], '%Y-%m-%d %H:%M:%S')
Related
I have a dataframe with timestamps in different formats, one like 05-28-2022 14:05:30 and one like 06-04-2022 03:04:13.002. I want to convert both into ISO format; how can I do that?
input output
05-28-2022 14:05:30 -> 2022-05-28T14:05:30.000+0000
06-04-2022 03:04:13.002 -> 2022-06-04T03:04:13.002+0000
You can use strptime() + strftime(). Here is an example:
from datetime import datetime
import pytz
# Output layout shared by both examples: ISO-like with microseconds and offset.
ISO_OUT = '%Y-%m-%dT%H:%M:%S.%f%z'

# Parse each string with the pattern matching its precision, then tag it as UTC.
first = datetime.strptime('05-28-2022 14:05:30', '%m-%d-%Y %H:%M:%S').replace(tzinfo=pytz.UTC)
second = datetime.strptime('06-04-2022 03:04:13.002', '%m-%d-%Y %H:%M:%S.%f').replace(tzinfo=pytz.UTC)

# Show the strftime rendering followed by isoformat() for each value.
for stamp in (first, second):
    print(stamp.strftime(ISO_OUT))
    print(stamp.isoformat())
# 2022-05-28T14:05:30.000000+0000
# 2022-05-28T14:05:30+00:00
# 2022-06-04T03:04:13.002000+0000
# 2022-06-04T03:04:13.002000+00:00
See datetime docs. Also you can use other packages for dates processing / formatting:
iso8601
pendulum
dateutil
arrow
Example with dataframe:
import pandas as pd
import pytz
from datetime import datetime
df = pd.DataFrame({'date': ['05-28-2022 14:05:30', '06-04-2022 03:04:13.002']})
def convert_date(x):
    """Convert an 'MM-DD-YYYY HH:MM:SS[.fff]' string to an ISO-style UTC string.

    Args:
        x: Timestamp text such as '05-28-2022 14:05:30' or
            '06-04-2022 03:04:13.002'; the fractional part is optional.

    Returns:
        The value formatted with '%Y-%m-%dT%H:%M:%S.%f%z', e.g.
        '2022-05-28T14:05:30.000000+0000'. The input is tagged as UTC
        without any conversion.

    Raises:
        ValueError: If x matches neither input layout.
    """
    # Function-scope import keeps the helper self-contained; the stdlib UTC
    # object renders as '+0000' under %z, so no extra package is required.
    from datetime import timezone

    # A dot can only be the fractional-seconds separator in these layouts.
    dt_format = '%m-%d-%Y %H:%M:%S.%f' if '.' in x else '%m-%d-%Y %H:%M:%S'
    dt = datetime.strptime(x, dt_format).replace(tzinfo=timezone.utc)
    return dt.strftime('%Y-%m-%dT%H:%M:%S.%f%z')
df['new_date'] = df['date'].apply(convert_date)
print(df)
date new_date
0 05-28-2022 14:05:30 2022-05-28T14:05:30.000000+0000
1 06-04-2022 03:04:13.002 2022-06-04T03:04:13.002000+0000
I need a way to reformat the date and time from 2021-01-27T12:00:17Z as a separate date and time variable in the format as shown below:
Date: 27/01/2021
Time: 12:00
import pandas as pd
values = {'dates': ['2021-01-27T12:00:17Z']}
df = pd.DataFrame(values)

# `format` describes how the INPUT strings are laid out (year first here).
df['dates'] = pd.to_datetime(df['dates'], format='%Y-%m-%dT%H:%M:%SZ')

# The column is already datetime-typed, so the .dt accessor is used directly.
parts = df['dates'].dt
formatted_date = parts.date
formatted_time = parts.time

print('Formatted Date:', formatted_date)
print('Formatted Time:', formatted_time)
print('df value:', df)
print(df.dtypes)
When I change the syntax from format='%Y-%m-%dT%H:%M:%SZ' to format='%d-%m-%YT%H:%M:%SZ' it produces an error.
Any help would be much appreciated.
I am using these, hope it helps;
from datetime import datetime, timedelta, timezone
# NOTE(review): `date_time` and `local_tz` are not defined in this snippet and
# must be supplied by the surrounding code. `date_time` is passed both to
# fromtimestamp() and to fromisoformat() — confirm which type it really has.
utc_time = datetime.fromtimestamp(date_time).astimezone(timezone.utc)
local_time = datetime.fromtimestamp(date_time).astimezone(local_tz)
# `.date` and `.time` are referenced without parentheses, so these two names
# are bound to the methods themselves rather than to date/time values.
date = datetime.fromisoformat(date_time).astimezone(local_tz).date
time = datetime.fromisoformat(date_time).astimezone(local_tz).time
for datetime calculation, we can use timedelta
# Offsets are added with timedelta (the class imported from datetime);
# the name `deltatime` does not exist and raised NameError.
local_time = datetime.fromtimestamp(date_time).astimezone(local_tz) + timedelta(hours=5)
local_time = datetime.fromtimestamp(date_time).astimezone(local_tz) + timedelta(minutes=60)
<built-in method date of datetime.datetime object at 0x000002BF40795DA0>
<built-in method time of datetime.datetime object at 0x000002BF40795DA0>
the date and time are datetime.datetime objects.
I have a dataframe with dates and would like to get the time between the first date and the last date. When I run the code below
# Sort the rows by the timestamp column, modifying df in place.
df.sort_values('timestamp', inplace=True)

# After sorting, the first and last rows hold the two endpoints.
firstDay = df.iloc[0]['timestamp']
lastDay = df.iloc[-1]['timestamp']
for endpoint in (firstDay, lastDay):
    print(endpoint)
it provides the following format for the dates:
2016-09-24 17:42:27.839496
2017-01-18 10:24:08.629327
and I'm trying to get the difference between them, but they're in str format, and I've been having trouble converting them to a form where I can get the difference.
here you go :o)
import datetime
from datetime import date
from datetime import datetime
import pandas as pd
date_format_str = '%Y-%m-%d %H:%M:%S.%f'
date_1 = '2016-09-24 17:42:27.839496'
date_2 = '2017-01-18 10:24:08.629327'

# Parse both strings with the shared layout in one pass.
start, end = (
    datetime.strptime(text, date_format_str) for text in (date_1, date_2)
)

# Elapsed time between the two timestamps, expressed in hours.
diff = end - start
diff_in_hours = diff.total_seconds() / 3600
print(diff_in_hours)

# Whole calendar days between the two dates, ignoring the time of day.
diff = end.date() - start.date()
print(diff.days)
Pandas
import datetime
from datetime import date
from datetime import datetime
import pandas as pd
date_1 = '2016-09-24 17:42:27.839496'
date_2 = '2017-01-18 10:24:08.629327'

# Both strings share one layout, so name it once and reuse it.
stamp_layout = '%Y-%m-%d %H:%M:%S.%f'
start = pd.to_datetime(date_1, format=stamp_layout)
end = pd.to_datetime(date_2, format=stamp_layout)

# Subtracting two Timestamps gives a Timedelta; .days is its whole-day part.
diff = end - start
print(diff.days)
Can anyone please tell me how to save my parsed datetime objects to a list? Please see code after the last comment where the problem comes up - Why do I get the AttributeError: 'datetime.datetime' object has no attribute 'toList'? Thanks!
from datetime import datetime, timedelta
import pandas as pd
from dateutil.parser import parse
# Read the CSV and keep the first 19 characters of each timestamp string.
csvFile = pd.read_csv('myFile.csv')
column = csvFile['timestamp']
column = column.str.slice(0, 19, 1)
# Parse rows 1 and 2 individually; dt1 is earlier than dt2, hence the
# negative difference shown for dt3.
dt1 = datetime.strptime(column[1], '%Y-%m-%d %H:%M:%S')
print("dt1", dt1) #output: dt1 2010-12-30 15:06:00
dt2 = datetime.strptime(column[2], '%Y-%m-%d %H:%M:%S')
print("dt2", dt2) #output: dt2 2010-12-30 16:34:00
dt3 = dt1 - dt2
print("dt3", dt3) #output: dt3 -1 day, 22:32:00
#works:
# `row` runs over 0..len(column)-1 and column[row] returns the string to parse.
for row in range(len(column)):
timestamp = datetime.strptime(column[row], '%Y-%m-%d %H:%M:%S')
print("timestamp", timestamp) #output (excerpt): timestamp 2010-12-30 14:32:00 timestamp 2010-12-30 15:06:00
#trying to save all parsed timestamps in list, NOT WORKING
# `timestamp` is a single datetime.datetime object, not a collection, and that
# type has no toList attribute — this is the reported AttributeError.
myNewList = timestamp.toList()
print(myNewList)
you should create the list before the for loop, and then add each element to it in the loop, like so:
# Collect every parsed timestamp; the list must exist before the loop starts.
myNewList = []
# Iterate over the values directly: column[row] is a label lookup and would
# fail on a Series whose index is not 0..n-1.
for value in column:
    timestamp = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
    print("timestamp", timestamp)
    myNewList.append(timestamp)
print(myNewList)
I have a file with a million tweets. The first tweet occurred 2013-04-15 20:17:18 UTC. I want to update each tweet row afterward with the minutes since that first tweet (minsSince).
I have found help with datetime here, and converting time here, but when I put the two together I don't get the right times. It could be something with the UTC string at the end of each published_at value.
The error it throws is:
tweets['minsSince'] = tweets.apply(timesince,axis=1)
...
TypeError: ('string indices must be integers, not str', u'occurred at index 0')
Thanks for any help.
#Import stuff
from datetime import datetime
import time
import pandas as pd
from pandas import DataFrame
#Read the csv file
tweets = pd.read_csv('BostonTWEETS.csv')
tweets.head()
#The first tweet's published_at time
starttime = datetime (2013, 04, 15, 20, 17, 18)
#Run through the document and calculate the minutes since the first tweet
# Applied to each row of `tweets` via apply(axis=1) to fill the minsSince column.
def timesince(row):
# int() evaluates to 0, which has no append method (see the append call below).
minsSince = int()
# tweetTime is the published_at cell of this row.
tweetTime = row['published_at']
# tweetTime is indexed with 'published_at' a second time here.
# NOTE(review): this looks like the source of the reported
# "string indices must be integers" TypeError. Also, '%UTC' is read by
# strptime as the %U directive followed by the literal text 'TC'.
ts = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(tweetTime['published_at'], '%Y-%m-%d %H:%M:%S %UTC'))
# Subtracts starttime from tweetTime (the raw cell value), not from the parsed ts.
timediff = (tweetTime - starttime)
# Appends the literal string "timediff", not the timediff variable.
minsSince.append("timediff")
return ",".join(minsSince)
tweets['minsSince'] = tweets.apply(timesince,axis=1)
df = DataFrame(tweets)
print(df)
Sample csv file of first 5 rows.
#Import stuff
from datetime import datetime
import time
import pandas as pd
from pandas import DataFrame
#Read the csv file
tweets = pd.read_csv('sample.csv')
tweets.head()
#The first tweet's published_at time
starttime = tweets.published_at.values[0]
starttime = datetime.strptime(starttime, '%Y-%m-%d %H:%M:%S UTC')
#Run through the document and calculate the minutes since the first tweet
def timesince(row, start=None):
    """Return the whole minutes elapsed between a reference time and a tweet.

    Args:
        row: A published_at value such as '2013-04-15 20:17:18 UTC'. Despite
            the name, this is a single string, not a DataFrame row.
        start: Reference datetime to measure from. Defaults to the
            module-level ``starttime``.

    Returns:
        int: Minutes from the reference time to the tweet, rounded down.

    Raises:
        ValueError: If row does not match '%Y-%m-%d %H:%M:%S UTC'.
    """
    if start is None:
        start = starttime
    ts = datetime.strptime(row, '%Y-%m-%d %H:%M:%S UTC')
    timediff = ts - start
    # Whole seconds (days + seconds, microseconds ignored) floor-divided by 60.
    return (timediff.days * 86400 + timediff.seconds) // 60
# Assign through [] so the column is created directly; the attribute form
# (tweets.minSince = ...) only works once the column already exists, which is
# why a placeholder column of zeros had to be created first.
tweets['minSince'] = tweets.published_at.map(timesince)
df = DataFrame(tweets)
print(df)
I hope this is what you are looking for.