I tried to use this function but I got error.
I think need to change date format of time parameters.
now = datetime.now()
past = now - timedelta(days=2)
past = str(past)
bars = client.get_historical_klines("BTCUSDT", "1m", start_str = past, end_str = None, limit = 1000)
But I got error..
When I delete the start_str and end_str, it works.
How I can handle the date str for this function.
Could you help me?!(example is the best!)
---------ERROR------------
TypeError Traceback (most recent call last)
Input In [46], in <cell line: 1>()
----> 1 bars = client.get_historical_klines(symbol="BTCUSDT", interval="1m",
2 start_str=past, end_str=None, limit=1000)
File ~/opt/anaconda3/lib/python3.9/site-packages/binance/client.py:934, in Client.get_historical_klines(self, symbol, interval, start_str, end_str, limit, klines_type)
914 def get_historical_klines(self, symbol, interval, start_str=None, end_str=None, limit=1000,
915 klines_type: HistoricalKlinesType = HistoricalKlinesType.SPOT):
916 """Get Historical Klines from Binance
917
918 :param symbol: Name of symbol pair e.g BNBBTC
(...)
932
933 """
--> 934 return self._historical_klines(
935 symbol, interval, start_str=start_str, end_str=end_str, limit=limit, klines_type=klines_type
936 )
File ~/opt/anaconda3/lib/python3.9/site-packages/binance/client.py:969, in Client._historical_klines(self, symbol, interval, start_str, end_str, limit, klines_type)
966 timeframe = interval_to_milliseconds(interval)
968 # if a start time was passed convert it
--> 969 start_ts = convert_ts_str(start_str)
971 # establish first available start timestamp
972 if start_ts is not None:
File ~/opt/anaconda3/lib/python3.9/site-packages/binance/helpers.py:76, in convert_ts_str(ts_str)
74 if type(ts_str) == int:
75 return ts_str
---> 76 return date_to_milliseconds(ts_str)
File ~/opt/anaconda3/lib/python3.9/site-packages/binance/helpers.py:24, in date_to_milliseconds(date_str)
22 epoch: datetime = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
23 # parse our date string
---> 24 d: Optional[datetime] = dateparser.parse(date_str, settings={'TIMEZONE': "UTC"})
25 if not d:
26 raise UnknownDateFormat(date_str)
File ~/opt/anaconda3/lib/python3.9/site-packages/dateparser/conf.py:92, in apply_settings.<locals>.wrapper(*args, **kwargs)
89 if not isinstance(kwargs['settings'], Settings):
90 raise TypeError("settings can only be either dict or instance of Settings class")
---> 92 return f(*args, **kwargs)
File ~/opt/anaconda3/lib/python3.9/site-packages/dateparser/__init__.py:61, in parse(date_string, date_formats, languages, locales, region, settings, detect_languages_function)
57 if languages or locales or region or detect_languages_function or not settings._default:
58 parser = DateDataParser(languages=languages, locales=locales,
59 region=region, settings=settings, detect_languages_function=detect_languages_function)
---> 61 data = parser.get_date_data(date_string, date_formats)
63 if data:
64 return data['date_obj']
File ~/opt/anaconda3/lib/python3.9/site-packages/dateparser/date.py:419, in DateDataParser.get_date_data(self, date_string, date_formats)
376 """
377 Parse string representing date and/or time in recognizable localized formats.
378 Supports parsing multiple languages and timezones.
(...)
416
417 """
418 if not isinstance(date_string, str):
--> 419 raise TypeError('Input type must be str')
421 res = parse_with_formats(date_string, date_formats or [], self._settings)
422 if res['date_obj']:
TypeError: Input type must be str
binance.client.Client.get_historical_klines() takes int or str as input value for start_str, see the documentation of this method:
def get_historical_klines(self, symbol, interval, start_str, end_str=None, limit=500,
klines_type: HistoricalKlinesType = HistoricalKlinesType.SPOT):
"""Get Historical Klines from Binance
:param symbol: Name of symbol pair e.g BNBBTC
:type symbol: str
:param interval: Binance Kline interval
:type interval: str
:param start_str: Start date string in UTC format or timestamp in milliseconds
:type start_str: str|int
:param end_str: optional - end date string in UTC format or timestamp in milliseconds (default will fetch everything up to now)
:type end_str: str|int
:param limit: Default 500; max 1000.
:type limit: int
:param klines_type: Historical klines type: SPOT or FUTURES
:type klines_type: HistoricalKlinesType
:return: list of OHLCV values
"""
return self._historical_klines(symbol, interval, start_str, end_str=end_str, limit=limit, klines_type=klines_type)
You're trying to pass an datetime.datetime object.
You can convert this object to a timestamp (ms) with:
import datetime as dt
now = dt.datetime.now(dt.timezone.utc)
past = now - dt.timedelta(days=2)
# Gives you a timestamp in ms
past_timestamp_ms = int(round(past.timestamp() * 1000, 0))
Related
The data in test.csv are like this:
TIMESTAMP POLYLINE
0 1408039037 [[-8.585676,41.148522],[-8.585712,41.148639],[...
1 1408038611 [[-8.610876,41.14557],[-8.610858,41.145579],[-...
2 1408038568 [[-8.585739,41.148558],[-8.58573,41.148828],[-...
3 1408039090 [[-8.613963,41.141169],[-8.614125,41.141124],[...
4 1408039177 [[-8.619903,41.148036],[-8.619894,41.148036]]
.. ... ...
315 1419171485 [[-8.570196,41.159484],[-8.570187,41.158962],[...
316 1419170802 [[-8.613873,41.141232],[-8.613882,41.141241],[...
317 1419172121 [[-8.6481,41.152536],[-8.647461,41.15241],[-8....
318 1419171980 [[-8.571699,41.156073],[-8.570583,41.155929],[...
319 1419171420 [[-8.574561,41.180184],[-8.572248,41.17995],[-...
[320 rows x 2 columns]
I read them from csv file in this way:
train = pd.read_csv("path/train.csv",engine='python',error_bad_lines=False)
So, I have this timestamp in Unix format. I want to convert in UTC time and then extract year, month, day and so on.
This is the code for the conversion from Unix timestamp to UTC date time:
train["TIMESTAMP"] = [float(time) for time in train["TIMESTAMP"]]
train["data_time"] = [datetime.datetime.fromtimestamp(time, datetime.timezone.utc) for time in train["TIMESTAMP"]]
To extract year and other information I do this:
train["year"] = train["data_time"].dt.year
train["month"] = train["data_time"].dt.month
train["day"] = train["data_time"].dt.day
train["hour"] = train["data_time"].dt.hour
train["min"] = train["data_time"].dt.minute
But I obtain this error when the execution arrives at the extraction point:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-30-d2249cabe965> in <module>()
67 train["TIMESTAMP"] = [float(time) for time in train["TIMESTAMP"]]
68 train["data_time"] = [datetime.datetime.fromtimestamp(time, datetime.timezone.utc) for time in train["TIMESTAMP"]]
---> 69 train["year"] = train["data_time"].dt.year
70 train["month"] = train["data_time"].dt.month
71 train["day"] = train["data_time"].dt.day
2 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/accessors.py in __new__(cls, data)
478 return PeriodProperties(data, orig)
479
--> 480 raise AttributeError("Can only use .dt accessor with datetimelike values")
AttributeError: Can only use .dt accessor with datetimelike values
I also read a lot of similiar discussion but I can't figure out why I obtain this error.
Edited:
So the train["TIMESTAMP"] data are like this:
1408039037
1408038611
1408039090
Then I do this with this data:
train["TIMESTAMP"] = [float(time) for time in train["TIMESTAMP"]]
train["data_time"] = [datetime.datetime.fromtimestamp(time, datetime.timezone.utc) for time in train["TIMESTAMP"]]
train["year"] = train["data_time"].dt.year
train["month"] = train["data_time"].dt.month
train["day"] = train["data_time"].dt.day
train["hour"] = train["data_time"].dt.hour
train["min"] = train["data_time"].dt.minute
train = train[["year", "month", "day", "hour","min"]]
I'm just starting with pandas. All the answers I found for the error message do not resolve my error. I'm trying to build a dataframe from a dictionary constructed from an IBM cloudant query. I'm using a jupyter notebook. The specific error message is: If using all scalar values, you must pass an index
the section of code where I think my error is, is here:
def read_high_low_temp(location):
USERNAME = "*************"
PASSWORD = "*************"
client = Cloudant(USERNAME,PASSWORD, url = "https://**********" )
client.connect()
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
df.set_index('Time', inplace= True)
for row in temp_dict:
value_list.append(row['temp'])
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
my data (Output from Jupyter) looks like this:
Temperature Time Date
Time 51.6 05:07:18 12-31-2020
Time 51.6 04:59:00 12-31-2020
Time 51.5 04:50:31 12-31-2020
Time 51.5 05:15:38 12-31-2020
Time 51.5 05:03:09 12-31-2020
... ... ... ...
Time 45.3 11:56:34 12-31-2020
Time 45.3 11:52:22 12-31-2020
Time 45.3 11:14:15 12-31-2020
Time 45.2 10:32:05 12-31-2020
Time 45.2 10:36:22 12-31-2020
[164 rows x 3 columns]
my full code looks like:
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, re, sys, sqlite3, logging
#import RPi.GPIO as GPIO
from datetime import datetime
import datetime as dt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
USERNAME = "******"
PASSWORD = "******"
client = Cloudant(USERNAME,PASSWORD, url = "********" )
client.connect()
# location='Backyard'
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
df.set_index('Time')
for row in temp_dict:
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
print ("Cloudant Jupyter Query test\nThe hour = ",dt.datetime.now().hour)
msg1, values=read_high_low_temp("Backyard")
print (msg1)
print(values)
sns.lineplot(values)
The full error message from Jupyter is:
C:\Users\ustl02870\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-34956d8dafb0> in <module>
53
54 #df = sns.load_dataset(values)
---> 55 sns.lineplot(values)
56 #print (values)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
686 data=data, variables=variables,
687 estimator=estimator, ci=ci, n_boot=n_boot, seed=seed,
--> 688 sort=sort, err_style=err_style, err_kws=err_kws, legend=legend,
689 )
690
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in __init__(self, data, variables, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend)
365 )
366
--> 367 super().__init__(data=data, variables=variables)
368
369 self.estimator = estimator
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in __init__(self, data, variables)
602 def __init__(self, data=None, variables={}):
603
--> 604 self.assign_variables(data, variables)
605
606 for var, cls in self._semantic_mappings.items():
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in assign_variables(self, data, variables)
666 self.input_format = "long"
667 plot_data, variables = self._assign_variables_longform(
--> 668 data, **variables,
669 )
670
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in _assign_variables_longform(self, data, **kwargs)
924 # Construct a tidy plot DataFrame. This will convert a number of
925 # types automatically, aligning on index in case of pandas objects
--> 926 plot_data = pd.DataFrame(plot_data)
927
928 # Reduce the variables dictionary to fields with valid data
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
527
528 elif isinstance(data, dict):
--> 529 mgr = init_dict(data, index, columns, dtype=dtype)
530 elif isinstance(data, ma.MaskedArray):
531 import numpy.ma.mrecords as mrecords
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
285 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
286 ]
--> 287 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
288
289
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
78 # figure out the index, if necessary
79 if index is None:
---> 80 index = extract_index(arrays)
81 else:
82 index = ensure_index(index)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in extract_index(data)
389
390 if not indexes and not raw_lengths:
--> 391 raise ValueError("If using all scalar values, you must pass an index")
392
393 if have_series:
ValueError: If using all scalar values, you must pass an index
I resolved my problem with help/direction from #Ena, as it turned out I made several mistake. In layman's terms 1) I was trying to plot a tuple when it should have been a dataframe, 2) My data was in a dictionary, I was iterating through it trying to build a tuple when I should used built in panda tools to build a dataframe right from the dictionary 3) my code should have been written so as to NOT have scalar values so as NOT to need an index, and finally 4) I was trying to use a tuple as data for my seaborn plot when it should have been a dataframe. Here is the code that now works.
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, sys
from datetime import datetime
import datetime as dt
from matplotlib import pyplot as plt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
USERNAME = "****************"
PASSWORD = "*****************"
client = Cloudant(USERNAME,PASSWORD, url = "**************************" )
client.connect()
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'t': 'asc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(temp_dict)
value_list=[]
for row in temp_dict:
value_list.append(row['temp'])
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
msg1, values=read_high_low_temp("Backyard")
g=sns.catplot(x='t', y='temp', data=values, kind='bar',color="darkblue",height=8.27, aspect=11.7/8.27)
print("the minimum temp is:", values['temp'].min(), " the maximum temp is:", values['temp'].max())
plt.xticks(rotation=45)
g.set(xlabel='Time', ylabel='Temperature')
plt.ylim(values['temp'].min()-1, values['temp'].max()+1)
plt.savefig("2021-01-01-temperature graph.png")
g.set_xticklabels(step=10)
The problem is that you assigned "Time" as an index everywhere. Look how the data frame looks in seaborn.lineplot documentation: https://seaborn.pydata.org/generated/seaborn.lineplot.html
Can you try without this df.set_index('Time') part?
I have got this error when try split my one column to few columns. But it split on just on one or two columns.If you wanna split on 3,4,5 columns it writes:
ValueError Traceback (most recent call last)
/usr/local/Cellar/jupyterlab/2.1.5/libexec/lib/python3.8/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
349 try:
--> 350 return self._range.index(new_key)
351 except ValueError:
ValueError: 2 is not in range
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-19-d4e6a4d03e69> in <module>
22 data_old[Col_1_Label] = newz[0]
23 data_old[Col_2_Label] = newz[1]
---> 24 data_old[Col_3_Label] = newz[2]
25 #data_old[Col_4_Label] = newz[3]
26 #data_old[Col_5_Label] = newz[4]
/usr/local/Cellar/jupyterlab/2.1.5/libexec/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
/usr/local/Cellar/jupyterlab/2.1.5/libexec/lib/python3.8/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
350 return self._range.index(new_key)
351 except ValueError:
--> 352 raise KeyError(key)
353 return super().get_loc(key, method=method, tolerance=tolerance)
354
KeyError: 2
There is my code.I have csv file.And when pandas read it - create one column with value 'Контракт'.Then. I split it on another columns. But it split till two columns.I wanna 7 columns!Help please to understand this logic!
import pandas as pd
from pandas import Series, DataFrame
import re
dframe1 = pd.read_csv('po.csv')
columns = ['Контракт']
data_old = pd.read_csv('po.csv', header=None, names=columns)
data_old
# The thing you want to split the column on
SplitOn = ':'
# Name of Column you want to split
Split_Col = 'Контракт'
newz = data_old[Split_Col].str.split(pat=SplitOn, n=-1, expand=True)
# Column Labels (you can add more if you will have more)
Col_1_Label = 'Номер телефону'
Col_2_Label = 'Тарифний пакет'
Col_3_Label = 'Вихідні дзвінки з України за кордон'
Col_4_Label = 'ВАРТІСТЬ ПАКЕТА/ЩОМІСЯЧНА ПЛАТА'
Col_5_Label = 'ЗАМОВЛЕНІ ДОДАТКОВІ ПОСЛУГИ ЗА МЕЖАМИ ПАКЕТА'
Col_6_Label = 'Вартість послуги "Корпоративна мережа'
Col_7_Label = 'ЗАГАЛОМ ЗА КОНТРАКТОМ (БЕЗ ПДВ ТА ПФ)'
data_old[Col_1_Label] = newz[0]
data_old[Col_2_Label] = newz[1]
data_old[Col_3_Label] = newz[2]
#data_old[Col_4_Label] = newz[3]
#data_old[Col_5_Label] = newz[4]
#data_old[Col_6_Label] = newz[5]
#data_old[Col_7_Label] = newz[6]
data_old
Pandas does not support "unstructured text", you should convert it to a standard format or python objects and then create a dataframe from it
Imagine that you have a file with this text named data.txt:
Contract № 12345679 Number of phone: +7984563774
Total price for month : 00.00000
Total price: 10.0000
You can load an process it with Python like this:
with open('data.txt') as f:
content = list(data.readlines())
# First line contains the contract number and phone information
contract, phone = content[0].split(':')
# find contract number using regex
contract = re.findall('\d+', contract)[0]
# The phone is strightforward
phone = phone.strip()
# Second line and third line for prices
total_price = float(content[1].split(':')[1].strip())
total_month_price = float(content[2].split(':')[1].strip())
Then with these variables you can create a dataframe
df = pd.DataFrame([dict(N_of_contract=contract, total_price=total_price, total_month_price =total_month_price )])
Repeat the same for all files.
I'm trying to extract dates from txt files using datefinder.find_dates which returns a generator object. Everything works fine until I try to convert the generator to list, when i get the following error.
I have been looking around for a solution but I can't figure out a solution to this, not sure I really understand the problem neither.
import datefinder
import glob
path = "some_path/*.txt"
files = glob.glob(path)
dates_dict = {}
for name in files:
with open(name, encoding='utf8') as f:
dates_dict[name] = list(datefinder.find_dates(f.read()))
Returns :
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
<ipython-input-53-a4b508b01fe8> in <module>()
1 for name in files:
2 with open(name, encoding='utf8') as f:
----> 3 dates_dict[name] = list(datefinder.find_dates(f.read()))
C:\ProgramData\Anaconda3\lib\site-packages\datefinder\__init__.py in
find_dates(self, text, source, index, strict)
29 ):
30
---> 31 as_dt = self.parse_date_string(date_string, captures)
32 if as_dt is None:
33 ## Dateutil couldn't make heads or tails of it
C:\ProgramData\Anaconda3\lib\site-packages\datefinder\__init__.py in
parse_date_string(self, date_string, captures)
99 # otherwise self._find_and_replace method might corrupt
them
100 try:
--> 101 as_dt = parser.parse(date_string, default=self.base_date)
102 except ValueError:
103 # replace tokens that are problematic for dateutil
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in
parse(timestr, parserinfo, **kwargs)
1354 return parser(parserinfo).parse(timestr, **kwargs)
1355 else:
-> 1356 return DEFAULTPARSER.parse(timestr, **kwargs)
1357
1358
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in
parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
651 raise ValueError("String does not contain a date:",
timestr)
652
--> 653 ret = self._build_naive(res, default)
654
655 if not ignoretz:
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in
_build_naive(self, res, default)
1222 cday = default.day if res.day is None else res.day
1223
-> 1224 if cday > monthrange(cyear, cmonth)[1]:
1225 repl['day'] = monthrange(cyear, cmonth)[1]
1226
C:\ProgramData\Anaconda3\lib\calendar.py in monthrange(year, month)
122 if not 1 <= month <= 12:
123 raise IllegalMonthError(month)
--> 124 day1 = weekday(year, month, 1)
125 ndays = mdays[month] + (month == February and isleap(year))
126 return day1, ndays
C:\ProgramData\Anaconda3\lib\calendar.py in weekday(year, month, day)
114 """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month(1- 12),
115 day (1-31)."""
--> 116 return datetime.date(year, month, day).weekday()
117
118
OverflowError: Python int too large to convert to C long
Can someone explain this clearly?
Thanks in advance
REEDIT : After taking into consideration the remarks that were made, I found a minimal, readable and verifiable example. The error occurs on :
import datefinder
generator = datefinder.find_dates("466990103060049")
for s in generator:
pass
This looks to be a bug in the library you are using. It is trying to parse the string as a year, but that this year is too big to be handled by Python. The library that datefinder is using says that it raises an OverflowError in this instance, but that datefinder is ignoring this possibility.
One quick and dirty hack just to get it working would be to do:
>>> datefinder.ValueError = ValueError, OverflowError
>>> list(datefinder.find_dates("2019/02/01 is a date and 466990103060049 is not"))
[datetime.datetime(2019, 2, 1, 0, 0)]
I am running the below program to extract the stock information:
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
symbols=[]
for ticker in symbols_list:
r = DataReader(ticker, "yahoo",
start=datetime.datetime(2015, 4, 17))
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
This runs perfectly. But when I change the start date to today i.e (2015, 4, 20), then the program errors out. I have tried giving end date as well but no use. Below is the error that I get:
UnboundLocalError Traceback (most recent call last)
<ipython-input-38-a05c721d551a> in <module>()
8 for ticker in symbols_list:
9 r = DataReader(ticker, "yahoo",
---> 10 start=datetime.datetime(2015, 4, 20))
11 # add a symbol column
12 r['Symbol'] = ticker
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in DataReader(name, data_source, start, end, retry_count, pause)
75 return get_data_yahoo(symbols=name, start=start, end=end,
76 adjust_price=False, chunksize=25,
---> 77 retry_count=retry_count, pause=pause)
78 elif data_source == "google":
79 return get_data_google(symbols=name, start=start, end=end,
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in get_data_yahoo(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, interval)
418 raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")
419 return _get_data_from(symbols, start, end, interval, retry_count, pause,
--> 420 adjust_price, ret_index, chunksize, 'yahoo')
421
422
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price, ret_index, chunksize, source)
359 # If a single symbol, (e.g., 'GOOG')
360 if isinstance(symbols, (compat.string_types, int)):
--> 361 hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
362 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
363 elif isinstance(symbols, DataFrame):
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_hist_yahoo(sym, start, end, interval, retry_count, pause)
206 '&g=%s' % interval +
207 '&ignore=.csv')
--> 208 return _retry_read_url(url, retry_count, pause, 'Yahoo!')
209
210
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _retry_read_url(url, retry_count, pause, name)
175 #Get rid of unicode characters in index name.
176 try:
--> 177 rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
178 except AttributeError:
179 #Python 3 string has no decode method.
UnboundLocalError: local variable 'rs' referenced before assignment
Putting together the suggestions by #JohnE, the code below seems to do the job:
import pandas as pd
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
result = []
for ticker in symbols_list:
url = 'http://chartapi.finance.yahoo.com/instrument/1.0/%s/chartdata;type=quote;range=1d/csv' % ticker.lower()
data = pd.read_csv(url, skiprows=17)
data.columns = ['timestamp', 'close', 'high', 'low', 'open', 'close']
data['ticker'] = ticker
result.append(data)
pd.concat(result)
There result looks like this:
timestamp close high low open close ticker
0 1429536719 125.5500 125.5700 125.4170 125.5100 183600 AAPL
1 1429536772 125.5900 125.6399 125.4600 125.5200 215000 AAPL
2 1429536835 125.7500 125.8000 125.5600 125.5901 348500 AAPL
...
367 1429559941 58.5700 58.5800 58.5400 58.5800 119100 KLAC
368 1429559946 58.5700 58.5700 58.5700 58.5700 0 KLAC
369 1429560000 58.5600 58.5600 58.5600 58.5600 0 KLAC