import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np
df=pd.read_excel('currencynames.xlsx', sheet_name='Sheet1')
writer = pd.ExcelWriter('cryptodata.xlsx', engine='xlsxwriter')
currencies = df['Currency Name']
print(currencies[0])
for i in range(0,1465,1):
print("https://coinmarketcap.com/currencies/"+currencies[i]+"/historical-data/?start=20130428&end="+time.strftime("%Y%m%d"))
currency_market_info = pd.read_html("https://coinmarketcap.com/currencies/"+currencies[i].lower()+"/historical-data/?start=20130428&end="+time.strftime("%Y%m%d"))[0]
currency_market_info = currency_market_info.assign(Date=pd.to_datetime(currency_market_info['Date']))
currency_market_info.loc[currency_market_info['Volume']=="-",'Volume']=0
currency_market_info['Volume'] = currency_market_info['Volume'].astype('int64')
currency_market_info.head()
currency_market_info.to_excel(writer, sheet_name=currencies[i])
currency_market_info = 0
print("Done with "+currencies[i])
C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py:816:
FutureWarning: elementwise comparison failed; returning scalar
instead, but in the future will perform elementwise comparison
result = getattr(x, name)(y) Traceback (most recent call last):
File "", line 1, in
runfile('C:/Users/SAU/Desktop/deep learning/cryptodata/data.py', wdir='C:/Users/SAU/Desktop/deep learning/cryptodata')
File
"C:\Users\SAU\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 880, in runfile
execfile(filename, namespace)
File
"C:\Users\SAU\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/SAU/Desktop/deep learning/cryptodata/data.py", line
19, in
currency_market_info.loc[currency_market_info['Volume']=="-",'Volume']=0
File "C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py",
line 879, in wrapper
res = na_op(values, other)
File "C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py",
line 818, in na_op
raise TypeError("invalid type comparison")
TypeError: invalid type comparison
Related
I am trying to create a some rolling timeseries panels (rolling 12 months forward each time) in pandas using the below code:
import pandas as pd
import numpy as np
def main():
dates = pd.date_range('1995-12-31', periods=480, freq='M', name='Date')
stoks = pd.Index(['s{:04d}'.format(i) for i in range(4000)])
df = pd.DataFrame(np.random.rand(480, 4000), dates, stoks)
print(df.head())
rdf = roll(df, 12)
def roll(df, w):
# stack df.values w-times shifted once at each stack
roll_array = np.dstack([df.values[i:i+w, :] for i in range(len(df.index) - w + 1)]).T
# roll_array is now a 3-D array and can be read into
# a pandas panel object
panel = pd.Panel(roll_array,
items=df.index[w-1:],
major_axis=df.columns,
minor_axis=pd.Index(range(w), name='roll'))
# convert to dataframe and pivot + groupby
# is now ready for any action normally performed
# on a groupby object
return panel.to_frame().unstack().T.groupby(level=0)
if __name__ == "__main__":
print ("Processing daily beta data...\n")
try:
main()
except KeyboardInterrupt:
print ("Ctrl+C pressed. Stopping...")
I am however getting the following error:
minor_axis=pd.Index(range(w), name='roll'))
TypeError: object() takes no parameters
Where have I gone wrong?
See below for the complete stack trace:
runfile('C:/Users/stacey/Documents/Strategies/Crypto/Data/Daily/beta_create_stack.py', wdir='C:/Users/stacey/Documents/Strategies/Crypto/Data/Daily')
Processing daily beta data...
s0000 s0001 s0002 ... s3997 s3998 s3999
Date ...
1995-12-31 0.572736 0.411435 0.914554 ... 0.389626 0.846049 0.736057
1996-01-31 0.012432 0.995075 0.254918 ... 0.497208 0.689789 0.140903
1996-02-29 0.901161 0.618174 0.153318 ... 0.126790 0.780571 0.494440
1996-03-31 0.932200 0.812955 0.814507 ... 0.511931 0.693407 0.571828
1996-04-30 0.348410 0.653946 0.591804 ... 0.196170 0.257667 0.880753
[5 rows x 4000 columns]
Traceback (most recent call last):
File "<ipython-input-181-981f6609137d>", line 1, in <module>
runfile('C:/Users/stacey/Data/Daily/beta_create_stack.py', wdir='C:/Users/stacey/Documents/Strategies/Crypto/Data/Daily')
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 47, in <module>
main()
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 17, in main
rdf = roll(df, 12)
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 29, in roll
minor_axis=pd.Index(range(w), name='roll'))
TypeError: object() takes no parameters
Traceback (most recent call last):
File "<ipython-input-181-981f6609137d>", line 1, in <module>
runfile('C:/Users/stacey/Data/Daily/beta_create_stack.py', wdir='C:/Users/stacey/Data/Daily')
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 47, in <module>
main()
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 17, in main
rdf = roll(df, 12)
File "C:/Users/stacey/Data/Daily/beta_create_stack.py", line 29, in roll
minor_axis=pd.Index(range(w), name='roll'))
TypeError: object() takes no parameters
I have two dataframes from .csv files, and I am combining them based on a common col name they share, "NAME" and what I am trying to do is display the differences of two of the factors on another column. However the error I get is
Traceback (most recent call last):
File "C:\Users\nhoss\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexes\base.py", line 2891, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1675, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1683, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: '\ufeff2010'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\nhoss\OneDrive\Desktop\Senior_Project\responserate.py", line 22, in <module>
combinedresponse['DIFFERENCE'] = combinedresponse['\ufeff2010'] - combinedresponse['2000']
File "C:\Users\nhoss\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\frame.py", line 2902, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\nhoss\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\indexes\base.py", line 2893, in get_loc
raise KeyError(key) from err
KeyError: '\ufeff2010'
[Finished in 0.933s]
Here is my code:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import string
response2000 = pd.read_csv(r'C:\Users\nhoss\OneDrive\Desktop\Senior_Project\2000ResponseRates.csv', skiprows=0)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
response2010 = pd.read_csv(r'C:\Users\nhoss\OneDrive\Desktop\Senior_Project\responserate2010.csv', skiprows=0 )
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
combinedresponse = response2000.merge(response2010, on='NAME', how='inner')
combinedresponse['DIFFERENCE'] = combinedresponse['2010'] - combinedresponse['2000']
print(combinedresponse)
The CSV Files:
responserate2010.csv
2010,NAME,STATE,COUNTY_ID
52,Allegany County,36,3
64,Bronx County,36,5
68,Broome County,36,7
57,Cattaraugus County,36,9
64,Cayuga County,36,11
61,Chautauqua County,36,13
71,Chemung County,36,15
58,Chenango County,36,17
62,Clinton County,36,19
50,Columbia County,36,21
67,Cortland County,36,23
50,Delaware County,36,25
66,Dutchess County,36,27
70,Erie County,36,29
63,Fulton County,36,35
52,Essex County,36,31
59,Franklin County,36,33
2000ResponseRates.csv:
SS,CCC,NAME,2000
36,001,Albany County,70
36,003005,Allegany County,60
36,005,Bronx County,56
36,007,Broome County,72
36,009,Cattaraugus County,64
36,011,Cayuga County,60
36,013,Chautauqua County,66
36,015,Chemung County,75
36,017,Chenango County,65
36,019,Clinton County,68
36,021,Columbia County,62
36,023,Cortland County,64
36,025,Delaware County,53
36,027,Dutchess County,68
36,029,Erie County,74
36,031,Essex County,58
36,033,Franklin County,67
Please try as below:
combinedresponse = pd.merge(respose2000, response2010, on="NAME", how="inner)
combinedresponse['DIFFERENCE'] = combinedresponse['2010'] - combinedresponse['2000']
We have a model that uses Dataportal of Pyomo to read parameter from several csv files. On a Windows laptop we are running into the following error while this is not replicable on another computer. Any ideas what might be missing in this setting?
Traceback (most recent call last):
File "", line 1, in
runfile('C:/Users/stianbac/OneDrive - NTNU/EMPIRE/EMPIRE in Pyomo/EMPIRE_Pyomo_version_4/Empire_draft4.py',
wdir='C:/Users/stianbac/OneDrive - NTNU/EMPIRE/EMPIRE in
Pyomo/EMPIRE_Pyomo_version_4')
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 710, in runfile
execfile(filename, namespace)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/stianbac/OneDrive - NTNU/EMPIRE/EMPIRE in
Pyomo/EMPIRE_Pyomo_version_4/Empire_draft4.py", line 107, in
instance = model.create_instance(data)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyomo\core\base\DataPortal.py",
line 138, in load
self.connect(**kwds)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyomo\core\base\DataPortal.py",
line 98, in connect
self._data_manager.open()
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyomo\core\plugins\data\sheet.py",
line 54, in open
self.sheet = ExcelSpreadsheet(self.filename, ctype=self.ctype)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyutilib\excel\spreadsheet.py",
line 79, in new
return ExcelSpreadsheet_win32com(*args, **kwds)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyutilib\excel\spreadsheet_win32com.py",
line 59, in init
self.open(filename, worksheets, default_worksheet)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\pyutilib\excel\spreadsheet_win32com.py",
line 80, in open
self._ws[wsid] = self.wb.Worksheets.Item(wsid)
File
"C:\Users\stianbac\AppData\Local\Continuum\anaconda3\lib\site-packages\win32com\client\dynamic.py",
line 516, in getattr
ret = self.oleobj.Invoke(retEntry.dispid,0,invoke_type,1)
com_error: (-2147418111, 'Call was rejected by callee.', None, None)
Here is the entry of the code:
from __future__ import division
from pyomo.environ import *
#from pyomo.core.expr import current as EXPR
#import numpy as np
import math
import csv
model = AbstractModel()
model.Nodes = Set()
model.Generators = Set() #g
...
data = DataPortal()
data.load(filename='Sets.xlsx',range='B1:B53',using='xlsx',format="set", set=model.Generators)
data.load(filename='Sets.xlsx',range='nodes',using='xlsx',format="set", set=model.Nodes)
...
instance = model.create_instance(data)
...
Pandas' corrwith work fine with SPY but not OK with ^GSPC.
Can anybody help me ? Thanks in advance.
import pandas.io.data as web
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import csv
Symbol = ['^GSPC','SPY','XLK','XLV','XLY','XLP','XLE','XLF','XLI','XLB','XLU']
STOCK = web.DataReader(name=Symbol,data_source='yahoo',start='2000-1-1',end='2009-3-1')
STOCK['PercentA'] = np.round(np.log(STOCK['Adj Close']/STOCK['Adj Close'].shift(1)),3)
### OK wiht SPY, but not OK wiht ^GSPC
STOCKCORR = np.round(STOCK['PercentA'].corrwith(STOCK['PercentA'].^GSPC),3)
StockNan = STOCKCORR[~STOCKCORR.isnull()]
StockOrder = StockNan.order().iloc[::-1]
StockRows = StockOrder.iloc[:20]
print StockRows
the error messages:
runfile('D:/###pg/yahoo_final_Chang/150830__Revised/test.py', wdir='D:/###pg/yahoo_final_Chang/150830__Revised')
Traceback (most recent call last):
File "", line 1, in
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "D:/###pg/yahoo_final_Chang/150830__Revised/test.py", line 16
STOCKCORR = np.round(STOCK['PercentA'].corrwith(STOCK['PercentA'].^GSPC),3)
^
SyntaxError: invalid syntax
I wrote a Python script (below) which load data from a text file (using pandas) and checks the values in the columns.
import sys
import pandas as pd
import numpy as np
from numpy import ndarray
import math
import matplotlib.pyplot as plt
from matplotlib.pyplot import *
from skimage import data
from skimage.feature import match_template
if __name__ == '__main__':
data = pd.read_csv('Fe_PSI_spt_refined.txt', sep=" ", header = None)
data.columns = ["Angle_number", "Omega", "Intensity", "X", "Y", "Address", "ID"]#, "flag"]
Number_of_projections = 181
Number_of_lines_in_txt = 3493
numrows = len(data)
counter_array = []
correlation_threshold_value = 0.7
a = np.zeros(Number_of_lines_in_txt)
output_file = ("output.txt")
for i in range(2, (Number_of_projections + 1)):
filename_cutouts_combined = ("cutouts_combined_%03i.txt" % (i))
filename_cutouts_combined_tag = ("cutouts_combined_tag_%03i.txt" % (i))
image = np.loadtxt(filename_cutouts_combined)
image_tagged = np.loadtxt(filename_cutouts_combined_tag)
for j in range(0, Number_of_lines_in_txt - 1):
print data.Angle_number[j], i
After one iteration of j I get the error below. Do you spot any error I should fix? Thanks
`Traceback (most recent call last):
File "Hyperbola_search.py", line 46, in <module>
print data.Angle_number[j], i
File "/Users/Alberto/anaconda/lib/python2.7/site-packages/pandas/core/series.py", line 491, in __getitem__
result = self.index.get_value(self, key)
File "/Users/Alberto/anaconda/lib/python2.7/site-packages/pandas/core/index.py", line 1032, in get_value
return self._engine.get_value(s, k)
File "index.pyx", line 97, in pandas.index.IndexEngine.get_value (pandas/index.c:2661)
File "index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas/index.c:2476)
File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3215)
File "hashtable.pyx", line 382, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6450)
File "hashtable.pyx", line 388, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6394)
KeyError: 3491`
You load files into image and image_tagged, while a remains unused.
I don't know what data.Angle_number and numrows are, but they appear to be from libraries, not related to your files.