seqmining: how to calculate frequency of a sequence on python

seqmining: how to calculate frequency of a sequence on python - python

I'm trying to use pymining on Python to generate frequent sequences from my dataset. My code below appears to be working well:
from pymining import seqmining
seqs = ( 'caabc', 'abcb', 'cabc', 'abbca')
freq_seqs = seqmining.freq_seq_enum(seqs, 2)
sorted(freq_seqs)
However, when i want to use it with my dataset:
import numpy as np
import pandas as pd
from pymining import seqmining
def importdata():
filename = pd.read_csv('C:/Users/asus/Desktop/memoire/sequences-code.csv', sep= ';', header = None)
data=importdata()
seqs = data
freq_seqs = seqmining.freq_seq_enum(seqs, 2)
sorted(freq_seqs)
I get this error:
TypeError: 'NoneType' object is not iterable
this is all the error:
TypeError Traceback (most recent call last)
<ipython-input-4-19e2af14465a> in <module>()
8 data=importdata()
9 seqs = data
---> 10 freq_seqs = seqmining.freq_seq_enum(seqs, 2)
11 sorted(freq_seqs)
12
~\Anaconda3\lib\site-packages\pymining\seqmining.py in freq_seq_enum(sequences, min_support)
9 '''
10 freq_seqs = set()
---> 11 _freq_seq(sequences, tuple(), 0, min_support, freq_seqs)
12 return freq_seqs
13
~\Anaconda3\lib\site-packages\pymining\seqmining.py in _freq_seq(sdb, prefix, prefix_support, min_support, freq_seqs)
16 if prefix:
17 freq_seqs.add((prefix, prefix_support))
---> 18 locally_frequents = _local_freq_items(sdb, prefix, min_support)
19 if not locally_frequents:
20 return
~\Anaconda3\lib\site-packages\pymining\seqmining.py in _local_freq_items(sdb, prefix, min_support)
28 items = defaultdict(int)
29 freq_items = []
---> 30 for entry in sdb:
31 visited = set()
32 for element in entry:
TypeError: 'NoneType' object is not iterable

The simplest change you can make to your code is to get rid of importdata, which is just a wrapper on pd.read_csv. Try:
filename = 'C:/Users/asus/Desktop/memoire/sequences-code.csv'
data = pd.read_csv(filename, sep=';', header=None)
Let me know if that helps.

Related

python not recognizing pandas_ta module

import requests
import pandas as pd
import pandas_ta as ta
def stochFourMonitor():
k_period = 14
d_period = 3
data = get_data('BTC-PERP',14400,1642935495,1643165895)
print(data)
data = data['result']
df = pd.DataFrame(data)
df['trailingHigh'] = df['high'].rolling(k_period).max()
df['trailingLow'] = df['low'].rolling(k_period).min()
df['%K'] = (df['close'] - df['trailingLow']) * 100 / (df['trailingHigh'] - df['trailingLow'])
df['%D'] = df['%K'].rolling(d_period).mean()
df.index.name = 'test'
df.set_index(pd.DatetimeIndex(df["startTime"]), inplace=True)
print(df)
df.drop(columns=['startTime'])
print(df)
df.ta.stoch(high='High', low='Low',close= 'Close', k=14, d=3, append=True)
#t = ta.stoch(close='close',high='high', low='low', k=14, d=3, append=True)
#df.ta.stoch(close='close',high='high', low='low', k=14, d=3, append=True)
def get_data(marketName,resolution,start_time,end_time):
data = requests.get('https://ftx.com/api/markets/' + marketName + '/candles?resolution=' + str(resolution) + '&start_time=' + str(start_time) + '&end_time=' + str(end_time)).json()
return data
I keep receiving the error 'NoneType' object has no attribute 'name'. See below for full exception. It seems like the code is not recognizing the pandas_ta module but I don't understand why. Any help troubleshooting would be much appreciated.
Exception has occurred: AttributeError (note: full exception trace is shown but execution is paused at: )
'NoneType' object has no attribute 'name'
File "C:\Users\Jason\Documents\TradingCode\FTX Websocket\testing21.py", line 21, in stochFourMonitor
df.ta.stoch(high='High', low='Low',close= 'Close', k=14, d=3, append=True)
File "C:\Users\Jason\Documents\TradingCode\FTX Websocket\testing21.py", line 31, in (Current frame)
stochFourMonitor()

You have to few values in your dataframe. You need at least 17 values (k=14, d=3)
>>> pd.Timestamp(1642935495, unit='s')
Timestamp('2022-01-23 10:58:15')
>>> pd.Timestamp(1643165895, unit='s')
Timestamp('2022-01-26 02:58:15')
>>> pd.DataFrame(get_data('BTC-PERP',14400,1642935495,1643165895)['result'])
0 2022-01-23T12:00:00+00:00 1.642939e+12 35690.0 36082.0 35000.0 35306.0 6.315513e+08
1 2022-01-23T16:00:00+00:00 1.642954e+12 35306.0 35460.0 34601.0 34785.0 7.246238e+08
2 2022-01-23T20:00:00+00:00 1.642968e+12 34785.0 36551.0 34712.0 36271.0 9.663773e+08
3 2022-01-24T00:00:00+00:00 1.642982e+12 36271.0 36283.0 35148.0 35351.0 6.007333e+08
4 2022-01-24T04:00:00+00:00 1.642997e+12 35351.0 35511.0 34821.0 34896.0 5.554126e+08
5 2022-01-24T08:00:00+00:00 1.643011e+12 34895.0 35610.0 33033.0 33709.0 1.676436e+09
6 2022-01-24T12:00:00+00:00 1.643026e+12 33709.0 34399.0 32837.0 34260.0 2.021096e+09
7 2022-01-24T16:00:00+00:00 1.643040e+12 34261.0 36493.0 33800.0 36101.0 1.989552e+09
8 2022-01-24T20:00:00+00:00 1.643054e+12 36101.0 37596.0 35990.0 36673.0 1.202684e+09
9 2022-01-25T00:00:00+00:00 1.643069e+12 36673.0 36702.0 35974.0 36431.0 4.538093e+08
10 2022-01-25T04:00:00+00:00 1.643083e+12 36431.0 36500.0 35719.0 36067.0 3.514587e+08
11 2022-01-25T08:00:00+00:00 1.643098e+12 36067.0 36824.0 36030.0 36431.0 5.830712e+08
12 2022-01-25T12:00:00+00:00 1.643112e+12 36431.0 37200.0 35997.0 36568.0 9.992247e+08
13 2022-01-25T16:00:00+00:00 1.643126e+12 36568.0 37600.0 36532.0 37079.0 8.225219e+08
14 2022-01-25T20:00:00+00:00 1.643141e+12 37077.0 37140.0 36437.0 36980.0 7.892745e+08
15 2022-01-26T00:00:00+00:00 1.643155e+12 36980.0 37242.0 36567.0 37238.0 3.226400e+08
>>> pd.DataFrame(get_data('BTC-PERP',14400,1642935495,1643165895)['result'])
...
AttributeError: 'NoneType' object has no attribute 'name'
Now change 1642935495 ('2022-01-23 10:58:15') by 1642845495 ('2022-01-22 10:58:15':
>>> pd.DataFrame(get_data('BTC-PERP',14400,1642845495,1643165895)['result']).ta.stoch()
STOCHk_14_3_3 STOCHd_14_3_3
13 NaN NaN
14 NaN NaN
15 80.824814 NaN
16 74.665546 NaN
17 72.970512 76.153624
18 73.930097 73.855385
19 80.993469 75.964693
20 84.814444 79.912670
21 89.775352 85.194422

loop the pycountry convert

def country_to_continent(country_name):
country_alpha2 = pc.country_name_to_country_alpha2(country_name)
country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)
return country_name
country_name = [i for i in df['country']]
country_to_continent(country_name)
this is my code and I want to loop my df['country'] and get the continent name but it has error TypeError: unhashable type: 'list'
my purpose is to convert a country to the continent and to calculate every sum of continent's suicides cases

This is my entire error message
TypeError Traceback (most recent call last)
<ipython-input-197-3be113b9bda3> in <module>
6
7 country_name = [i for i in df['country']]
----> 8 country_to_continent(country_name)
<ipython-input-197-3be113b9bda3> in country_to_continent(country_name)
1 def country_to_continent(country_name):
----> 2 country_alpha2 = pc.country_name_to_country_alpha2(country_name)
3 country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
4 country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)
5 return country_name
D:\pythin\lib\site-packages\pycountry_convert\convert_countries.py in country_name_to_country_alpha2(cn_name, cn_name_format)
68 return cn_name
69
---> 70 if cn_name not in dict_country_name_to_country_alpha2:
71 raise KeyError("Invalid Country Name: '{0}'".format(cn_name))
72
TypeError: unhashable type: 'list'

AttributeError: 'DatetimeIndexResampler' object has no attribute 'rolling'

i have a problem i running this code
resamp = pd.DataFrame()
station_ids = list(set(weather_data.station_id.tolist()))
for _id in station_ids:
idx = weather_data.station_id == _id
ti = time_index[idx]
wdfi = weather_data[idx].set_index(ti)
floating = wdfi[['visibility','temperature','wind_speed', "wind_dir", "Rain"]]
binaries = wdfi[['visibility','temperature','wind_speed', "wind_dir", "Rain"]]
b = binaries.resample('1h').rolling(24).apply(lambda x: x.any())
f = floating.resample('1h').agg({
'wind_speed': 'mean',
'visibility': 'mean',
'temperature': 'mean',
'wind_dir':'mean',
'Rain':'mean'
})
temp = pd.concat((f,b),axis=1)
temp['station_id'] = _id
resamp = resamp.append(temp)
and I get this error
AttributeError Traceback (most recent call last)
in ()
8 floating = wdfi[['visibility','temperature','wind_speed', "wind_dir", "Rain"]]
9 binaries = wdfi[['visibility','temperature','wind_speed', "wind_dir", "Rain"]]
---> 10 b = binaries.resample('1h').rolling(24).apply(lambda x: x.any())
11 f = floating.resample('1h').agg({
12 'wind_speed': 'mean',
~\Anaconda3\envs\arcpro\lib\site-packages\pandas\core\resample.py in getattr(self, attr)
95 return self[attr]
96
---> 97 return object.getattribute(self, attr)
98
99 #property
AttributeError: 'DatetimeIndexResampler' object has no attribute 'rolling'
my pandes v 24
thank you

answer by SvenD could be what you're looking for :
How to convert DatetimeIndexResampler to DataFrame?
"resample no longer returns a dataframe: it's now "lazily evaluated"
at the moment of the aggregation or interpolation. => depending on
your use case, replacing .resample("1D") with
.resample("1D").mean() (i.e. downscaling) or with
.resample("1D").interpolate() (upscaling) could be what you're
after, and they both return a dataframe.
– Svend Sep 15 '16 at 8:57"

Why is the error 'activities-heart-intraday' occurring in my for loop?

In this code, please explain why this error is occurring. I will
share more code of it if anyone is interested.
fit_statsHR=auth2_client.intraday_time_series('activities/heart',base_date=date, detail_level='1sec')
time_list = []
val_list = []
ids = []
dates = []
for i in fit_statsHR['activities-heart-intraday']['dataset']:
val_list.append(i['value'])
time_list.append(i['time'])
ids.append(id)
dates.append(date)
heartdf=pd.DataFrame({'heartRate':val_list,'time':time_list,'userId':ids,'date':dates})
which leads to:
KeyError Traceback (most recent call last)
<ipython-input-3-076c9750910b> in get_hps(auth2_client, id, date)
51 ids = []
52 dates = []
---> 53 for i in (fit_statsHR['activities-heart-intraday']['dataset']):
54 val_list.append(i['value'])
55 time_list.append(i['time'])
KeyError: 'activities-heart-intraday'

comparing monary with pymongo for accessing mongodb

I googled for dealing with mongodb in python and the following came up
https://bitbucket.org/djcbeach/monary/wiki/Home
Based on this link monary suppose outperform pymongo, but the comparisons in this link were from queries, we want to see the difference in writing to mongodb (insert) so we made the following according to example code from link
from monary import Monary
from monary import monary_param as mp
import numpy as np
import time
NUM_BATCHES = 3500
BATCH_SIZE = 200
start = time.time()
types = ["float64"] * 5
fields = ["x1", "x2", "x3", "x4", "x5"]
global params
with Monary("127.0.0.1") as monary:
for i in xrange(NUM_BATCHES):
for l in xrange(BATCH_SIZE):
stuff = [ ]
for j in xrange(5):
record = dict(x1=random.uniform(0, 1),
x2=random.uniform(0, 2),
x3=random.uniform(0, 3),
x4=random.uniform(0, 4),
x5=random.uniform(0, 5)
)
stuff.append(record)
params = mp.MonaryParam.from_lists(np.array(stuff), fields)
monary.insert('mydb','collection',params)
end =time.time()
print 'Total time elapsed: %02d:%02d'% divmod((end - start), 60)
Why we keep getting errors?
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-58-9ad6ec8c81e5> in <module>()
25 stuff.append(record)
26 #print len(np.array(stuff.append(record)))
---> 27 params = mp.MonaryParam.from_lists(np.array(stuff), fields)
28 monary.insert('mydb','collection',params)
29 # with Monary("127.0.0.1") as monary:
/Users/kelvin/anaconda/envs/gl-env/lib/python2.7/site-packages/monary/monary_param.pyc in from_lists(cls, data, fields, types)
77 raise ValueError(
78 "Data and fields must be of equal length.")
---> 79 return cls.from_groups(zip(data, fields))
80 else:
81 if not (len(data) == len(fields) == len(types)):
/Users/kelvin/anaconda/envs/gl-env/lib/python2.7/site-packages/monary/monary_param.pyc in from_groups(cls, groups)
93 - `groups`: List of items to be passed to MonaryParam.
94 """
---> 95 return list(map(lambda x: cls(x), groups))
96
97 def __len__(self):
/Users/kelvin/anaconda/envs/gl-env/lib/python2.7/site-packages/monary/monary_param.pyc in <lambda>(x)
93 - `groups`: List of items to be passed to MonaryParam.
94 """
---> 95 return list(map(lambda x: cls(x), groups))
96
97 def __len__(self):
/Users/kelvin/anaconda/envs/gl-env/lib/python2.7/site-packages/monary/monary_param.pyc in __init__(self, array, field, mtype)
39 if len(array) == 2:
40 array, field = array
---> 41 mtype = str(array.data.dtype)
42 else:
43 array, field, mtype = array
AttributeError: 'dict' object has no attribute 'data

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

seqmining: how to calculate frequency of a sequence on python - python

The simplest change you can make to your code is to get rid of importdata, which is just a wrapper on pd.read_csv. Try: filename = 'C:/Users/asus/Desktop/memoire/sequences-code.csv' data = pd.read_csv(filename, sep=';', header=None) Let me know if that helps.

Related

python not recognizing pandas_ta module

loop the pycountry convert

AttributeError: 'DatetimeIndexResampler' object has no attribute 'rolling'

Why is the error 'activities-heart-intraday' occurring in my for loop?

comparing monary with pymongo for accessing mongodb

Categories

Resources