I input:
Data= (np.max(PM10_data)-np.min(PM10_data))
print(Data)
for the normalization of my data, but when I check it by print the data out, it shows:
[15442 rows x 3 columns]
PM10 39.33
Unnamed: 62 1.00
dtype: float64
May i know how could i normalize >10,000 data properly?
Below is the scipt I have:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import numpy as np
import statsmodels.api as sm
from sklearn import preprocessing
#import PM10 data
PM10_data= pd.read_excel(r"C:\Users\Jamie Tsoi\OneDrive\Desktop\FYP\ToUG_Office.xlsx",sheet_name='PM10')
PM10_data=PM10_data.dropna(axis=1,how='any')
PM10_data.max()
#Check null
PM10_data.isna().sum()
#Check null
PM10_data['PM10']
PM10_data.columns = PM10_data.columns.str.strip()
#Check null
PM10_data['PM10']=pd.to_numeric(PM10_data['PM10'])
print(PM10_data)
Data= (np.max(PM10_data)-np.min(PM10_data))
print(Data)
Related
I have data from an excel spreadsheet concerning the UK economy but how do i make a line graph from said information? This is what i have so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
df = (pd.read_excel('Uk_data.xlsx').dropna())
dataset = df.to_dict()
print(df[0:])
plt.figure(figsize=(9,9))
plt.style.use('seaborn')
plt.title("UK GDP Since 2017")
plt.show()
print(df.head())
I have met ValueError: Exog and Ebndog are in different size.
When I type len(y) or len(y_scaled), it returns 0, but it supposed to be five. Hope for help. Thanks in advance.
import datetime
import dateutil
import pandas_datareader.data as wb
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
year=5
tickers =["0200.KL"]
ohlc = wb.DataReader(tickers, data_source="yahoo",start=datetime.date.today()-dateutil.relativedelta.relativedelta(years=year),end=datetime.date.today())
n=5 #get 5 consecutive data
df =ohlc.copy()
series=df["Adj Close"]
slopes=[i*0 for i in range(n-1)]
for i in range(n,len(series)+1):
y=series[i-n:n]
x=np.array(range(n))
#normalize x and y variable
y_scaled=(y-y.min())/(y.max()-y.min())
X_scaled=(x-x.min())/(x.max()-x.min())
#add a constant to the equation
X_scaled=sm.add_constant(X_scaled)
model=sm.OLS(y_scaled,X_scaled)
results=model.fit()
slopes.append(results.params[-1])
#slope coefficient is the theta in radians
slopes_angle=np.rad2degree(np.arctan(np.array(slopes)))
np.array(slopes_angle)
Solved. Thank you.
Should be y=df["Adj Close"][i-n:i] instead of y=series[n-i:n]
The full code as below:
import datetime
import dateutil
import pandas_datareader.data as wb
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
%matplotlib inline
year=1
tickers ="AAPL"
ohlc = wb.DataReader(tickers, data_source="yahoo",start=datetime.date.today()-dateutil.relativedelta.relativedelta(years=year),end=datetime.date.today())
n=5 #get 5 consecutive datas
df =ohlc.copy()
slopes=[i*0 for i in range(n-1)]
for i in range(n,len(df)+1):
y=df["Adj Close"][i-n:i]
x=np.array(range(n))
#normalize x and y variable
y_scaled=(y-y.min())/(y.max()-y.min())
X_scaled=(x-x.min())/(x.max()-x.min())
#add a constant to the equation
X_scaled=sm.add_constant(X_scaled)
model=sm.OLS(y_scaled,X_scaled)
results=model.fit()
slopes.append(results.params[-1])
#slope coefficient is the theta in radians
slopes_angle=np.rad2deg(np.arctan(np.array(slopes)))
slopes_angle=np.array(slopes_angle)
plt.plot(slopes_angle)
plt.title("Slope Coefficient of 5 Consecutive Stock Price Data")
plt.ylabel("Slope Coefficient")
plt.xlabel("Period")
plt.show()
I need to take data from only 3 columns in my dataset, how do I do this? I am trying to make a correlation graph. This is my code:
import matplotlib.pyplot as plt
import pandas as pd
crimedata = pd.read_csv('MasterFileCSV.csv')
crime_df = pd.DataFrame(crimedata)
plt.matshow(crime_df.corr())
plt.show
I am new to python and I am having some issues to plot my dates from a csv file.
The code is the following:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from pandas import DataFrame
import matplotlib.pyplot as plt
df = pd.read_csv(r"file.csv",index_col=0)
print(df.describe())
BHSI_cycle, BHSI_trend = sm.tsa.filters.hpfilter(df['BHSI-TCA'])
df['BHSI_trend'] = BHSI_trend
df['BHSI_cycle'] = BHSI_cycle
BHSI_plot = df[['BHSI-TCA','BHSI_trend']].plot(figsize=(12,10))
plt.show(BHSI_plot)
BHSI_plot2 = df[['BHSI_cycle']].plot(figsize=(12,10))
plt.show(BHSI_plot2)
And the CSV file is:
Date BHSI-TCA
23/05/2006 14821
25/05/2006 14878
30/05/2006 14837
How can I plot the dates?
Try parsing dates properly when you import from csv.
df = pd.read_csv(r"file.csv", index_col=0, parse_dates=<your_date_column>)
I have a csv file and I want to show this data on grap. I have date,place and status data but I don't need place so I fetch data like this.
And going like this
Here is my code. How can I get a graph with 1-0 values according to date value. Which method should I use ? Thanks
import pandas as pd from pandas
import DataFrame
import datetime
import pandas.io.data
import matplotlib.pyplot as plt from mpl_toolkits.mplot3d
import Axes3D import pylab rows_list=[] df=pd.read_csv('filepath',header=None,parse_dates=True,prefix='column')
for row in df.iterrows():
if row[1][1]=='Beweging in de living':
if row[1][2]=='OPEN': rows_list.append([row[1][0],'1'])
else: rows_list.append([row[1][0],'0'])
df2 = pd.DataFrame(rows_list)
df3=df2.set_index(0)
print df3 plt.plot(df3)
plt.show()