how to create the outliers for dates in python

how to create the outliers for dates in python - python

my purpose is to create an anomaly graph for a stock that have dates and close. I tried to create outliers, but I get the lines not in the place I want. For example, I want the line to be in the year of 2019 and after 2020 where there are drastic changes. The X line has dates and the problem I don't know how to write the outliers
I thought to write y["2019"]=40 for example but it doesn't do anything
from pandas import read_csv
from matplotlib import pyplot
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
#from IPython.core.debugger import set_trace
#import data
AAPL= pd.read_csv('AAPL.csv', header=0, squeeze=True)
x=AAPL['Date']
x=pd.to_datetime(x)
y=AAPL['Close/Last']
plt.figure(figsize=(15,7))
plt.plot(x, y, label="Close")
plt.title("AAPL")
plt.xlabel("Time")
plt.ylabel("Close")
plt.xticks(rotation=0)
plt.grid()
plt.show()
y[5] = 5
y[60] =55
y[85] = 1.4
n_outliers = 3
plt.figure(figsize=(15,7))
plt.plot(x,y)
plt.scatter(x,y)
plt.grid()
plt.ylabel('Y')
plt.xlabel('x')
plt.show()
Thank you in advance

Related

matplotlib to show x-axis with custom date formats and interval

Using matplotlib and mpl_finance to plot candlesticks. Data is in csv AAPL.
I want to show the x-axis as year and month only, i.e."yyyy-mmm", so:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
import matplotlib.dates as mdates
data = pd.read_csv('C:\\AAPL.csv', delimiter = "\t")
data = data.sort_values(['Date'], ascending=True)
data = data.tail(100)
fig = plt.figure(figsize=(6,4))
plt.ylim(60, 200)
ax1 = fig.add_subplot(111)
cl =candlestick2_ohlc(ax=ax1,opens=data['Open'],highs=data['High'],lows=data['Low'],closes=data['Close'],width=0.6)
ax1.set_xticks(np.arange(len(data)))
ax1.set_xticklabels(data['Date'], fontsize=10, rotation=90)
# every month of the year like 2008-Jan, 2008-Feb...
locator = mdates.MonthLocator()
fmt = mdates.DateFormatter('%Y-%b')
X = plt.gca().xaxis
X.set_major_locator(locator)
X.set_major_formatter(fmt)
plt.show()
It doesn't show anything.
Also tried below but doesn't work neither:
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator())
How can I have the x-axis only show the year and month??
Thank you.

Try following solution,
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
data = pd.read_csv('C:\AAPL.csv')
data = data.sort_values(['Date'], ascending=True)
data = data.tail(100)
from matplotlib.dates import date2num, DayLocator, DateFormatter
data['Date'] = date2num(pd.to_datetime(data['Date']).tolist())
fig, ax=plt.subplots(figsize=(10, 10))
candlestick_ohlc(ax, data.as_matrix(),width=0.6)
ax.set(xlabel='AAPL')
ax.xaxis.set_major_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%b'))
ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=4))
plt.show()
Note: I have used candlestick_ohlc instead of candlestick2_ohlc.
Output :

Is there a way to plot the z-Value in python

I'm have got a programme that uses matplotlib and pandas to plot the rolling mean and standard deviation for the price of bitcoin. I'm wondering how I can plot the z values ( the number of standard deviations the price is from the mean).
import pandas as pd
from matplotlib import pyplot as plt
btc_1_day = pd.read_csv('C:\Users\Oliver\Desktop\data\data1_btcusdt_1day.csv')
df1_btc = pd.DataFrame(btc_1_day)
df1_btc['SMA_10'] = df1_btc.price_close.rolling(10).mean()
df1_btc['SMSD_10'] = df1_btc.price_close.rolling(10).std()
plt.grid(True)
plt.plot(btc_1_day.price_close)
plt.plot(df1_btc['SMA_10'],label='10 day moving average')
plt.plot(df1_btc['SMSD_10'],label='10 day standard deviation')
plt.legend(loc=2)
plt.show()

Since I don't have your csv file, I'll show you how I would do this using some random data and a pandas dataframe. You can find the z score using stats.zscore(df['btc'], but that would give you numbers on a very different scale from the ones you're trying to plot in your example.
Plot 1:
Code 1:
import pandas as pd
from matplotlib import pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime
from scipy import stats
# data
np.random.seed(1234)
numdays=100
df = pd.DataFrame({'btc': (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()})
# moving averages and standard deviations
df['SMA_10'] = df['btc'].rolling(10).mean()
df['SMSD_10+sigma'] = df['btc'].rolling(10).mean()+df['btc'].rolling(10).std()
df['SMSD_10-sigma'] = df['btc'].rolling(10).mean()-df['btc'].rolling(10).std()
# matplotlib
df['ZScore']=stats.zscore(df['btc'])
plt.figure()
df['btc'].plot()
df['ZScore'].plot()
plt.show()
In order to illustrate your dataset together with averages and starndard deviations for rolling windows, I'd rather use an approach such as:
Plot 2:
Code 2:
import pandas as pd
from matplotlib import pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime
from scipy import stats
# data
np.random.seed(1234)
numdays=100
df = pd.DataFrame({'btc': (np.random.randint(low=-20, high=20, size=numdays).cumsum()+100).tolist()})
# moving averages and standard deviations
df['SMA_10'] = df['btc'].rolling(10).mean()
df['SMSD_10+sigma'] = df['btc'].rolling(10).mean()+df['btc'].rolling(10).std()
df['SMSD_10-sigma'] = df['btc'].rolling(10).mean()-df['btc'].rolling(10).std()
# matplotlib
plt.grid(True)
plt.plot( df['btc'])
plt.plot(df['SMA_10'],label='10 day moving average')
plt.plot(df['SMSD_10+sigma'],label='10 day standard deviation',
color='green',
linewidth=0.5)
plt.plot(df['SMSD_10-sigma'],label='10 day standard deviation',
color='green',
linewidth=0.5)
plt.plot(df['btc'], color='blue', linewidth=1.5)
plt.legend(loc=2)
plt.show()

How to change xticks to yearly interval in pandas time series plot

I am very new to pandas, and I have searched many StackOverflow questions similar to this for changing xtick labels yearly, but they all are different did not solve my problem, so I decided to ask my own question.
Here is my question. I have a mock data frame which I want to plot yearly xticks in the x-axis.
import numpy as np
import pandas as pd
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
df.plot()
This gives:
Xticks ==> 1995 2000 2005 etc
But I want ==> 1991 1992 ... 2019
How to do that?
So far I have tried this:
import matplotlib
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
fig,ax = plt.subplots()
df.plot(ax=ax)
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(base=1))
# ax.xaxis.set_minor_locator(matplotlib.dates.YearLocator(base=1))
# ax.set_xticklabels(list(df.index.time))
This gives just 2005 as xtick and nothing has worked till now.
Links I looked:
- Changing xticks in a pandas plot
- Python: Change the time on xticks for Pandas Plot
- https://matplotlib.org/3.1.1/api/dates_api.html

You need to use the x_compat=True argument to have pandas choose the units in a way that they are compatible with matplotlib.dates locators and formatters.
df.plot(ax=ax, x_compat=True)
Complete code:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
fig,ax = plt.subplots()
df.plot(ax=ax, x_compat=True)
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(base=1))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
plt.show()

You can try this:
import datetime
# create xticks
xticks = pd.date_range(datetime.datetime(1990,1,1), datetime.datetime(2020,1,1), freq='YS')
# plot
fig, ax = plt.subplots(figsize=(12,8))
df['value'].plot(ax=ax,xticks=xticks.to_pydatetime())
ax.set_xticklabels([x.strftime('%Y') for x in xticks]);
plt.xticks(rotation=90);
Complete Example
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import datetime
# data
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
# create xticks
xticks = pd.date_range(datetime.datetime(1990,1,1), datetime.datetime(2020,1,1), freq='YS')
# plot
fig, ax = plt.subplots(figsize=(12,8))
df['value'].plot(ax=ax,xticks=xticks.to_pydatetime())
ax.set_xticklabels([x.strftime('%Y') for x in xticks]);
plt.xticks(rotation=90);
plt.show()
This gives:

How to format y-axis to show percentage with Python Jupyter Notebook lmplot? [duplicate]

I have the following pandas plot:
Is it possible to add '%' sign on the y axis not as a label but on the number. Such as it would show instead of 0.0 it would be 0.0% and so on for all the numbers?
Code:
import pandas as pd
from pandas import datetime
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
end = datetime.date.today()
start = datetime.date(2020,1,1)
data = web.DataReader('fb', 'yahoo', start, end)
data['percent'] = data['Close'].pct_change()
data['percent'].plot()

Here is how you can use matplotlib.ticker:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
plt.show()
Output:

You can now control the display format of the y-axis. I think it will be 0.0%.
yvals = ax.get_yticks()
ax.set_yticklabels(["{:,.1%}".format(y) for y in yvals], fontsize=12)

You can also use plt.gca() instead of using ax
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))

Mixed format dates in python matplotlib [duplicate]

I am looking to edit the formatting of the dates on the x-axis. The picture below shows how they appear on my bar graph by default. I would like to remove the repetition of 'Dec' and '2012' and just have the actual date numbers along the x-axis.
Any suggestions as to how I can do this?

In short:
import matplotlib.dates as mdates
myFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(myFmt)
Many examples on the matplotlib website. The one I most commonly use is here

While the answer given by Paul H shows the essential part, it is not a complete example. On the other hand the matplotlib example seems rather complicated and does not show how to use days.
So for everyone in need here is a full working example:
from datetime import datetime
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
myDates = [datetime(2012,1,i+3) for i in range(10)]
myValues = [5,6,4,3,7,8,1,2,5,4]
fig, ax = plt.subplots()
ax.plot(myDates,myValues)
myFmt = DateFormatter("%d")
ax.xaxis.set_major_formatter(myFmt)
## Rotate date labels automatically
fig.autofmt_xdate()
plt.show()

From the package matplotlib.dates as shown in this example the date format can be applied to the axis label and ticks for plot.
Below I have given an example for labeling axis ticks for multiplots
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
df = pd.read_csv('US_temp.csv')
plt.plot(df['Date'],df_f['MINT'],label='Min Temp.')
plt.plot(df['Date'],df_f['MAXT'],label='Max Temp.')
plt.legend()
####### Use the below functions #######
dtFmt = mdates.DateFormatter('%b') # define the formatting
plt.gca().xaxis.set_major_formatter(dtFmt) # apply the format to the desired axis
plt.show()
As simple as that

This wokrs prfectly for me
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
AutoMinorLocator)
import matplotlib.dates as mdates
dtFmt = mdates.DateFormatter('%Y-%b') # define the formatting
plt.gca().xaxis.set_major_formatter(dtFmt)
# show every 12th tick on x axes
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=90, fontweight='light', fontsize='x-small',)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

how to create the outliers for dates in python - python

Related

matplotlib to show x-axis with custom date formats and interval

Is there a way to plot the z-Value in python

How to change xticks to yearly interval in pandas time series plot

How to format y-axis to show percentage with Python Jupyter Notebook lmplot? [duplicate]

Mixed format dates in python matplotlib [duplicate]

Categories

Resources