I'm trying to read a csv file that has a date that looks like this: 2018-02-20T22:41:33.793000Z and I can't figure out how to use that as the x axis in matplotlib. I obviously have no idea what I"m doing with datetime because I can't seem to get it to be usefull numbers for matplotlib.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
import os
import matplotlib
import matplotlib.dates as mdates
import datetime
from datetime import datetime
data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'new.csv'))
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
b = data.iloc[:, 8] #reading from column 8
a = pd.to_datetime(b, format='%Y-%m-%dT%H:%M:%S.%fZ', errors='ignore')
print(a)
x = a #date #.iloc[:, 0] grabs column without header
y = data.iloc[:, 2] #number #reading from column 2
z = data.iloc[:, 6] #quantity #reading from column 6
#c = data.bluered #blue or red
ax.scatter(x, y, z) #ax.scatter(x, y, z, c=c)
plt.xlim(-1, 35)
plt.ylim(0, 300)
ax.set_zlim(-1,150)
plt.show()
After much frustration I found the problem I was having was just simply to change plt.xlim(-1, 35) to plt.xlim('2018-02-20 22:41:26.419000','2018-02-20 22:48:55.768000')...which is the date/time range I was looking at.
Related
This question already has answers here:
How to plot in multiple subplots
(12 answers)
Closed 1 year ago.
I want to arrange 5 histograms in a grid. Here is my code and the result:
I was able to create the graphs but the difficulty comes by arranging them in a grid. I used the grid function to achieve that but i need to link the graphs to it in the respective places.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
Openness = df['O']
Conscientiousness = df['C']
Extraversion = df['E']
Areeableness = df['A']
Neurocitism = df['N']
grid = plt.GridSpec(2, 3, wspace=0.4, hspace=0.3)
# Plot 1
import matplotlib.pyplot as plt
import numpy as np
plt.hist(df['O'], bins = 100)
plt.title("Openness to experience")
plt.xlabel("Value")
plt.ylabel("Frequency")
# Plot 2
import matplotlib.pyplot as plt
import numpy as np
plt.hist(df['C'], bins = 100)
plt.title("Conscientiousness")
plt.xlabel("Value")
plt.ylabel("Frequency")
# Plot 3
import matplotlib.pyplot as plt
import numpy as np
plt.hist(df['E'], bins = 100)
plt.title("Extraversion")
plt.xlabel("Value")
plt.ylabel("Frequency")
# Plot 4
import matplotlib.pyplot as plt
import numpy as np
plt.hist(df['A'], bins = 100)
plt.title("Areeableness")
plt.xlabel("Value")
plt.ylabel("Frequency")
# Plot 5
import matplotlib.pyplot as plt
import numpy as np
plt.hist(df['N'], bins = 100)
plt.title("Neurocitism")
plt.xlabel("Value")
plt.ylabel("Frequency")
Results merge everything into one chart
But it should look like this
Could you guys please help me out?
You can use plt.subplots:
fig, axes = plt.subplots(nrows=2, ncols=2)
this creates a 2x2 grid. You can access individual positions by indexing hte axes object:
top left:
ax = axes[0,0]
ax.hist(df['C'], bins = 100)
ax.set_title("Conscientiousness")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
and so on.
You also continue use GridSpec. Visit https://matplotlib.org/stable/tutorials/intermediate/gridspec.html
for example -
fig2 = plt.figure(constrained_layout=True)
spec2 = gridspec.GridSpec(ncols=2, nrows=3, figure=fig2)
f2_ax1 = fig2.add_subplot(spec2[0, 0])
f2_ax2 = fig2.add_subplot(spec2[0, 1])
f2_ax3 = fig2.add_subplot(spec2[1, 0])
f2_ax4 = fig2.add_subplot(spec2[1, 1])
f2_ax5 = fig2.add_subplot(spec2[2, 1])
# Plot 1
f2_ax1.hist(df['O'])
f2_ax1.set_title("Openness to experience")
f2_ax1.set_xlabel("Value")
f2_ax1.set_ylabel("Frequency")
` plt.show()
I am quite new to python so please bear with me.
My code so far is below:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
df = pd.read_csv(r"/Users/aaronhuang/Desktop/ffp/exfileCLEAN2.csv", skiprows=[1])
magnitudes = df['Magnitude '].values
times = df['Time '].values
zscores = np.abs(stats.zscore(magnitudes, ddof=1))
outlier_indicies = np.argwhere(zscores > 3).flatten()
numbers = print(times[outlier_indicies])
window = 2
num = 1
x = times[outlier_indicies[num]- window:outlier_indicies[num]+window+1]
y = magnitudes[outlier_indicies[num]- window:outlier_indicies[num]+window+1]
plt.plot(x, y)
plt.xlabel('Time (units)')
plt.ylabel('Magnitude (units)')
plt.show()
fig = plt.figure()
Currently, the code only prints one graph, determined by num. I would like it to print all the graphs at once, using plt.subplots, which I think is the easiest way.
I would be great if someone could help me integrate plt.subplots as I don't really know where to start.
Thanks
PS: Here is the data if it would be useful.
Graph created.
The cause of the error is an extra space at the end of the column name in the provided CSV file. The code fixes that. If you fixed the column names in the original data, you should also fix the code.
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# df = pd.read_csv(r"/Users/aaronhuang/Desktop/ffp/exfileCLEAN2.csv", skiprows=[1])
df = pd.read_csv(r"./exfileCLEAN2.csv", skiprows=[1])
magnitudes = df['Magnitude '].values
times = df['Time '].values
zscores = np.abs(stats.zscore(magnitudes, ddof=1))
outlier_indicies = np.argwhere(zscores > 3).flatten()
numbers = print(times[outlier_indicies])
import matplotlib.pyplot as plt
fig, axes = plt.subplots(6, 10, figsize=(30,30))
for i in range(6):
for j in range(10):
x = df.iloc[j*10:(j+1)*10,:]
axes[i][j].plot(x['Time '], x['Magnitude '])
axes[i][j].set_xticklabels(x['Time '], rotation=45)
window = 2
num = 1
x = times[outlier_indicies[num] - window:outlier_indicies[num]+window+1]
y = magnitudes[outlier_indicies[num] - window:outlier_indicies[num]+window+1]
plt.plot(x, y)
plt.xlabel('Time (units)')
plt.ylabel('Magnitude (units)')
plt.show()
fig = plt.figure()
I am trying to plot a graph using matplotlib.pyplot.
import matplotlib.pyplot as plt
import numpy as np
x = [i for i in range (1,201)]
y = np.loadtxt('final_fscore.txt', dtype=np.float128)
plt.plot(x, y, lw=2)
plt.show()
It looks something like this:
I want to mark the first value of x where y has reached the highest ( which is already known, say for x= 23, y= y[23]), like this figure shown below:
I have been searching this for some time now, with little success. I have tried adding a straight line for now, which is not behaving the desired way:
import matplotlib.pyplot as plt
import numpy as np
x = [i for i in range (1,201)]
y = np.loadtxt('final_fscore.txt', dtype=np.float128)
plt.plot(x, y, lw=2)
plt.plot([23,y[23]], [23,0])
plt.show()
Resulting graph:
Note: I want to make the figure like in the second graph.
It's not clear what y[23] would do here. You would need to find out the maximum value and the index at which this occurs (np.argmax). You may then use this to plot a 3 point line with those coordinates.
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(9)
x = np.arange(200)
y = np.cumsum(np.random.randn(200))
plt.plot(x, y, lw=2)
amax = np.argmax(y)
xlim,ylim = plt.xlim(), plt.ylim()
plt.plot([x[amax], x[amax], xlim[0]], [xlim[0], y[amax], y[amax]],
linestyle="--")
plt.xlim(xlim)
plt.ylim(ylim)
plt.show()
i want to make graph using matplotlib in python.
np.load(name.npy')
i searched many things and i tried
for example..just...
x = [dt.datetime(2003, 05, 01), dt.datetime(2008, 06, 01)]
df = np.load(r'file')
y = df
Replace the end date on the date-range to your desired graph, and the 'y' should be array loaded
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
start_date = '2003-05-01'
y = np.load(r'c:\python27\abnormal.npy')
x = pd.date_range(start=start_date, periods=len(y), freq='D')
plt.plot(x,y,'.')
plt.show()
If your input array does not contain ordered pairs such as [(0,1), (1,1), (2,2)] and only contains one set of numbers '[1, 2, 3, 4]`, you neeed to create a set of x-coordinates. For a time series in days, you could try something like this:
import datetime
import numpy as np
import matplotlib.pyplot as plt
def getData(fileName):
# Load the data file to serve as y-axis coordinates
y = np.load(fileName)
# For each y coordinate we need an x coordinate
time_offset = list(range(len(y)))
# Convert time_offset to a time-series
# We will assume x-values equal number of days since a beginDate
x = []
beginDate = datetime.date(2015, 6, 1) # The date to begin our time series
for n in time_offset:
date = beginDate + datetime.timedelta(n) # Date + number_of_Days_passed
x.append(date)
return x, y
def plot(x, y):
# Plot the data
fig = plt.figure()
ax = plt.subplot2grid((1,1), (0,0), rowspan=1, colspan=1)
ax.scatter(x, y)
for label in ax.xaxis.get_ticklabels():
label.set_rotation(90)
ax.grid(True)
plt.subplots_adjust(left=.10, bottom=.19, right=.93, top=.95, wspace=.20, hspace=0)
plt.show()
x, y = getData('abnormal.npy')
plot(x, y)
I use pandas to read a csv file to do some analysis. But the returned type is pandas.core.series.Series, which can not be converted to num using the command matplotlib.dates.date2num. Below is my code:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_file, show
import matplotlib.dates as mdates
import time
import matplotlib.pyplot as plt
AAPL = pd.read_csv(
"http://ichart.yahoo.com/table.csvs=AAPL&a=0&b=1&c=2009&d=0&e=1&f=2010",
parse_dates=['Date']
)
x = AAPL['Date']
y = AAPL['Close']
print type(x)
x = mdates.date2num(x)
z4 = np.polyfit(x, y, 6)
p4 = np.poly1d(z4)
xx = np.linspace(x.min(), x.max(), 100)
dd = mdates.num2date(xx)
plt.plot(dd,p4(xx))
The command print type(x) returns pandas.core.series.Series. This one x = mdates.date2num(x) returns the error as: AttributeError: 'numpy.datetime64' object has no attribute 'toordinal'.
Any one can shed a light on me for this issue?
Use x.astype(datetime) to convert to datetime.
from datetime import datetime
x = mdates.date2num(x.astype(datetime))
z4 = np.polyfit(x, y, 6)
p4 = np.poly1d(z4)
xx = np.linspace(x.min(), x.max(), 100)
dd = mdates.num2date(xx)
plt.plot(dd,p4(xx))