For any given number, how can I check how many times a plot reaches that value and then goes above it (i.e. crosses a horizontal line at that level from below)?
I have already tried:
import os
import numpy as np
import pylab as plt
import pandas as pd
# Load the price history and derive a per-row average of the high and low columns.
df = pd.read_csv('C:/Users/Payam/Desktop/tesla-stock-price.csv')
df['avg'] = df[['high', 'low']].mean(axis=1)

# Pull the columns used below out as plain NumPy arrays.
e = df['avg'].values
x, y, z = (df[column].values for column in ('date', 'close', 'open'))

# Closing price as a blue line, with every 150th date as a rotated x tick.
f, ax = plt.subplots(figsize=(20, 10))
ax.plot(x, y, 'b')
ax.set_xticks(x[::150])
plt.xticks(rotation=90)

# Horizontal reference level at 50, drawn as one black dot per sample.
ax.plot(np.arange(len(x)), np.full(len(x), 50.0), 'k.')
Given some test array
# Small sample series used to demonstrate threshold selection.
test_array = np.array([
    1, 6, 8, 65, 4, 2,
    5, 8, 9, 6, 4, 6,
    9, 0, 8, 6, 4, 32,
])
you can get all indices where the value is greater than a number, say 5, like so
# Build the boolean mask once and reuse it for both the indices and the values.
above_five = test_array > 5
print(np.where(above_five))
print(test_array[above_five])
Related
I have an Excel sheet with some players' heights/weights/ages etc. I'm trying to make a basic graph where I can show an average height/weight ratio and order x-axis from low to high? Sorry I'm just a beginner
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Raw string: "\P" and "\c" are not valid escape sequences, so the plain
# literal triggers an invalid-escape warning on current Python versions.
# The path value itself is unchanged.
var = pd.read_excel(r"C:\Program Files\currentnbaplayerslist.xlsx")
print(var)

# Columns to compare, as plain Python lists.
x = list(var['Height'])
y = list(var['Weight'])

# One green marker per player, height on x and weight on y.
plt.figure(figsize=(10,10))
plt.style.use('ggplot')
plt.scatter(x,y,marker="o",s=100,edgecolors="white",c="green")
plt.title("NBA players' height/weight")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.gcf().autofmt_xdate()
plt.show()
This is the result I get:
Without seeing your data, I can only make an assumption here. But it looks like you have 2 measurements for height. You need to convert it to be all the same type. So here's a function to convert 7' 4" into centimetres. Then it should work.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
def feet_to_cm(x):
    """Return a height in centimetres.

    Values that ``float()`` accepts (numbers or numeric strings) are taken to
    be centimetres already and are returned as a float unchanged. Any other
    value is parsed as a feet-and-inches string such as ``7' 4"`` and
    converted.

    Raises:
        ValueError: if the string does not contain exactly two numbers.
        TypeError: if ``x`` is neither numeric nor a string.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # Not a plain number: fall through to the feet/inches parser.
        # Only conversion failures are caught, so unrelated errors
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        pass

    numbers = [float(s) for s in re.findall(r'-?\d+\.?\d*', x)]
    if len(numbers) != 2:
        raise ValueError(
            f"expected a height like 7' 4\" (feet and inches), got {x!r}"
        )
    h_ft, h_inch = numbers
    # 12 inches per foot, 2.54 cm per inch.
    return (h_inch + h_ft * 12) * 2.54
# Toy data standing in for the spreadsheet: three heights already in
# centimetres and one written as feet and inches.
data = {
    'Height': [190.58, 198.12, 187.96, "7' 4"],
    'Weight': [240.3, 278.25, 180.5, 166],
}
# With the real data, `var` would be read from the Excel file instead.
var = pd.DataFrame(data)
print(var)

# Normalise every height to centimetres before plotting.
var['Height'] = var['Height'].apply(feet_to_cm)
x = list(var['Height'])
y = list(var['Weight'])

# Same scatter plot as in the question, now on a purely numeric x axis.
fig = plt.figure(figsize=(10, 10))
plt.style.use('ggplot')
plt.scatter(x, y, marker="o", s=100, edgecolors="white", c="green")
plt.title("NBA players' height/weight")
plt.xlabel("Height")
plt.ylabel("Weight")
fig.autofmt_xdate()
plt.show()
If I have a dataset:
import numpy as np
import matplotlib.pyplot as plt
# 100 rows by 10 columns of standard-normal samples; plt.plot draws one line per column.
y = np.random.randn(100, 10)
plt.plot(y)
And a corresponding vector of categorical values:
# Ten category labels alternating 'nr1', 'nr2', stored as a (10, 1) column.
x = np.array(['nr1', 'nr2'] * 5).reshape(10, 1)
How do I color each line in y according to whether its category in x is 'nr1' or 'nr2'?
It's also OK if pandas' plot() function has an easy way to do this.
It should preferably be scalable so there can be more than two groups in x.
Edit: This already includes the comment and is working for any number of categories.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# 100 samples for each of 6 series, plus one category label per series.
y = np.random.randn(100, 6)
x = np.array(['nr1', 'nr2', 'n3'] * 2)

# `inverse` holds, for every series, the integer index of its category;
# the colormap turns those indices into one RGBA colour per series.
uniques, inverse = np.unique(x, return_inverse=True)
c = mpl.cm.Set1(inverse)

# Draw each column of y in the colour of its category.
for series, colour in zip(y.T, c):
    plt.plot(series, color=colour)
I'm a newbie to Altair, and I would like to change the number of bars being plotted in a bar plot. I have done some research online, but couldn't find anything helpful. Here is my code:
import altair as alt
import pandas as pd
import numpy as np
# Draw 1000 normally distributed samples; the chart below shows their distribution.
my_numbers = np.random.normal(size=1000)
my_numbers_df = pd.DataFrame({'Integers': my_numbers})

# Binned x encoding with default binning, restricted to the range [-5, 5].
binned_x = alt.X("Integers", bin=True, scale=alt.Scale(domain=(-5, 5)))

# Bar chart of the number of samples per bin.
alt.Chart(my_numbers_df).mark_bar(size=10).encode(binned_x, y='count()')
The plot right now looks something like this
You can increase the number of bins by passing an alt.Bin() object and specifying the maxbins
import altair as alt
import pandas as pd
import numpy as np
# Draw 1000 normally distributed samples; the chart below shows their distribution.
my_numbers = np.random.normal(size=1000)
my_numbers_df = pd.DataFrame({'Integers': my_numbers})

# An explicit alt.Bin lets the upper limit on the number of bins be raised to 25.
finer_bins = alt.Bin(maxbins=25)
binned_x = alt.X("Integers", bin=finer_bins, scale=alt.Scale(domain=(-5, 5)))

# Bar chart of the number of samples per bin.
alt.Chart(my_numbers_df).mark_bar(size=10).encode(binned_x, y='count()')
I am trying to plot a line graph with around 15 to 50 items, but the colors repeat, which makes the graph unusable.
I have tried several of the methods from the answers to a similar question, such as using numpy and random.
However, I am unable to find an easy way to do this.
import matplotlib.pyplot as plt
import os
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.dates as dates
import matplotlib.colors as colors
import numpy as np
# Load the port-performance export, drop its first row, and parse the timestamps.
df2=pd.read_csv("Portperfdetails.csv")
df3 = df2.drop(df2.index[0])
df3['DATETIME'] = pd.to_datetime(df3['DATETIME'])
# Unique port names in first-seen order (dict keys keep insertion order).
portname=list(dict.fromkeys(df3['PORT_NAME']))
# NOTE(review): the indentation of this snippet was lost in the paste. The
# statements below presumably form the body of this per-port loop -- confirm
# where the loop body ends before running.
for i in range(len(portname)):
# These empty lists are overwritten immediately by the two assignments below.
X = []
Y = []
# Timestamps and throughput values of the rows belonging to the current port.
X = list(df3.loc[df3['PORT_NAME'] == '%s' % portname[i]]['DATETIME'])
Y = list(df3.loc[df3['PORT_NAME'] == '%s' % portname[i]]['TOTAL_MBYTES'])
# Minor ticks carry the time of day, major ticks carry the date.
ax = plt.axes()
ax.xaxis.set_minor_locator(dates.HourLocator(interval=4)) # every 4 hours
ax.xaxis.set_minor_formatter(dates.DateFormatter('%H:%M')) # hours and minutes
ax.xaxis.set_major_locator(dates.DayLocator(interval=1)) # every day
ax.xaxis.set_major_formatter(dates.DateFormatter('\n%d-%m-%Y'))
# Convert every throughput value to int.
# NOTE(review): this loop reuses the name `i` of the port loop above.
for i in range(len(Y)):
Y[i] = int(Y[i])
# NOTE(review): num_plots is assigned but not read anywhere in this snippet.
num_plots = 20
# No color is passed here, so the line colour is left to matplotlib.
plt.plot(X, Y)
plt.ylabel('Port throughput')
plt.xlabel('Time')
plt.savefig('example.png')
Graph
I'll use a toy example since I do not have access to your data (df3).
I adapted this directly from the List of named colors example in the Matplotlib Gallery. The idea is to iterate over color names along with each line that is being plotted and use the color name to specify the color for each line.
from matplotlib import pyplot as plt
import matplotlib.colors as colors
# One figure with fifty lines, each drawn in a different named colour.
fig, ax = plt.subplots()
lotsa_colors = colors.get_named_colors_mapping()

# zip() stops at the shorter input, so only the first 50 colour names are used.
for i, cname in zip(range(50), lotsa_colors):
    # Ten consecutive integers starting at i, so each line is offset by one.
    y = list(range(i, i + 10))
    ax.plot(y, color=lotsa_colors[cname])

plt.show()
Looks like there are 1163 color names and 1105 unique colors
# Number of distinct colour values among the named colours
# (several names can map to the same value, so this can be lower than the number of names).
len(set(lotsa_colors.values()))
If you wanted to you could randomize the color names.
import random
# Keep only the colour names (the mapping's keys) and shuffle them in place.
lotsa_colors = list(colors.get_named_colors_mapping())
random.shuffle(lotsa_colors)
Say I have the following code:
import random
import matplotlib.pyplot as plt
# 4000 distinct integers drawn without replacement from 1..4999.
lis = random.sample(range(1, 5000), k=4000)
# Only one sequence is passed, so it is plotted against its own indices.
plt.plot(lis)
This plots the following:
The x-axis is printed from 0 to 4000 with a step size of 1. But I want it to be 0 to 4 with a step size of 0.0001.
I tried this:
import random
import numpy as np
import matplotlib.pyplot as plt
# 4000 distinct integers, plotted against their indices.
lis = random.sample(range(1, 5000), 4000)
plt.plot(lis)
# NOTE(review): plt.xlabel sets the x-axis label text, not the tick values,
# so this call does not rescale the axis.
plt.xlabel(np.linspace(0, 4, num=40001))
But this does not do the job:
How do I do this?
plt.plot() can take a single array as its argument (i.e. plt.plot(y)); it then interprets the array as y values and simply plots them over their indices, as in your example. If you want different x values, put them before the y values in the argument list, as in plt.plot(x, y), where x is your array of x values, which obviously must have the same length as y.
In your case this would mean
import numpy as np
import matplotlib.pyplot as plt
# 4000 random integers in [0, 5000) to plot.
lis = np.random.randint(0, 5000, 4000)
# Evenly spaced x values covering [0, 4) in steps of 0.001. linspace with an
# explicit sample count guarantees that x has exactly as many points as lis,
# which np.arange with a floating-point step does not.
x = np.linspace(0, 4, num=len(lis), endpoint=False)
# Passing x before the y values plots lis against x instead of against its indices.
plt.plot(x, lis)
See the docs for further reading: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html