How to put x-axis values in order - python

I have an Excel sheet with some players' heights/weights/ages etc. I'm trying to make a basic graph where I can show an average height/weight ratio and order x-axis from low to high? Sorry I'm just a beginner
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the player table. The raw string keeps the Windows backslashes literal:
# in a normal string "\P" and "\c" are invalid escape sequences, which newer
# Python versions warn about.
var = pd.read_excel(r"C:\Program Files\currentnbaplayerslist.xlsx")
print(var)

# The two columns become the scatter coordinates.
x = list(var['Height'])
y = list(var['Weight'])

plt.figure(figsize=(10, 10))
plt.style.use('ggplot')
plt.scatter(x, y, marker="o", s=100, edgecolors="white", c="green")
plt.title("NBA players' height/weight")
plt.xlabel("Height")
plt.ylabel("Weight")
# NOTE(review): autofmt_xdate() is meant for date tick labels; kept as in the
# original script.
plt.gcf().autofmt_xdate()
plt.show()
This is the result I get:

Without seeing your data, I can only make an assumption here. But it looks like your Height column mixes two formats: plain numbers and feet/inches strings. You need to convert them all to the same unit. So here's a function that converts values such as 7' 4" into centimetres. Then it should work.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
def feet_to_cm(x):
    """Return a height in centimetres as a float.

    Anything ``float()`` accepts (a number or a numeric string) is assumed
    to be in centimetres already and is returned unchanged as a float.
    Any other string is parsed as feet and optional inches, e.g. ``7' 4"``,
    ``6-4`` or ``6'``, and converted to centimetres.

    Raises:
        ValueError: if the string contains no number or more than two.
        TypeError: if ``x`` is neither numeric nor a string.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # Not a plain number: fall through to feet/inches parsing.
        pass

    # Unsigned numbers only, so a separator dash ("6-4") is not read as a
    # minus sign on the inches.
    parts = [float(s) for s in re.findall(r'\d+(?:\.\d+)?', x)]
    if not 1 <= len(parts) <= 2:
        raise ValueError(f"cannot parse height: {x!r}")

    h_ft = parts[0]
    h_inch = parts[1] if len(parts) == 2 else 0.0
    # 12 inches per foot, 2.54 cm per inch.
    return (h_ft * 12 + h_inch) * 2.54
# Small demo table: three heights already in centimetres plus one
# feet/inches string, to exercise the conversion.
data = {
    'Height': [190.58, 198.12, 187.96, "7' 4"],
    'Weight': [240.3, 278.25, 180.5, 166],
}
# To use the real workbook instead of the demo table:
#var = pd.read_excel("C:\Program Files\currentnbaplayerslist.xlsx")
var = pd.DataFrame(data)
print(var)

# Normalise every height to centimetres before plotting.
var['Height'] = var['Height'].map(feet_to_cm)
x, y = (list(var[column]) for column in ('Height', 'Weight'))

plt.figure(figsize=(10, 10))
plt.style.use('ggplot')
plt.scatter(
    x,
    y,
    marker="o",
    s=100,
    edgecolors="white",
    c="green",
)
plt.title("NBA players' height/weight")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.gcf().autofmt_xdate()
plt.show()

Related

How to use seaborn to draw a barplot with different colors for different grouped variables

I'm trying to draw a barplot with different colors on different grouped variables.
I'm using this code:
import pandas as pd
import numpy as np
import seaborn as sns
# Row labels factor_1 .. factor_5.
index_list = [f"factor_{n}" for n in range(1, 6)]
data = pd.DataFrame(index=index_list)

# Columns stock_1 .. stock_5, each holding five random integers.
for n in range(1, 6):
    data[f"stock_{n}"] = np.random.randint(1, 10, 5)

# Horizontal bars: one bar per factor, bar length taken from stock_1.
sns.barplot(data=data, x=data["stock_1"], y=data.index)
What I get is 5 bars with different colors. If I want the first two bars to have the same color (assume they belong to the same group), the next two bars to share another color, and a legend on the right side, how do I do that?
Help me plz:)
barplot
You may try the following:
import pandas as pd
import numpy as np
import seaborn as sns
# Row labels factor_1 .. factor_5.
index_list = ["factor_" + str(x+1) for x in range(5)]
data = pd.DataFrame(index = index_list)
# Columns stock_1 .. stock_5, each holding five random integers.
for i in range(5):
    data["stock_" + str(i+1)] = np.random.randint(1,10,5)
# One color per bar, in bar order: the first two bars blue, the next two red,
# the last one black. This is a plain assignment; a chained
# `clrs = palette = [...]` would also bind an unused `palette` name.
clrs = ["b"] * 2 + ["r"] * 2 + ["k"]
sns.barplot(data = data, x = data["stock_1"], y = data.index, palette=clrs)
Output:

Scatterplot error : "x and y must be the same size" but they have the same size

I would like to make a scatterplot with the dataframe :"df_death_mois1". But it doesn't work. The error message is : "x and y must be the same size". Can you help me ?
import pandas as pd
import matplotlib.pyplot as plt
# Download the members and expeditions tables.
members = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/members.csv")
expeditions = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/expeditions.csv")
# Parse the highpoint date and store its month number in a new "mois" column.
expeditions['highpoint_date'] = pd.to_datetime(expeditions['highpoint_date'])
lesmois = expeditions['highpoint_date'].dt.month
expeditions["mois"] = lesmois
# Bare expression: only displays the frame in a notebook/REPL.
expeditions
# Attach each expedition's month to its members (inner join on expedition_id).
df_members_mois = pd.merge(members,expeditions[['expedition_id','mois']], on='expedition_id', how='inner')
# Keep only the members whose death cause is "Avalanche".
df_death_mois = df_members_mois[df_members_mois["death_cause"]=="Avalanche"]
df_death_mois
# Count avalanche deaths per month; the result is indexed by "mois".
df_death_mois1 = df_death_mois.groupby("mois")['death_cause'].count()
df_death_mois1 = df_death_mois1.to_frame()
df_death_mois1
# NOTE(review): after the groupby, "mois" is the index of df_death_mois1 and
# not a column, which is presumably why the data= lookup of x fails here.
plt.scatter(x="mois", y = "death_cause", data = df_death_mois1)
plt.title('scatterplot')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
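After the groupby, "mois" is the index of df_death_mois1 rather than a column, so call reset_index first and then plot.scatter:
>>> df_death_mois1.reset_index().plot.scatter(x="mois", y="death_cause")
With matplotlib.pyplot you can pass the index and the column directly:
>>> plt.scatter(x=df_death_mois1.index, y=df_death_mois1["death_cause"])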

Python plot with unique colors for more than 10 lines

I am trying to plot a line graph with around 15 to 50 items, but the colors repeat, which makes the graph unusable.
I have tried several methods from the answers to a similar question, such as numpy and random.
However, I am unable to find an easy way to do this.
import matplotlib.pyplot as plt
import os
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.dates as dates
import matplotlib.colors as colors
import numpy as np
# Load the port performance data and drop the row at the first index label.
df2=pd.read_csv("Portperfdetails.csv")
df3 = df2.drop(df2.index[0])
df3['DATETIME'] = pd.to_datetime(df3['DATETIME'])
# Unique port names, in order of first appearance.
portname=list(dict.fromkeys(df3['PORT_NAME']))
# NOTE(review): indentation was lost in this listing. The loop below
# presumably runs through the plt.xlabel() call, drawing one line per port,
# with plt.savefig() after the loop; confirm against the original script.
for i in range(len(portname)):
# These two empty lists are overwritten immediately below.
X = []
Y = []
# Timestamps and throughput values of the current port.
X = list(df3.loc[df3['PORT_NAME'] == '%s' % portname[i]]['DATETIME'])
Y = list(df3.loc[df3['PORT_NAME'] == '%s' % portname[i]]['TOTAL_MBYTES'])
# Minor ticks every 4 hours (HH:MM), major ticks every day (dd-mm-YYYY).
ax = plt.axes()
ax.xaxis.set_minor_locator(dates.HourLocator(interval=4)) # every 4 hours
ax.xaxis.set_minor_formatter(dates.DateFormatter('%H:%M')) # hours and minutes
ax.xaxis.set_major_locator(dates.DayLocator(interval=1)) # every day
ax.xaxis.set_major_formatter(dates.DateFormatter('\n%d-%m-%Y'))
# Convert every throughput value to int; note that this loop reuses the name i.
for i in range(len(Y)):
Y[i] = int(Y[i])
# num_plots is assigned but not used anywhere in this snippet.
num_plots = 20
# No color is passed, so matplotlib picks one from its default color cycle.
plt.plot(X, Y)
plt.ylabel('Port throughput')
plt.xlabel('Time')
plt.savefig('example.png')
Graph
I'll use a toy example since I do not have access to your data (df3).
I adapted this directly from the List of named colors example in the Matplotlib Gallery. The idea is to iterate over color names along with each line that is being plotted and use the color name to specify the color for each line.
from matplotlib import pyplot as plt
import matplotlib.colors as colors
fig, ax = plt.subplots()
# Mapping of color name -> color value for every named color.
lotsa_colors = colors.get_named_colors_mapping()
# Draw 50 ten-point lines, each shifted up by one and each using the next
# named color in the mapping.
for i, cname in zip(range(50), lotsa_colors):
    y = list(range(i, i + 10))
    ax.plot(y, color=lotsa_colors[cname])
plt.show()
# plt.close()
Looks like there are 1163 color names and 1105 unique colors
# Number of distinct color values in the mapping (several names can share one).
len(set(lotsa_colors.values()))
If you wanted to you could randomize the color names.
import random
# Take the color names as a list and shuffle them in place.
lotsa_colors = list(colors.get_named_colors_mapping())
random.shuffle(lotsa_colors)

checking if the plot hit a number or not?

For any given number, how can I check how many times the plotted line reaches that value (a horizontal level) and then goes higher?
I have already tried:
import os
import numpy as np
import pylab as plt
import pandas as pd
df = pd.read_csv('C:/Users/Payam/Desktop/tesla-stock-price.csv')
# Row-wise mean of the high and low columns.
df['avg'] = df[['high', 'low']].mean(axis=1)
e = df['avg'].values
x, y, z = (df[column].values for column in ('date', 'close', 'open'))

f, ax = plt.subplots(figsize=(20, 10))
# Closing price over time, as a blue line.
ax.plot(x, y, 'b')
# Label every 150th date, rotated to stay readable.
ax.set_xticks(x[::150])
plt.xticks(rotation=90)
# Dotted black reference level at 50.
ax.plot(np.arange(len(x)), np.full(len(x), 50.0), 'k.')
Given some test array
# Sample values for the threshold lookup.
test_array = np.array(
    [1, 6, 8, 65, 4, 2, 5, 8, 9, 6, 4, 6, 9, 0, 8, 6, 4, 32]
)
you can get all indices where the value is greater than a number, say 5, like so
# Indices of the elements greater than 5, then the elements themselves.
above_5 = np.where(test_array > 5)
print(above_5)
print(test_array[above_5])

half yearly colorbar in matplotlib and pandas

I have a pandas DataFrame. I am making a scatter plot and trying to categorize the data with a colorbar. I did it for monthly classification and quarterly classification, as shown in the example code below.
# Two random series, one value per day for 366 days starting 2000-01-01.
a = np.random.rand(366)
b = 0.4 * np.random.rand(366)
index = pd.date_range(start=pd.to_datetime('01-01-2000'), periods=366)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# Color each point by the month number of its date.
plt.scatter(x=df['a'], y=df['b'], c=df.index.month)
plt.colorbar()
And also for quarterly:
# Same scatter, colored by the quarter of each date instead of the month.
plt.scatter(x=df['a'], y=df['b'], c=df.index.quarter)
plt.colorbar()
My question: is there any way to categorize by half year, for example months 1-6 and 7-12, and also by a custom split of months such as 10-3 and 4-9?
Thank you and your help/suggestion will be highly appreciated.
Write a custom function and pass its result to the color argument of scatter. I made an example for a half-yearly division. You can use it as a template for your own split function:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
# Map a month number to its half-year: 0 for months up to 6, 1 for the rest.
def halfyear(m):
    """Return 0 when ``m`` is at most 6, otherwise 1."""
    if m <= 6:
        return 0
    return 1
# Wrap halfyear so it can be applied to a whole array of months in one call.
hy = np.vectorize(halfyear)

# Two random series, one value per day for 366 days starting 2000-01-01.
a = np.random.rand(366)
b = 0.4 * np.random.rand(366)
index = pd.date_range(start=pd.to_datetime('01-01-2000'), periods=366)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# Color each point by the half-year (0 or 1) of its date.
plt.scatter(x=df['a'], y=df['b'], c=hy(df.index.month))
plt.colorbar()
plt.show()
Another way is to use a lambda function:
# Same half-year coloring, computed inline from each timestamp's month.
plt.scatter(
    df['a'],
    df['b'],
    c=df.index.map(lambda ts: 0 if 0 < ts.month < 7 else 1),
)
I would opt for a solution which does not completely discard the monthly information. Using colors which are similar but distinguishable for the months makes it possible to visually classify by half-year as well as by month.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors
# Two random series, one value per day for 366 days starting 2000-01-01.
a = np.random.rand(366)
b = 0.4 * np.random.rand(366)
index = pd.date_range(start=pd.to_datetime('01-01-2000'), periods=366)
df = pd.DataFrame({'a': a, 'b': b}, index=index)

# Color stops: crimson -> orange across the lower half of the scale, then
# darkblue -> skyblue across the upper half.
colors = ["crimson", "orange", "darkblue", "skyblue"]
stops = [0, .499, .5, 1]
cdic = [(position, color) for position, color in zip(stops, colors)]
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("name", cdic, 12)
# Boundaries at 0.5, 1.5, ..., 12.5 so each month number gets its own bin.
norm = matplotlib.colors.BoundaryNorm(0.5 + np.arange(13), 12)

plt.scatter(x=df['a'], y=df['b'], c=df.index.month, cmap=cmap, norm=norm)
plt.colorbar(ticks=np.arange(1, 13))
plt.show()

Categories

Resources