I want to illustrate nicely how often (y-axis) a certain output (x-axis) occurs...
My code produces the following plot:
It's not good because the values are apparently rounded to integers; e.g., there are not over 100 outputs at 100% — I think most of them are actually at 99%.
The code:
#!/usr/bin/env python3
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
trajectoryIds = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0, 121.0, 122.0, 123.0, 124.0, 125.0, 126.0, 127.0, 128.0, 129.0, 130.0, 131.0, 132.0, 133.0, 134.0, 135.0, 136.0, 137.0, 138.0, 139.0, 140.0, 141.0, 142.0, 143.0, 144.0, 145.0, 146.0, 147.0, 148.0, 149.0, 150.0, 151.0, 152.0, 153.0, 154.0, 155.0, 156.0, 157.0, 158.0, 159.0, 160.0, 161.0, 162.0, 163.0, 164.0, 165.0, 166.0, 167.0, 168.0, 169.0, 170.0, 171.0, 172.0, 173.0, 174.0, 175.0, 176.0, 177.0, 178.0, 179.0, 180.0, 181.0, 182.0, 183.0, 184.0, 185.0, 186.0, 187.0, 188.0, 189.0, 190.0, 191.0, 192.0, 193.0, 194.0, 195.0, 196.0, 197.0, 198.0]
avgSolutionPercentages = [20.6256, 99.1448, 15.6764, 21.8231, 16.3733, 17.7502, 20.0055, 86.6873, 11.3105, 15.6693, 10.3449, 81.8921, 11.6745, 92.6031, 11.8787, 23.0229, 37.9636, 2.3903, 15.1727, 14.7088, 10.0426, 59.6758, 8.0042, 12.4174, 10.0585, 46.0567, 90.2376, 98.3273, 52.8645, 49.3027, 62.4136, 32.6199, 19.0642, 10.3319, 74.6157, 22.5771, 22.4118, 11.2017, 16.5053, 11.2021, 30.8376, 24.5255, 83.1072, 10.1529, 14.3991, 46.3459, 16.2137, 4.5773, 44.9549, 1.0719, 76.5605, 42.6589, 13.6209, 34.2856, 1.3574, 29.0465, 66.8146, 16.4796, 32.9564, 62.0732, 3.7047, 13.8828, 31.6088, 60.1141, 3.3247, 45.0796, 13.7862, 26.4498, 93.6806, 10.3245, 62.5157, 10.9833, 42.5908, 37.3208, 27.4115, 84.1648, 13.9058, 13.9065, 67.8918, 27.9075, 3.6116, 10.9091, 41.0988, 24.2177, 50.2762, 61.3869, 15.5915, 27.6536, 0.7993, 22.9483, 22.3393, 88.1832, 25.1604, 18.3625, 15.7212, 56.9646, 4.0434, 11.8431, 56.0613, 32.5472, 97.8757, 21.8233, 14.8162, 38.8259, 20.5676, 72.7201, 17.7987, 35.8117, 15.1699, 17.0359, 14.0621, 35.9655, 11.9095, 10.5691, 23.3259, 16.1746, 10.1936, 12.5084, 24.1494, 16.4727, 21.0687, 15.7495, 28.8929, 11.0135, 13.3133, 14.6639, 50.1304, 21.0346, 5.1604, 53.5107, 20.0712, 41.5111, 12.1633, 74.3263, 17.7904, 17.1684, 25.3977, 21.5871, 21.9332, 22.6674, 36.6634, 99.1179, 15.3213, 16.3999, 12.0147, 57.5163, 4.2062, 17.3874, 10.7132, 17.4919, 17.8457, 29.3538, 26.1468, 75.1234, 16.4368, 21.6191, 61.1394, 12.9972, 73.5746, 72.5788, 41.6835, 39.9912, 20.1648, 11.7097, 11.5203, 36.7387, 5.0694, 30.8129, 12.0922, 22.5419, 12.3569, 54.6776, 28.3561, 26.1219, 44.7455, 1.3281, 46.5064, 13.6016, 23.5483, 11.7151, 44.3669, 3.2577, 75.0943, 10.8634, 14.8226, 45.7661, 19.7319, 30.7981, 3.5965, 47.8161, 14.5996, 39.4484, 13.0693, 24.9947, 97.4253, 76.7901, 73.1183, 4.0922]
solutionPercentages = [99.2537, 99.8467, 96.4718, 99.6637, 99.6633, 97.1289, 9.7373, 99.5126, 97.3251, 96.0545, 99.6756, 75.6587, 61.1496, 96.7575, 97.1969, 96.5258, 99.7409, 99.8641, 99.8821, 98.5401, 99.7833, 99.6314, 99.7899, 99.9117, 99.5754, 99.5868, 99.7919, 99.9127, 0.0001, 99.7297, 40.8438, 99.8559, 99.6591, 99.8917, 99.3622, 0.0001, 0.0001, 99.4828, 0.0001, 99.8559, 0.0001, 0.0001, 99.6714, 9.9635, 99.8744, 93.8854, 67.3692, 96.3229, 98.4899, 66.9173, 98.2533, 99.8318, 73.9904, 99.8431, 6.2614, 97.2776, 96.0938, 71.9457, 99.9211, 96.1596, 99.8405, 99.6314, 95.4566, 98.4786, 99.8217, 96.1014, 99.0391, 94.6034, 99.8403, 99.9093, 9.8096, 97.8549, 98.7041, 19.9098, 86.3154, 21.5302, 99.2769, 99.0496, 99.7266, 99.8602, 86.7925, 96.3197, 99.9226, 9.4447, 97.9722, 50.4884, 92.2358, 87.4311, 74.2156, 97.8819, 93.2483, 96.3186, 77.9828, 80.2446, 47.1835, 40.8011, 90.5123, 85.7852, 9.8074, 95.9032, 98.5906, 12.5081, 97.0264, 9.9166, 73.6486, 97.8634, 8.4403, 97.7592, 97.9933, 95.8486, 49.7977, 95.1031, 76.1712, 96.1552, 89.0059, 79.6172, 96.7383, 90.8518, 95.8096, 98.2061, 96.3314, 97.5753, 97.9857, 9.0739, 66.9977, 86.5744, 76.8124, 8.6195, 81.3285, 91.0891, 87.3345, 65.3729, 86.7354, 89.9558, 3.1401, 83.4993, 75.1529, 83.5419, 78.3002, 89.8564, 82.2419, 19.3794, 88.2163, 87.9032, 97.8686, 95.0742, 12.3542, 84.7324, 99.4753, 76.1753, 99.5386, 99.8664, 85.7785, 9.9933, 99.7167, 99.9328, 74.4693, 99.7531, 99.0579, 99.5994, 99.7785, 19.2743, 54.7251, 91.7269, 99.5033, 98.9247, 97.6214, 0.0001, 97.7027, 98.6832, 98.4691, 98.9759, 99.7087, 99.9244, 99.4908, 82.1103, 67.6125, 78.2363, 93.5725, 91.5612, 99.8865, 68.5426, 79.0635, 76.8951, 99.3555, 98.9196, 6.1157, 75.8655, 83.8525, 86.1269, 83.3388, 96.1854, 87.1961, 81.7453, 9.2689, 95.2765, 9.0809, 99.8599]
# Average of the per-trajectory percentages; it is shown in the plot title.
avgSuccess = sum(avgSolutionPercentages)/len(trajectoryIds)
y = solutionPercentages
# Plot: histogram of the solution percentages with matplotlib's default binning.
fig, ax = plt.subplots()
ax.hist(y)
ax.set_ylabel('Number of Motions (Total: {})'.format(len(trajectoryIds)))
ax.set_xlabel('Planning Solution (%)')
ax.set_title('Planning Success Rate (Avg: {}%)'.format(round(avgSuccess, 2)))
# No plt.legend() call: none of the artists carries a label, so a legend
# would have no entries and matplotlib would only emit a warning about it.
plt.show()
So I found out how to make the values on the x axis more precise: I changed ax.hist(y) to ax.hist(y, bins = 1000). But that didn't really work out well either:
So now I need to:
get rid of the empty space between my bars (is there a way to get rid of these empty x values?)
while keeping all bars at the same width
change the precision at any time, e.g., from a step of 1 to a step of 0.01 for each bar
Just any suggestions on how to make the plot (and code) look better are much appreciated :) Maybe it's not the .hist function that's best for this...but I don't know any better - failed doing this with a bar chart so far :(
How about something like
#!/usr/bin/env python3
import matplotlib.pyplot as plt
import numpy as np
trajectoryIds = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0, 121.0, 122.0, 123.0, 124.0, 125.0, 126.0, 127.0, 128.0, 129.0, 130.0, 131.0, 132.0, 133.0, 134.0, 135.0, 136.0, 137.0, 138.0, 139.0, 140.0, 141.0, 142.0, 143.0, 144.0, 145.0, 146.0, 147.0, 148.0, 149.0, 150.0, 151.0, 152.0, 153.0, 154.0, 155.0, 156.0, 157.0, 158.0, 159.0, 160.0, 161.0, 162.0, 163.0, 164.0, 165.0, 166.0, 167.0, 168.0, 169.0, 170.0, 171.0, 172.0, 173.0, 174.0, 175.0, 176.0, 177.0, 178.0, 179.0, 180.0, 181.0, 182.0, 183.0, 184.0, 185.0, 186.0, 187.0, 188.0, 189.0, 190.0, 191.0, 192.0, 193.0, 194.0, 195.0, 196.0, 197.0, 198.0]
avgSolutionPercentages = [20.6256, 99.1448, 15.6764, 21.8231, 16.3733, 17.7502, 20.0055, 86.6873, 11.3105, 15.6693, 10.3449, 81.8921, 11.6745, 92.6031, 11.8787, 23.0229, 37.9636, 2.3903, 15.1727, 14.7088, 10.0426, 59.6758, 8.0042, 12.4174, 10.0585, 46.0567, 90.2376, 98.3273, 52.8645, 49.3027, 62.4136, 32.6199, 19.0642, 10.3319, 74.6157, 22.5771, 22.4118, 11.2017, 16.5053, 11.2021, 30.8376, 24.5255, 83.1072, 10.1529, 14.3991, 46.3459, 16.2137, 4.5773, 44.9549, 1.0719, 76.5605, 42.6589, 13.6209, 34.2856, 1.3574, 29.0465, 66.8146, 16.4796, 32.9564, 62.0732, 3.7047, 13.8828, 31.6088, 60.1141, 3.3247, 45.0796, 13.7862, 26.4498, 93.6806, 10.3245, 62.5157, 10.9833, 42.5908, 37.3208, 27.4115, 84.1648, 13.9058, 13.9065, 67.8918, 27.9075, 3.6116, 10.9091, 41.0988, 24.2177, 50.2762, 61.3869, 15.5915, 27.6536, 0.7993, 22.9483, 22.3393, 88.1832, 25.1604, 18.3625, 15.7212, 56.9646, 4.0434, 11.8431, 56.0613, 32.5472, 97.8757, 21.8233, 14.8162, 38.8259, 20.5676, 72.7201, 17.7987, 35.8117, 15.1699, 17.0359, 14.0621, 35.9655, 11.9095, 10.5691, 23.3259, 16.1746, 10.1936, 12.5084, 24.1494, 16.4727, 21.0687, 15.7495, 28.8929, 11.0135, 13.3133, 14.6639, 50.1304, 21.0346, 5.1604, 53.5107, 20.0712, 41.5111, 12.1633, 74.3263, 17.7904, 17.1684, 25.3977, 21.5871, 21.9332, 22.6674, 36.6634, 99.1179, 15.3213, 16.3999, 12.0147, 57.5163, 4.2062, 17.3874, 10.7132, 17.4919, 17.8457, 29.3538, 26.1468, 75.1234, 16.4368, 21.6191, 61.1394, 12.9972, 73.5746, 72.5788, 41.6835, 39.9912, 20.1648, 11.7097, 11.5203, 36.7387, 5.0694, 30.8129, 12.0922, 22.5419, 12.3569, 54.6776, 28.3561, 26.1219, 44.7455, 1.3281, 46.5064, 13.6016, 23.5483, 11.7151, 44.3669, 3.2577, 75.0943, 10.8634, 14.8226, 45.7661, 19.7319, 30.7981, 3.5965, 47.8161, 14.5996, 39.4484, 13.0693, 24.9947, 97.4253, 76.7901, 73.1183, 4.0922]
solutionPercentages = [99.2537, 99.8467, 96.4718, 99.6637, 99.6633, 97.1289, 9.7373, 99.5126, 97.3251, 96.0545, 99.6756, 75.6587, 61.1496, 96.7575, 97.1969, 96.5258, 99.7409, 99.8641, 99.8821, 98.5401, 99.7833, 99.6314, 99.7899, 99.9117, 99.5754, 99.5868, 99.7919, 99.9127, 0.0001, 99.7297, 40.8438, 99.8559, 99.6591, 99.8917, 99.3622, 0.0001, 0.0001, 99.4828, 0.0001, 99.8559, 0.0001, 0.0001, 99.6714, 9.9635, 99.8744, 93.8854, 67.3692, 96.3229, 98.4899, 66.9173, 98.2533, 99.8318, 73.9904, 99.8431, 6.2614, 97.2776, 96.0938, 71.9457, 99.9211, 96.1596, 99.8405, 99.6314, 95.4566, 98.4786, 99.8217, 96.1014, 99.0391, 94.6034, 99.8403, 99.9093, 9.8096, 97.8549, 98.7041, 19.9098, 86.3154, 21.5302, 99.2769, 99.0496, 99.7266, 99.8602, 86.7925, 96.3197, 99.9226, 9.4447, 97.9722, 50.4884, 92.2358, 87.4311, 74.2156, 97.8819, 93.2483, 96.3186, 77.9828, 80.2446, 47.1835, 40.8011, 90.5123, 85.7852, 9.8074, 95.9032, 98.5906, 12.5081, 97.0264, 9.9166, 73.6486, 97.8634, 8.4403, 97.7592, 97.9933, 95.8486, 49.7977, 95.1031, 76.1712, 96.1552, 89.0059, 79.6172, 96.7383, 90.8518, 95.8096, 98.2061, 96.3314, 97.5753, 97.9857, 9.0739, 66.9977, 86.5744, 76.8124, 8.6195, 81.3285, 91.0891, 87.3345, 65.3729, 86.7354, 89.9558, 3.1401, 83.4993, 75.1529, 83.5419, 78.3002, 89.8564, 82.2419, 19.3794, 88.2163, 87.9032, 97.8686, 95.0742, 12.3542, 84.7324, 99.4753, 76.1753, 99.5386, 99.8664, 85.7785, 9.9933, 99.7167, 99.9328, 74.4693, 99.7531, 99.0579, 99.5994, 99.7785, 19.2743, 54.7251, 91.7269, 99.5033, 98.9247, 97.6214, 0.0001, 97.7027, 98.6832, 98.4691, 98.9759, 99.7087, 99.9244, 99.4908, 82.1103, 67.6125, 78.2363, 93.5725, 91.5612, 99.8865, 68.5426, 79.0635, 76.8951, 99.3555, 98.9196, 6.1157, 75.8655, 83.8525, 86.1269, 83.3388, 96.1854, 87.1961, 81.7453, 9.2689, 95.2765, 9.0809, 99.8599]
avgSuccess = sum(avgSolutionPercentages)/len(trajectoryIds)
y = solutionPercentages

BIN_COUNT = 15    # number of equal-width bins the data range is split into
BAR_WIDTH = 0.75  # bar width in units of the (integer) bar positions

fig, ax = plt.subplots()

# Bin the data with numpy instead of ax.hist so that the counts can be
# filtered before anything is drawn.
counts, edges = np.histogram(y, bins=BIN_COUNT)

# Keep only the occupied bins. `edges` has one more entry than `counts`;
# dropping the last edge pairs every count with the left edge of its bin.
occupied = counts > 0
counts = counts[occupied]
left_edges = edges[:-1][occupied]

# Draw the surviving bins side by side at consecutive integer positions and
# label each bar with the (truncated) left edge of its bin.
positions = np.arange(len(left_edges))
tick_labels = [str(int(edge)) for edge in left_edges]
ax.bar(positions, counts, BAR_WIDTH, align='center')
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels)

ax.set_ylabel('Number of Motions (Total: {})'.format(len(trajectoryIds)))
ax.set_xlabel('Planning Solution (%)')
ax.set_title('Planning Success Rate (Avg: {}%)'.format(round(avgSuccess, 2)))
plt.show()
which produces the plot
You may use some nonlinear dependence of the bin width, e.g.
# Space the bin edges evenly in y**b and map them back with the b-th root,
# which makes the bins progressively narrower towards the top of the range.
b = 5
lowest, highest = np.min(y), np.max(y)
bins = np.linspace(lowest**b, highest**b) ** (1/b)
fig, ax = plt.subplots()
ax.hist(y, bins=bins, edgecolor="k")
Or you may define the bins completely customized, e.g. use a bin width of 10 up to 60, then a bin width of 5 up to 90, and finally a bin width of 1 up to 100.
# Piecewise-uniform bin edges: width 10 on [0, 60], width 5 on [60, 90] and
# width 1 on [90, 100]. Neighbouring segments share an end point (60 and 90),
# so np.unique is used to drop the repeated edges; leaving them in would
# create two zero-width bins.
bins = np.unique(np.concatenate((np.linspace(0, 60, 7),
                                 np.linspace(60, 90, 7),
                                 np.linspace(90, 100, 11))))
fig, ax = plt.subplots()
ax.hist(y, bins=bins, edgecolor="k")
I'm working on a program that calculates the slope using scipy's linregress function, but I'm getting two errors (depending on how I try to fix it). The two lists should be two-dimensional, basically x and y values.
from __future__ import division
from scipy.stats import linregress
import matplotlib.pyplot as mplot
import numpy as np
xs=[[20.0, 80.0, 45.0, 42.0, 93.0, 98.0, 65.0, 43.0, 72.0, 36.0, 9.0, 60.0, 47.0, 84.0, 31.0, 46.0, 57.0, 76.0, 27.0, 85.0, 0.0, 39.0, 2.0, 56.0, 68.0, 6.0, 41.0, 28.0, 61.0, 12.0, 32.0, 1.0, 54.0, 77.0, 18.0, 86.0, 62.0, 23.0, 30.0, 69.0, 4.0, 71.0, 64.0, 92.0, 24.0, 79.0, 8.0, 35.0, 49.0, 53.0, 7.0, 59.0, 70.0, 37.0, 13.0, 15.0, 73.0, 89.0, 96.0, 83.0, 22.0, 95.0, 19.0, 67.0, 5.0, 88.0, 38.0, 50.0, 55.0, 52.0, 81.0, 58.0, 11.0, 51.0, 99.0, 78.0, 25.0, 33.0, 40.0, 75.0, 3.0, 91.0, 48.0, 90.0, 82.0, 26.0, 10.0, 16.0, 21.0, 66.0, 14.0, 87.0, 74.0, 97.0, 94.0, 44.0, 29.0, 17.0, 63.0, 34.0], [87.0, 17.0, 69.0, 72.0, 76.0, 62.0, 20.0, 77.0, 5.0, 49.0, 81.0, 3.0, 24.0, 36.0, 44.0, 91.0, 99.0, 35.0, 43.0, 50.0, 12.0, 54.0, 46.0, 30.0, 37.0, 45.0, 90.0, 85.0, 70.0, 83.0, 38.0, 22.0, 23.0, 0.0, 60.0, 47.0, 26.0, 1.0, 95.0, 73.0, 65.0, 94.0, 84.0, 8.0, 34.0, 56.0, 66.0, 13.0, 75.0, 52.0, 19.0, 55.0, 67.0, 39.0, 21.0, 80.0, 98.0, 33.0, 11.0, 68.0, 40.0, 32.0, 2.0, 79.0, 82.0, 93.0, 96.0, 88.0, 14.0, 92.0, 41.0, 89.0, 28.0, 29.0, 42.0, 6.0, 86.0, 74.0, 58.0, 16.0, 31.0, 64.0, 15.0, 53.0, 25.0, 59.0, 61.0, 78.0, 51.0, 7.0, 57.0, 9.0, 97.0, 63.0, 48.0, 71.0, 18.0, 10.0, 4.0, 27.0]]
ys=[[155.506, 50.592, 104.447, 111.318, 36.148, 36.87, 74.266, 106.413, 58.341, 122.563, 180.555, 85.202, 96.84, 50.726, 126.56, 100.686, 88.303, 54.797, 138.487, 44.946, 200.9, 116.524, 193.652, 82.8, 65.823, 184.436, 113.738, 133.458, 83.765, 167.408, 129.491, 200.469, 89.238, 51.799, 159.217, 49.382, 78.443, 146.051, 129.045, 63.805, 185.564, 65.614, 74.243, 43.408, 140.863, 53.446, 182.767, 127.373, 94.494, 91.079, 187.194, 81.254, 68.702, 121.368, 164.756, 169.696, 59.483, 45.978, 33.057, 47.12, 154.755, 33.872, 160.754, 70.256, 190.393, 38.398, 113.188, 100.493, 84.511, 88.635, 49.353, 81.821, 178.876, 95.307, 32.2, 54.715, 141.389, 132.337, 109.673, 57.611, 189.251, 39.283, 97.31, 41.173, 47.529, 140.03, 173.058, 160.288, 154.773, 67.903, 164.718, 42.032, 60.739, 28.656, 34.302, 107.022, 137.344, 160.195, 73.636, 123.797], [14.138, 100.87, 30.287, 28.675, 21.826, 42.445, 97.938, 29.574, 125.976, 59.404, 26.609, 125.743, 95.329, 75.467, 59.497, 15.342, 9.834, 77.402, 65.019, 54.468, 112.64, 45.466, 55.197, 79.992, 71.146, 55.39, 14.795, 15.971, 28.535, 25.862, 73.239, 92.455, 87.635, 137.6, 38.59, 53.718, 86.26, 130.567, 11.274, 33.867, 40.035, 11.07, 16.109, 114.732, 76.552, 45.85, 31.827, 110.877, 26.292, 55.738, 101.801, 48.601, 33.632, 66.647, 98.39, 23.904, 11.172, 78.215, 109.417, 31.653, 68.368, 79.593, 124.548, 21.513, 19.828, 13.48, 9.993, 22.043, 108.229, 16.904, 66.704, 12.262, 79.947, 85.012, 66.754, 124.114, 17.548, 25.872, 45.392, 101.775, 78.085, 36.358, 101.795, 52.045, 87.637, 42.784, 37.011, 26.036, 50.146, 119.666, 42.514, 113.313, 9.125, 42.394, 51.954, 26.898, 96.678, 112.108, 125.252, 86.296]]
# NOTE(review): this call fails as written. xs and ys each hold two data sets
# (two inner lists), whereas linregress expects one-dimensional x and y when
# both are given; it also returns five values, not the four unpacked here.
slope, intercept, r_value, std_err = linregress(xs,ys)
print(slope)
My error is:
in linregress
ssxm, ssxym, ssyxm, ssym = np.cov(x, y, bias=1).flat
ValueError: too many values to unpack (expected 4)
I've tried changing my code to something like this:
slope, intercept, r_value, std_err = linregress(xs[:,0], ys[:,0])
But then my error becomes a TypeError:
TypeError: list indices must be integers or slices, not tuple
Does anyone have any suggestions? Perhaps there's something I don't understand about the use of the linregress function. I'm sure my first error has to do with my lists being 2D. For the second error, I'm lost.
You have two problems:
When interpreted as arrays, your variables xs and ys are two-dimensional with shape (2, 100). When linregress is given both arguments x and y, it expects them to be one-dimensional arrays.
As you can see in the "Returns" section of the docstring, linregress returns five values, not four.
You'll have to call linregress twice, and handle the five return values. For example,
In [144]: slope, intercept, rvalue, pvalue, stderr = linregress(xs[0], ys[0])
In [145]: slope, intercept, rvalue
Out[145]: (-1.7059670627062702, 187.5658196039604, -0.9912859597363385)
In [146]: slope, intercept, rvalue, pvalue, stderr = linregress(xs[1], ys[1])
In [147]: slope, intercept, rvalue
Out[147]: (-1.2455432103210327, 121.51968891089112, -0.9871123119133126)
Here is what I am trying to do:
Create a sqlite table from a csv (attached).
Add a new column for uptime percentage
Calculate uptime percentage and update new column. (status_up / group_total)
As you can see from the output, the percentage results are invalid. I'm guessing there is still something wrong with my alter_db function, but I haven't been able to find the correct way to do this in the documentation.
Any advice is greatly appreciated!
import csv
import sqlite3
import sys
# Append the data directory to the module search path.
sys.path.append('/home/run/today/')
# Module-wide connection to an in-memory SQLite database (':memory:').
db = sqlite3.connect(':memory:')
def init_db(cur):
    """Create the empty ``groupstats`` table through cursor ``cur``.

    The table has two TEXT columns (group_name, group_alias) followed by
    three REAL columns (group_total, status_up, status_down).
    """
    create_table_sql = '''CREATE TABLE groupstats (
group_name TEXT,
group_alias TEXT,
group_total REAL,
status_up REAL,
status_down REAL)'''
    cur.execute(create_table_sql)
def populate_db(cur, csv_fp, skip_header=False):
    """Bulk-insert the rows of a CSV source into ``groupstats``.

    Parameters:
        cur: cursor on a database that already contains the table.
        csv_fp: open text file (or any iterable of lines) in CSV format with
            five fields per row, in the order group_name, group_alias,
            group_total, status_up, status_down.
        skip_header: when true, the first CSV row is treated as a column
            header and is not inserted. The default (False) inserts every
            row, so a header line in the file ends up in the table as data.
    """
    rdr = csv.reader(csv_fp)
    if skip_header:
        # The None default keeps an empty input from raising StopIteration.
        next(rdr, None)
    cur.executemany('''
INSERT INTO groupstats (group_name, group_alias, group_total, status_up, status_down)
VALUES (?,?,?,?,?)''', rdr)
def alter_db(cur):
    """Extend ``groupstats`` with a REAL column named ``percent_up``."""
    add_column_sql = '''ALTER TABLE groupstats ADD COLUMN percent_up REAL'''
    cur.execute(add_column_sql)
# Write a value into percent_up for the rows of groupstats.
# NOTE(review): SQLite's round(X, Y) rounds X to Y digits, so the expression
# below rounds the constant 2 (to status_up / group_total digits) instead of
# rounding the ratio status_up / group_total to 2 digits.
def update_db(cur):
cur.execute('''UPDATE groupstats SET percent_up = round(2, status_up / group_total)''')
cur = db.cursor()
init_db(cur)
# Open the CSV in a context manager so the file handle is closed as soon as
# the rows have been loaded, instead of being left open for the whole run.
with open('/home/run/today/service_stats.csv') as csv_fp:
    populate_db(cur, csv_fp)
Here is the csv:
group_name,group_alias,group_total,status_up,status_down
Group_1,Group_1 Alias,22,18,4
Group_2,Group_2 Alias,7,7,0
Group_3,Group_3 Alias,22,22,0
Group_4,Group_4 Alias,14,14,0
Group_5,Group_5 Alias,50,21,29
Group_6,Group_6 Alias,30,30,0
Group_7,Group_7 Alias,3,2,1
Group_8,Group_8 Alias,10,10,0
Group_9,Group_9 Alias,10,10,0
Group_10,Group_10 Alias,12,11,1
Group_11,Group_11 Alias,9,9,0
Group_12,Group_12 Alias,1,1,0
Group_13,Group_13 Alias,16,14,2
Group_14,Group_14 Alias,18,8,10
Group_15,Group_15 Alias,25,24,1
Group_16,Group_16 Alias,2,2,0
Group_17,Group_17 Alias,8,1,7
Group_18,Group_18 Alias,6,1,5
Group_19,Group_19 Alias,2,2,0
Group_20,Group_20 Alias,44,39,5
Group_21,Group_21 Alias,12,12,0
Group_22,Group_22 Alias,8,8,0
Group_23,Group_23 Alias,8,8,0
Group_24,Group_24 Alias,56,54,2
Group_25,Group_25 Alias,3,1,2
Group_26,Group_26 Alias,14,12,2
Group_27,Group_27 Alias,10,10,0
Group_28,Group_28 Alias,2,2,0
Group_29,Group_29 Alias,2,1,1
Group_30,Group_30 Alias,23,23,0
query = 'SELECT * FROM groupstats'
cur.execute(query)
cur.fetchall()
[(u'group_name', u'group_alias', u'group_total', u'status_up', u'status_down'), (u'Group_1', u'Group_1 Alias', 22.0, 18.0, 4.0), (u'Group_2', u'Group_2 Alias', 7.0, 7.0, 0.0), (u'Group_3', u'Group_3 Alias', 22.0, 22.0, 0.0), (u'Group_4', u'Group_4 Alias', 14.0, 14.0, 0.0), (u'Group_5', u'Group_5 Alias', 50.0, 21.0, 29.0), (u'Group_6', u'Group_6 Alias', 30.0, 30.0, 0.0), (u'Group_7', u'Group_7 Alias', 3.0, 2.0, 1.0), (u'Group_8', u'Group_8 Alias', 10.0, 10.0, 0.0), (u'Group_9', u'Group_9 Alias', 10.0, 10.0, 0.0), (u'Group_10', u'Group_10 Alias', 12.0, 11.0, 1.0), (u'Group_11', u'Group_11 Alias', 9.0, 9.0, 0.0), (u'Group_12', u'Group_12 Alias', 1.0, 1.0, 0.0), (u'Group_13', u'Group_13 Alias', 16.0, 14.0, 2.0), (u'Group_14', u'Group_14 Alias', 18.0, 8.0, 10.0), (u'Group_15', u'Group_15 Alias', 25.0, 24.0, 1.0), (u'Group_16', u'Group_16 Alias', 2.0, 2.0, 0.0), (u'Group_17', u'Group_17 Alias', 8.0, 1.0, 7.0), (u'Group_18', u'Group_18 Alias', 6.0, 1.0, 5.0), (u'Group_19', u'Group_19 Alias', 2.0, 2.0, 0.0), (u'Group_20', u'Group_20 Alias', 44.0, 39.0, 5.0), (u'Group_21', u'Group_21 Alias', 12.0, 12.0, 0.0), (u'Group_22', u'Group_22 Alias', 8.0, 8.0, 0.0), (u'Group_23', u'Group_23 Alias', 8.0, 8.0, 0.0), (u'Group_24', u'Group_24 Alias', 56.0, 54.0, 2.0), (u'Group_25', u'Group_25 Alias', 3.0, 1.0, 2.0), (u'Group_26', u'Group_26 Alias', 14.0, 12.0, 2.0), (u'Group_27', u'Group_27 Alias', 10.0, 10.0, 0.0), (u'Group_28', u'Group_28 Alias', 2.0, 2.0, 0.0), (u'Group_29', u'Group_29 Alias', 2.0, 1.0, 1.0), (u'Group_30', u'Group_30 Alias', 23.0, 23.0, 0.0)]
alter_db(cur)
cur.execute(query)
cur.fetchall()
[(u'group_name', u'group_alias', u'group_total', u'status_up', u'status_down', None), (u'Group_1', u'Group_1 Alias', 22.0, 18.0, 4.0, None), (u'Group_2', u'Group_2 Alias', 7.0, 7.0, 0.0, None), (u'Group_3', u'Group_3 Alias', 22.0, 22.0, 0.0, None), (u'Group_4', u'Group_4 Alias', 14.0, 14.0, 0.0, None), (u'Group_5', u'Group_5 Alias', 50.0, 21.0, 29.0, None), (u'Group_6', u'Group_6 Alias', 30.0, 30.0, 0.0, None), (u'Group_7', u'Group_7 Alias', 3.0, 2.0, 1.0, None), (u'Group_8', u'Group_8 Alias', 10.0, 10.0, 0.0, None), (u'Group_9', u'Group_9 Alias', 10.0, 10.0, 0.0, None), (u'Group_10', u'Group_10 Alias', 12.0, 11.0, 1.0, None), (u'Group_11', u'Group_11 Alias', 9.0, 9.0, 0.0, None), (u'Group_12', u'Group_12 Alias', 1.0, 1.0, 0.0, None), (u'Group_13', u'Group_13 Alias', 16.0, 14.0, 2.0, None), (u'Group_14', u'Group_14 Alias', 18.0, 8.0, 10.0, None), (u'Group_15', u'Group_15 Alias', 25.0, 24.0, 1.0, None), (u'Group_16', u'Group_16 Alias', 2.0, 2.0, 0.0, None), (u'Group_17', u'Group_17 Alias', 8.0, 1.0, 7.0, None), (u'Group_18', u'Group_18 Alias', 6.0, 1.0, 5.0, None), (u'Group_19', u'Group_19 Alias', 2.0, 2.0, 0.0, None), (u'Group_20', u'Group_20 Alias', 44.0, 39.0, 5.0, None), (u'Group_21', u'Group_21 Alias', 12.0, 12.0, 0.0, None), (u'Group_22', u'Group_22 Alias', 8.0, 8.0, 0.0, None), (u'Group_23', u'Group_23 Alias', 8.0, 8.0, 0.0, None), (u'Group_24', u'Group_24 Alias', 56.0, 54.0, 2.0, None), (u'Group_25', u'Group_25 Alias', 3.0, 1.0, 2.0, None), (u'Group_26', u'Group_26 Alias', 14.0, 12.0, 2.0, None), (u'Group_27', u'Group_27 Alias', 10.0, 10.0, 0.0, None), (u'Group_28', u'Group_28 Alias', 2.0, 2.0, 0.0, None), (u'Group_29', u'Group_29 Alias', 2.0, 1.0, 1.0, None), (u'Group_30', u'Group_30 Alias', 23.0, 23.0, 0.0, None)]
update_db(cur)
cur.execute(query)
cur.fetchall()
[(u'group_name', u'group_alias', u'group_total', u'status_up', u'status_down', None), (u'Group_1', u'Group_1 Alias', 22.0, 18.0, 4.0, 2.0), (u'Group_2', u'Group_2 Alias', 7.0, 7.0, 0.0, 2.0), (u'Group_3', u'Group_3 Alias', 22.0, 22.0, 0.0, 2.0), (u'Group_4', u'Group_4 Alias', 14.0, 14.0, 0.0, 2.0), (u'Group_5', u'Group_5 Alias', 50.0, 21.0, 29.0, 2.0), (u'Group_6', u'Group_6 Alias', 30.0, 30.0, 0.0, 2.0), (u'Group_7', u'Group_7 Alias', 3.0, 2.0, 1.0, 2.0), (u'Group_8', u'Group_8 Alias', 10.0, 10.0, 0.0, 2.0), (u'Group_9', u'Group_9 Alias', 10.0, 10.0, 0.0, 2.0), (u'Group_10', u'Group_10 Alias', 12.0, 11.0, 1.0, 2.0), (u'Group_11', u'Group_11 Alias', 9.0, 9.0, 0.0, 2.0), (u'Group_12', u'Group_12 Alias', 1.0, 1.0, 0.0, 2.0), (u'Group_13', u'Group_13 Alias', 16.0, 14.0, 2.0, 2.0), (u'Group_14', u'Group_14 Alias', 18.0, 8.0, 10.0, 2.0), (u'Group_15', u'Group_15 Alias', 25.0, 24.0, 1.0, 2.0), (u'Group_16', u'Group_16 Alias', 2.0, 2.0, 0.0, 2.0), (u'Group_17', u'Group_17 Alias', 8.0, 1.0, 7.0, 2.0), (u'Group_18', u'Group_18 Alias', 6.0, 1.0, 5.0, 2.0), (u'Group_19', u'Group_19 Alias', 2.0, 2.0, 0.0, 2.0), (u'Group_20', u'Group_20 Alias', 44.0, 39.0, 5.0, 2.0), (u'Group_21', u'Group_21 Alias', 12.0, 12.0, 0.0, 2.0), (u'Group_22', u'Group_22 Alias', 8.0, 8.0, 0.0, 2.0), (u'Group_23', u'Group_23 Alias', 8.0, 8.0, 0.0, 2.0), (u'Group_24', u'Group_24 Alias', 56.0, 54.0, 2.0, 2.0), (u'Group_25', u'Group_25 Alias', 3.0, 1.0, 2.0, 2.0), (u'Group_26', u'Group_26 Alias', 14.0, 12.0, 2.0, 2.0), (u'Group_27', u'Group_27 Alias', 10.0, 10.0, 0.0, 2.0), (u'Group_28', u'Group_28 Alias', 2.0, 2.0, 0.0, 2.0), (u'Group_29', u'Group_29 Alias', 2.0, 1.0, 1.0, 2.0), (u'Group_30', u'Group_30 Alias', 23.0, 23.0, 0.0, 2.0)]
The documentation says:
The round(X,Y) function returns a floating-point value X rounded to Y digits to the right of the decimal point.
In your program, it returns the value 2, rounded to status_up / group_total digits.