from datetime import datetime
import datetime as dt
import csv
import matplotlib.dates as mdates
import numpy as np
import matplotlib.pyplot as plt
ldays = list()
aesvalues = list()
tezvalues = list()
vdevalues = list()
with open('data/dg_year_50.csv', newline='') as csvfile:
has_header = csv.Sniffer().has_header(csvfile.read(1024))
csvfile.seek(0) # Rewind.
datareader = csv.reader(csvfile, delimiter=',', quotechar='|')
if has_header:
next(datareader) # Skip header row.
for row in datareader:
date_cell = row[0]
aes_cell = row[1]
tez_cell = row[2]
vde_cell = row[3]
datetime_object = datetime.strptime(date_cell, '%d.%m.%Y %H:%M:%S')
aesvalues.append(aes_cell)
tezvalues.append(tez_cell)
vdevalues.append(tez_cell)
ldays.append(datetime_object)
days = mdates.date2num(np.array(ldays))
s_aesvalues = sorted(list(map(int, aesvalues)))
s_tezvalues = sorted(list(map(int, tezvalues)))
s_vdevalues = sorted(list(map(int, vdevalues)))
# These are the colors that will be used in the plot
color_sequence = ['#ffbb78', '#e377c2', '#1f77b4', '#ff7f0e', '#2ca02c',
'#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
'#8c564b', '#c49c94', '#aec7e8', '#e377c2', '#f7b6d2',
'#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5']
# make a plot
fig, ax = plt.subplots(figsize=(14, 12))
# plot the data
majors = [s_tezvalues, s_vdevalues, s_aesvalues]
for rank, column in enumerate(majors):
line = plt.plot_date(x=days, y=majors[rank], lw=2.5, color=color_sequence[rank])
fig.suptitle('Дані по енергетиці', fontsize=18, ha='center')
plt.xlabel('Дата', fontsize=14, ha='center')
plt.ylabel('Кількість енергії (Вт)', fontsize=14, ha='center')
plt.show()
I have 'data/dg_year_50.csv' with 4 columns (Дата,АЕС,ТЕЦ,ВДЕ).
First one stands for "Date" and rest 3 for the appropriate values that needs to be visualized as a lines in plot.
I do not understand why I'm getting visualized 2 lines instead of 3.
The result visualization looks like this:
I found the mistake, it was in the following string:
vdevalues.append(tez_cell)
It should be changed like this:
vdevalues.append(vde_cell)
And yes, definitely, with pandas solution is much easier:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
data = pd.read_csv('data/dg_year_50.csv', index_col=0, parse_dates=True).apply(np.int64)
data.plot()
plt.show()
Attaching the result figure:
Related
Here is the result. As you can see the order is 1,2,4,3 it should be 1,2,3,4. Here how i render it
import csv
import matplotlib.pyplot as plt
time = []
total = []
with open('new.csv','r') as csvfile:
plots = csv.reader(csvfile, delimiter = ',')
for row in plots:
time.append(row[0])
total.append(row[1])
plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.show()
and here is my csv
2022-08-25,1
2022-08-26,2
2022-08-27,4
2022-08-29,3
How can i fix it ? thanks in advance
The problem with your code is that the y-values are strings. This simple casting to float would fix that:
import matplotlib.pyplot as plt
import csv
time = []
total = []
with open('new.csv','r') as csvfile:
plots = csv.reader(csvfile, delimiter = ',')
for row in plots:
time.append(row[0])
total.append(float(row[1]))
plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.show()
But notice there is also the same problem with dates, last two dates are not one day from each other but twice as much. Matplotlib know how to handle datetimes though:
import csv
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
time = []
total = []
with open('new.csv','r') as csvfile:
plots = csv.reader(csvfile, delimiter = ',')
for row in plots:
time.append(dt.strptime(row[0], "%Y-%m-%d"))
total.append(float(row[1]))
plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
plt.gcf().autofmt_xdate()
plt.show()
Little help taken from: Format of datetime in pyplot axis
How to convert str to date? I also want the date only appears mm/yy as the x-axis in the plot.
The data shows as below.
21/12/2020, 0,
22/12/2020, 1,
23/12/2020, 0,
24/12/2020, 0,
25/12/2020, 1,
Below is the code I used.
import numpy as np
import matplotlib.dates as mdates
from datetime import datetime as dt
import csv
with open('p211.csv', newline='') as f:
reader = csv.reader(f)
rain = []
for row in reader:
rain_t = float(row[1])
rain.append(rain_t)
import csv
with open('p211.csv', newline='') as f:
reader = csv.reader(f)
date = []
for row in reader:
date_t=str(row[0])
date.append(date_t)
P=np.array(rain)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(20, 12))
ax.bar(x=date, height=P, color='royalblue')
ax.legend()
ax.set_ylabel('Inputs [mm/day]')
plt.show()
using datetime package
from datetime import datetime
datetime.strptime(dtime, '%d/%m/%Y') # dtime = '21/12/2020'
I am writing a script which can be used to plot the country wise covid time-series data. It is working fine when I plot a single country but The scale at Y-axis is in appropriately printed.
Plot which I am getting The Problem is after printing the maximum value for one country the y axis is extrapolated with smaller values to plot the data points of subsequent countries.
The code for my script is as follows
import requests
from contextlib import closing
import csv
import matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
def prepareCountryWiseData(country):
countryWise = {}
with closing(requests.get(url, stream=True)) as r:
f = (line.decode('utf-8') for line in r.iter_lines())
reader = csv.reader(f, delimiter=',', quotechar='"')
active = []
recovered = []
dates = []
for row in reader:
if row[1] == country:
dates.append(row[0])
active.append(row[2])
recovered.append(row[3])
return (dates, active, recovered)
def plotCountryWiseData(countryList):
plotable = []
for country in countryList:
dates,active,recovered = (prepareCountryWiseData(country))
plt.plot(active)
plt.ylabel('active_cases')
plt.legend(countryList)
plt.show()
plotCountryWiseData(['India','US','Italy'])
If you can use the pandas module your job would be much easier:
import pandas as pd, matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
df = pd.read_csv(url)
fig,ax = plt.subplots()
for k,g in df[df['Country'].isin(['India','US','Italy'])].groupby('Country'):
ax = g.plot(ax=ax,kind='line',x='Date',y='Confirmed',label=k)
plt.gcf().suptitle('Active Cases')
plt.show()
Result:
I basically want to convert the unix time values I have in the first row of my text file named 'EKTE9' to date. I think using the datetime library is the way to go, but I don't know how to implement that in my code. Some answers would be appreciated.
import matplotlib.pyplot as plt
import csv
x = []
y = []
with open('EKTE9.txt','r') as csvfile:
plots = csv.reader(csvfile, delimiter=',')
for row in plots:
x.append(int(row[0]))
y.append(float(row[3]))
plt.plot(x,y, label='Temperatur')
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
Here are some values from the EKTE9.txt file:
1554058225,0.80,2.90,13.60,27.20
1554058525,0.30,0.80,9.60,26.70
The values goes on for about 200 lines
That did the job. Thank you very much Martin Evans.
Fina code:
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
import csv
x = []
y = []
with open('EKTE9.txt','r') as csvfile:
plots = csv.reader(csvfile, delimiter=',')
for row in plots:
x.append(matplotlib.dates.date2num(datetime.fromtimestamp(int(row[0]))))
y.append(float(row[3]))
hfmt = matplotlib.dates.DateFormatter('%d\n%H:%M')
plt.plot(x,y, label='Temperatur')
plt.gca().xaxis.set_major_formatter(hfmt)
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
You would first create a datetime object using datetime.fromtimestamp(). This can then be converted to a matplotlib number using date2num(). Finally, you should use a DateFormatter() to help with displaying the x-axis.
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
import csv
x = []
y = []
with open('EKTE9.txt', 'r', newline='') as csvfile:
plots = csv.reader(csvfile, delimiter=',')
for row in plots:
x.append(matplotlib.dates.date2num(datetime.fromtimestamp(int(row[0]))))
y.append(float(row[3]))
hfmt = matplotlib.dates.DateFormatter('%d\n%H:%M')
plt.plot(x,y, label='Temperatur')
plt.gca().xaxis.set_major_formatter(hfmt)
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
This would then show your two values as follows:
I have data like:
Machine_id Cycling Idle
81091001 41000000000 19000000000
81091001 40000000000 19000000000
81091001 41000000000 19000000000
81091001 41000000000 20000000000
81091001 41000000000 19000000000
Code for plotting Pie chart :
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')
df = df.set_index('Machine_id')
for ind in df.index:
fig, ax = plt.subplots(1,1)
fig.set_size_inches(5,5)
df.iloc[ind].plot(kind='pie', ax=ax, autopct='%1.1f%%')
ax.set_ylabel('')
ax.set_xlabel('')
I am getting a error here like:
IndexError: single positional indexer is out-of-bounds
Then how a pie chart can be formed for Cycling v/s Idle in pandas each Machine_id wise ?
Here is your problem solved:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(palette='Paired')
df = pd.read_csv('sample1.csv')
#df = df.set_index('Machine_id') comment this
for ind in df.index:
fig, ax = plt.subplots(1,1)
fig.set_size_inches(5,5)
df.iloc[ind].plot(kind='pie', ax=ax, autopct='%1.1f%%')
ax.set_ylabel('')
ax.set_xlabel('')
fig.show() #plot/show final results
another way, to consider individual chart with Cycling and Idle time per row. A Pie Chart for each line. (Maybe Pie Charts are not the best way to illustrate this but any way)
Ref. https://matplotlib.org/api/pyplot_api.html
import csv as csv
import matplotlib.pyplot as plt
colors = ['r', 'g']
with open('sample1.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
i = 0
for row in readCSV:
if i == 0:
activities = [row[1], row[2]]
title = row[0]
else:
slices = [row[1], row[2]]
plt.title("Machine ID: " + row[0]) #title is here UPDATED
plt.pie(slices, labels=activities, colors=colors, startangle=90, autopct='%.1f%%')
plt.show()
i += 1