I am writing a script that plots country-wise COVID time-series data. It works fine when I plot a single country, but when I plot several countries the scale on the Y-axis is printed incorrectly.
This is the plot I am getting. The problem is that, after the maximum value for one country has been printed, the Y-axis is extended with smaller values in order to plot the data points of the subsequent countries.
The code for my script is as follows
import requests
from contextlib import closing
import csv
import matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
def prepareCountryWiseData(country):
    """Download the aggregated CSV and collect the rows for one country.

    The file at the module-level ``url`` is streamed and parsed line by
    line; a row is kept when its second column equals ``country``.

    Returns a tuple ``(dates, active, recovered)`` of three lists taken
    from the first, third and fourth CSV columns.  No conversion is
    applied, so every element is the string read from the file.
    """
    dates, active, recovered = [], [], []
    with closing(requests.get(url, stream=True)) as response:
        decoded = (raw.decode('utf-8') for raw in response.iter_lines())
        for record in csv.reader(decoded, delimiter=',', quotechar='"'):
            if record[1] != country:
                continue
            dates.append(record[0])
            active.append(record[2])
            recovered.append(record[3])
    return (dates, active, recovered)
def plotCountryWiseData(countryList):
    """Draw one line per country on a shared figure and display it.

    For every name in ``countryList`` the second list returned by
    ``prepareCountryWiseData`` is plotted against its index; the legend
    entries are the names in ``countryList``, in the same order.
    """
    for name in countryList:
        _, cases, _ = prepareCountryWiseData(name)
        plt.plot(cases)
    plt.ylabel('active_cases')
    plt.legend(countryList)
    plt.show()


plotCountryWiseData(['India', 'US', 'Italy'])
If you can use the pandas module your job would be much easier:
import pandas as pd, matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
df = pd.read_csv(url)

fig, ax = plt.subplots()

# Keep only the three countries of interest, then draw one line per country
# on the shared axes, labelled with the country name.
selected = df[df['Country'].isin(['India', 'US', 'Italy'])]
for country, frame in selected.groupby('Country'):
    frame.plot(ax=ax, kind='line', x='Date', y='Confirmed', label=country)

fig.suptitle('Active Cases')
plt.show()
Result:
Related
enter image description here
Hi everyone, I'm trying to plot data from a CSV file as a graph. There are 7 columns in my CSV. I've already plotted the Genre column with my code:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
df = pd.read_csv('booksellers.csv')
genre = df['Genre']

# Tally the rows: every value that is not exactly "Fiction" is counted as
# non-fiction.
countFiction = sum(1 for entry in genre if entry == "Fiction")
countNonFiction = len(genre) - countFiction

labels = 'Fiction', 'Non Fiction'
sizes = [countFiction, countNonFiction]

# Pie chart of the two counts, with percentage labels on the wedges.
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, startangle=90, autopct='%1.1f%%')
plt.show()
Now I want to plot two more columns: 'Author' and the average of 'User Rating'. If an author appears more than once, how can I get a single entry per author with their average user rating? Also, what kind of graph is suitable for this?
# You can iterate over the DataFrame row by row and group the ratings
# by author.
from statistics import mean

# Maps each author to the list of all 'User Rating' values seen for them.
data = {}
for index, row in df.iterrows():
    data.setdefault(row['Author'], []).append(row['User Rating'])

# Maps each author to the mean of their ratings (one entry per author).
rates_by_authors = {author: mean(ratings) for author, ratings in data.items()}

# Once the data has been built with the code above:
# - list(rates_by_authors.keys()) is the list of authors, usable as the X axis
# - list(rates_by_authors.values()) is the matching list of average ratings,
#   usable as the Y axis
I have data that this script outputs as a scatterplot of ID_all[:,0],ID_all[:,1], but I want to also save the data used for the scatter plot as two columns (as a csv file). Any help really appreciated! Thanks in advance.
More Info:
Here is a link to the initial csv data so that you can see my problem (only first two columns used).
https://www.dropbox.com/s/966iviv01tccv1k/contacts.csv?dl=0
If you plot the input csv file (see figure 1), you will see that it only has half of the values that I need. So when I plot this with the script below, I transpose the 'matrix', making a mirror image of it with respect to the main diagonal. With this script I plot both the original and the transpose (figure 2), but I cannot figure out how to also output the plotted data as columns in addition to the figure.
import numpy as np
import matplotlib.pyplot as plt
def parsefile(filename):
    """Read pairs of integer IDs from a whitespace-separated text file.

    Each line contributes one row made of its first two fields converted
    to ``int``.  Parsing is best-effort: it stops at the first line that
    does not provide two integers and returns the rows read so far.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_rows, 2)``; ``(0, 2)`` when no row
        could be parsed.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    print('Opening File')
    rows = []
    # The with-block guarantees the handle is closed, even on early exit.
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            try:
                rows.append((int(fields[0]), int(fields[1])))
            except (ValueError, IndexError):
                # Malformed or short line: keep what was parsed so far.
                break
        else:
            # Only reported when every line was parsed successfully.
            print('File Opened')
    # Collecting rows in a list and converting once avoids both the
    # duplicated first row and the repeated vstack copies.
    return np.array(rows, dtype=int).reshape(-1, 2)
def get_scatter_data(ID_list):
    """Return the ID pairs, their mirrored pairs and the diagonal pairs.

    ``ID_list`` is indexed as a two-column array.  The result stacks, in
    this order: the original rows, the same rows with the two columns
    swapped, and one ``(k, k)`` row for every value of
    ``np.linspace(1, N, N)`` where ``N`` is the largest entry of
    ``ID_list``.
    """
    mirrored = np.column_stack((ID_list[:, 1], ID_list[:, 0]))
    N = np.max(ID_list)
    axis_values = np.linspace(1, N, N)
    ID_diagonal = np.column_stack((axis_values, axis_values))
    return np.vstack((ID_list, mirrored, ID_diagonal))
def plot_contactmap(ID_all, savefilename):
    """Scatter-plot the two columns of ``ID_all``, show and save the figure.

    The figure is displayed first and then written to
    ``savefilename + '.png'`` and ``savefilename + '.pdf'``.
    """
    figure = plt.figure()
    xs, ys = ID_all[:, 0], ID_all[:, 1]
    plt.scatter(xs, ys, marker='o', s=0.01)
    plt.axis('square')
    # Both axes carry the same label text and font size.
    for set_label in (plt.xlabel, plt.ylabel):
        set_label('Residue Number', fontsize=14)
    plt.tick_params(axis='both', which='major', labelsize=14)
    plt.show()
    for extension in ('.png', '.pdf'):
        figure.savefig(savefilename + extension, bbox_inches='tight')
def main(filename, savefilename):
    """Parse ``filename + '.csv'``, build the scatter data and plot it."""
    contacts = parsefile(filename + '.csv')
    plot_contactmap(get_scatter_data(contacts), savefilename)


main('contacts', 'test')
You can save using numpy.savetxt() method
import numpy as np

# Example data: three rows, two integer columns.
ID_all = np.arange(1, 7).reshape(3, 2)

# Write one comma-separated line per row, formatting every value as an integer.
np.savetxt('output.csv', ID_all, fmt="%d", delimiter=',')
If you want to give column names, you can use the header argument of the np.savetxt() method.
# comments='' stops savetxt from prefixing the header row with its default
# '# ' marker, and the header has no space after the comma, so the first
# line of the file is exactly "first,second".
np.savetxt('output.csv', ID_all, delimiter=',', header='first,second', comments='', fmt="%d")
My goal is to use the sorted result data to plot "Month vs Mean Temp" graph for each year on the same window.
I've sorted the data by its first two columns, which hold the year and the month respectively, and saved the sorted data to a file called NewFile, but I can't seem to get to a solution from here. I first used the csv reader and am now using numpy.
Code:
import numpy as np
import matplotlib.pyplot as plt
# Question snippet: load Data_5.1.csv into an array and plot columns 1 and 3
# against column 0 on a single set of axes.
plt.style.use('ggplot')
csv1 = open('Data_5.1.csv')
# NOTE(review): the next line opens four parentheses but closes only two, and
# `csv` is not imported anywhere in this snippet.
data = np.array(list(csv.reader(csv1,delimiter=',').astype("string")
year = data[:,0]
mounth = data[:,1]
temp= data[:,3]
fig, ax = plt.subplots(figsize=(10,10))
ax.plot(year, mounth, label='mounth/year')
ax.plot(year, temp, label='year/temp')
plt.legend()
But it just throws an error saying:
File "<ipython-input-282-282e91df631f>", line 9
year = data[:,0]
^
SyntaxError: invalid syntax
I will put two links to the files, the Data_5.1 and the NewFile respectively
Data_5.1
NewFile
1 - You didn't close brackets in line 6, hence you are getting the error in line 8.
2 - astype("string") is not needed in line 6.
I fixed your code, but you will have to complete the subplotting. Good luck!
import numpy as np
import matplotlib.pyplot as plt
import csv
plt.style.use('ggplot')

# Read every row of the CSV as a list of strings; the with-block closes the
# file as soon as the rows have been loaded.
with open('Data_5.1.csv', newline='') as csv1:
    data = np.array(list(csv.reader(csv1, delimiter=',')))

year = data[:, 0]
mounth = data[:, 1]
temp = data[:, 3]

fig, ax = plt.subplots(2, 2)  # This creates a 2x2 grid (four subplots) in one window
ax[0, 0].plot(year, mounth, label='mounth/year')  # This will plot in the 0,0 subplot
ax[0, 1].plot(year, temp, label='year/temp')  # This will plot in the 0,1 subplot
# A legend belongs to one set of axes, so it is requested on each subplot that
# has a labelled line; plt.legend() would only address the last, empty subplot.
ax[0, 0].legend()
ax[0, 1].legend()
'''
For you to continue.
'''
plt.show()
Your data is in a CSV file, and it's non-homogenous in type. Pandas is really the more appropriate tool for this.
I had to adapt your CSV slightly due to encoding errors, here is what it ended up looking like:
year,Month,Other Month,temperature_C
2003,Jan.,Some val,17.7
2004,Jan.,Some val,19.5
2005,Jan.,Some val,17.3
2006,Jan.,Some val,17.8
...
Here is a general sketch of what the code you shared could look like after the refactoring:
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')

# Load the CSV with pandas: the first line of the file is treated as a header
# and replaced by the explicit column names, and each column gets a fixed dtype.
df_1 = pd.read_csv('../resources/Data_5.1.csv', header=0, names=['year', 'month', 'some_col', 'temp'],
                   dtype={'some_col': str, 'temp': float, 'month': str, 'year': str})

year = df_1['year']
month = df_1['month']
temp = df_1['temp']

fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(year, month, label='month/year')
ax.plot(year, temp, label='year/temp')
# Without a legend the labels passed to plot() above are never displayed.
ax.legend()
plt.show()
Let me know if you have any questions :)
I have a large CSV file that contains a great many dates, so when I plot it the x axis shows every single one of them (for example, from 2000-12-24 to 2017-12-24), and the same happens with the values on the y axis.
I have tried to use a set, but a set has to be sorted, and the problem is that once I sort it, the Y data no longer corresponds to the sorted dates.
import matplotlib.pyplot as plt
import urllib as u
import numpy as np
import csv
# Question script: download the price history, store it in nw.csv, read the
# columns back into lists and plot a reduced form of the dates and highs.
stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'
# One list per CSV column (columns 0-6), filled row by row below.
date = []
openp = []
high = []
low = []
close = []
adjclose = []
volume = []
# Fetch the whole response body and decode it to text.
text = u.request.urlopen(stock_price_url).read().decode()
# Save a local copy of the downloaded text.
with open('nw.csv', 'w') as fw:
fw.write(text)
fw.close()
with open('nw.csv', 'r') as csvf:
f = csv.reader(csvf, delimiter=',')
for row in f:
# Skip the header row, i.e. the row that contains the field 'Date'.
if 'Date' not in row:
date.append(row[0])
openp.append(row[1])
high.append(row[2])
low.append(row[3])
close.append(row[4])
adjclose.append(row[5])
volume.append(row[6])
# Keep the first four characters of every date string (presumably the year,
# given dates such as 2000-12-24); the set discards duplicates.
dateset = set([])
for z in date:
dateset.add(z[:4])
# Keep the first three characters of every high value (still strings).
highset = []
for z in high:
highset.append(z[:3])
# NOTE(review): both arguments are sets here, so the x and y values are no
# longer paired with each other.
plt.plot(set(dateset), set(highset), linewidth=0.5)
plt.show()
You need to convert the dates first into a Python datetime object. This can then be converted into a matplotlib number. With this you can then tell matplotlib to add ticks based on year or month changes:
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt
import urllib as u
import numpy as np
import csv
# `import urllib` / `import matplotlib` alone do not guarantee that these
# submodules are loaded, so import them explicitly before they are used.
import urllib.request
import matplotlib.dates

stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'

date = []
high = []

# Download the CSV text and keep a local copy in nw.csv.
text = u.request.urlopen(stock_price_url).read().decode()

with open('nw.csv', 'w') as f_nw:
    f_nw.write(text)

with open('nw.csv', 'r', newline='') as f_nw:
    csv_nw = csv.reader(f_nw)
    header = next(csv_nw)  # skip the header row

    for row in csv_nw:
        if not row:
            # Ignore blank lines (e.g. at the end of the download).
            continue
        # Dates become matplotlib date numbers so that date locators work.
        date.append(matplotlib.dates.date2num(datetime.strptime(row[0], '%Y-%m-%d')))
        # Convert to float: plotting the raw strings would give a categorical
        # y-axis instead of a numeric one.
        high.append(float(row[2]))

ax = plt.gca()
#ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator([1, 7]))
#ax.xaxis.set_minor_formatter(matplotlib.dates.DateFormatter('%b'))
# One major tick per year, labelled with the four-digit year.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
#ax.tick_params(pad=20)

plt.plot(date, high, linewidth=0.5)
plt.show()
This would give you just the years:
Or if you uncomment the minor locator/formatter you would get:
Note:
You do not need to close a file if you are opening it with a with block.
The script assumes you are using Python 3.x.
To skip the header just read it in using next() before iterating over the rows in your for loop.
I'm a complete beginner at Python and matplotlib, but I'm trying to learn! I would like to use matplotlib to plot some simple data from a CSV file containing dates and a frequency for each date, with the dates on the X axis and the frequencies on the Y axis. Example data from the CSV:
2011/12/15,5
2011/12/11,4
2011/12/19,2
I checked the "matplotlib.sf.net/examples" out but appears all the test data is downloaded from a http get. I would really appreciate if someone could guide me with some example code of how to read in (presumably using CSV reader) and display data in chart.
Thank you!!
Maybe you look for something like:
import csv
import datetime as dt
import matplotlib.pyplot as plt
arch = 'C:\\Python26\\programas\\test.csv'

# Parse every "date,value" row into a (datetime, float) pair.  The with-block
# closes the file once all rows have been read.
with open(arch) as handle:
    data = [(dt.datetime.strptime(item, "%Y/%m/%d"), float(value))
            for item, value in csv.reader(handle)]

# Sort chronologically (tuples compare by their first element, the date).
data.sort()
[x, y] = zip(*data)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y)
ax.grid(True)

# Rotate and align the date tick labels so they do not overlap.
fig.autofmt_xdate()

plt.show()
I've tried to keep my code as simple as possible and this is by no means elegant, but here you go:
import csv
import matplotlib.pyplot as plt
### Making test CSV file ###
# One (date, frequency) pair per row, written as two real CSV columns.
data = [('2011/12/15', 5), ('2011/12/11', 4), ('2011/12/19', 2), ('2011/12/16', 3),
        ('2011/12/20', 8), ('2011/12/14', 4), ('2011/12/10', 10), ('2011/12/9', 7)]
# The csv module needs text-mode files opened with newline='' on Python 3.
with open('test.csv', 'w', newline='') as f:
    csv.writer(f).writerows(data)

### Extract data from CSV ###
dates = []
freq = []
with open('test.csv', newline='') as n:
    for date, count in csv.reader(n):
        dates.append(date)
        # Convert to int so the y-axis is numeric rather than categorical.
        freq.append(int(count))

### Do plot ###
# Dates are drawn at evenly spaced integer positions and used as tick labels.
false_x = range(len(dates))
plt.plot(false_x, freq, 'o-')
plt.xticks(false_x, dates, rotation=45)
# plt.axis([xmin, xmax, ymin, ymax]) - sets axes limits on graph
# Limits follow the data: one unit of margin around the points.
plt.axis([-1, len(dates), 0, max(freq) + 1])
plt.show()
This makes: