I have a large CSV file that contains many dates, so when I plot it the x axis prints every one of them (for example, everything from 2000-12-24 to 2017-12-24), and the y axis has the same problem.
I have tried using a set to reduce the number of labels, but a set needs to be sorted, and once I sort it the Y values no longer correspond to the sorted dates.
import matplotlib.pyplot as plt
import urllib as u
import numpy as np
import csv

# Asker's original script with its block structure restored.
stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'
date = []
openp = []
high = []
low = []
close = []
adjclose = []
volume = []

# Download the CSV text and keep a local copy in nw.csv.
text = u.request.urlopen(stock_price_url).read().decode()
with open('nw.csv', 'w') as fw:
    fw.write(text)
    fw.close()

# Read the copy back, one list per column, skipping the header row.
with open('nw.csv', 'r') as csvf:
    f = csv.reader(csvf, delimiter=',')
    for row in f:
        if 'Date' not in row:
            date.append(row[0])
            openp.append(row[1])
            high.append(row[2])
            low.append(row[3])
            close.append(row[4])
            adjclose.append(row[5])
            volume.append(row[6])

# Attempt to thin the labels: keep only the year part of each date and the
# first three characters of each high value.
dateset = set([])
for z in date:
    dateset.add(z[:4])
highset = []
for z in high:
    highset.append(z[:3])

# NOTE: this is the behaviour the question is about. Sets are unordered and
# de-duplicated, so the two arguments no longer pair up row by row.
plt.plot(set(dateset), set(highset), linewidth=0.5)
plt.show()
You need to convert the dates first into a Python datetime object. This can then be converted into a matplotlib number. With this you can then tell matplotlib to add ticks based on year or month changes:
from datetime import datetime
import matplotlib
import matplotlib.dates  # import the submodule explicitly; `import matplotlib` alone does not guarantee it
import matplotlib.pyplot as plt
import urllib as u
import urllib.request  # makes `u.request` available; `import urllib` alone does not load it
import numpy as np
import csv

stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'
date = []
high = []

# Download the CSV text and keep a local copy in nw.csv.
text = u.request.urlopen(stock_price_url).read().decode()
with open('nw.csv', 'w') as f_nw:
    f_nw.write(text)

with open('nw.csv', 'r', newline='') as f_nw:
    csv_nw = csv.reader(f_nw)
    header = next(csv_nw)  # consume the header row so the loop only sees data
    for row in csv_nw:
        # Dates become matplotlib date numbers so the date locators below apply.
        date.append(matplotlib.dates.date2num(datetime.strptime(row[0], '%Y-%m-%d')))
        # csv yields text; convert so the y axis is numeric rather than categorical.
        high.append(float(row[2]))

ax = plt.gca()
#ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator([1, 7]))
#ax.xaxis.set_minor_formatter(matplotlib.dates.DateFormatter('%b'))
# One major tick per year, labelled with the four-digit year.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
#ax.tick_params(pad=20)
plt.plot(date, high, linewidth=0.5)
plt.show()
This would give you just the years:
Or if you uncomment the minor locator/formatter you would get:
Note:
You do not need to close a file if you are opening it with a with block.
The script assumes you are using Python 3.x.
To skip the header just read it in using next() before iterating over the rows in your for loop.
Related
Here is the result. As you can see, the order on the y axis is 1, 2, 4, 3 when it should be 1, 2, 3, 4. Here is how I render it:
import csv
import matplotlib.pyplot as plt

# Asker's original script with its block structure restored.
time = []
total = []
with open('new.csv','r') as csvfile:
    plots = csv.reader(csvfile, delimiter = ',')
    for row in plots:
        time.append(row[0])
        # NOTE: row[1] is appended as text, not as a number; this is the
        # behaviour the question is about.
        total.append(row[1])

plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.show()
and here is my csv
2022-08-25,1
2022-08-26,2
2022-08-27,4
2022-08-29,3
How can I fix it? Thanks in advance.
The problem with your code is that the y-values are strings. This simple casting to float would fix that:
import matplotlib.pyplot as plt
import csv

time = []
total = []
# newline='' is what the csv module expects for files it reads.
with open('new.csv', 'r', newline='') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        time.append(row[0])
        # Convert the count to a number so the y axis is ordered numerically.
        total.append(float(row[1]))

plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.show()
But notice that the dates have the same problem: the last two dates are two days apart rather than one, yet they are drawn with the same spacing as the others. Matplotlib knows how to handle datetimes, though:
import csv
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

time = []
total = []
# newline='' is what the csv module expects for files it reads.
with open('new.csv', 'r', newline='') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        # Real datetime objects let matplotlib space the points by elapsed time.
        time.append(dt.strptime(row[0], "%Y-%m-%d"))
        total.append(float(row[1]))

plt.plot(time, total)
plt.xlabel('Dates')
plt.ylabel('Transactions')
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
# Rotate and right-align the tick labels so the long date strings do not overlap.
plt.gcf().autofmt_xdate()
plt.show()
Little help taken from: Format of datetime in pyplot axis
I am writing a script that plots country-wise COVID time-series data. It works fine when I plot a single country, but with several countries the scale on the Y axis is printed incorrectly.
In the plot I am getting, the problem is that after the maximum value for one country has been printed, the y axis continues with smaller values in order to plot the data points of the subsequent countries.
The code for my script is as follows
import requests
from contextlib import closing
import csv
import matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
def prepareCountryWiseData(country):
    """Stream the CSV at `url` and collect the rows whose second column equals `country`.

    Returns a tuple (dates, active, recovered) of three parallel lists taken
    from columns 0, 2 and 3 of each matching row.
    """
    active = []
    recovered = []
    dates = []
    # closing() releases the HTTP connection once the streamed body has been read.
    with closing(requests.get(url, stream=True)) as r:
        f = (line.decode('utf-8') for line in r.iter_lines())
        reader = csv.reader(f, delimiter=',', quotechar='"')
        for row in reader:
            if row[1] == country:
                dates.append(row[0])
                # NOTE: csv yields text, so these values are kept as strings;
                # this is the behaviour the question is about.
                active.append(row[2])
                recovered.append(row[3])
    return (dates, active, recovered)
def plotCountryWiseData(countryList):
    """Draw one line per country in `countryList` on a shared figure and show it."""
    for country in countryList:
        dates, active, recovered = prepareCountryWiseData(country)
        # Only the third CSV column is plotted; it is drawn against its list index.
        plt.plot(active)
    plt.ylabel('active_cases')
    # Legend entries follow the order in which the lines were added above.
    plt.legend(countryList)
    plt.show()


plotCountryWiseData(['India','US','Italy'])
If you can use the pandas module your job would be much easier:
import pandas as pd
import matplotlib.pyplot as plt

url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
# Parse 'Date' while loading so the x axis is a real time axis instead of text labels.
df = pd.read_csv(url, parse_dates=['Date'])

fig, ax = plt.subplots()
selected = df[df['Country'].isin(['India', 'US', 'Italy'])]
# groupby yields (country name, rows for that country); every group is drawn on the same axes.
for k, g in selected.groupby('Country'):
    ax = g.plot(ax=ax, kind='line', x='Date', y='Confirmed', label=k)
fig.suptitle('Active Cases')
plt.show()
Result:
I basically want to convert the Unix time values in the first column of my text file named 'EKTE9' to dates. I think using the datetime library is the way to go, but I don't know how to implement that in my code. Some answers would be appreciated.
import matplotlib.pyplot as plt
import csv

# Asker's original script with its block structure restored.
x = []
y = []
with open('EKTE9.txt','r') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        # Column 0 is plotted as a raw integer; column 3 supplies the y value.
        x.append(int(row[0]))
        y.append(float(row[3]))

plt.plot(x,y, label='Temperatur')
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
Here are some values from the EKTE9.txt file:
1554058225,0.80,2.90,13.60,27.20
1554058525,0.30,0.80,9.60,26.70
The values goes on for about 200 lines
That did the job. Thank you very much Martin Evans.
Final code:
import matplotlib
import matplotlib.dates  # import the submodule explicitly; `import matplotlib` alone does not guarantee it
import matplotlib.pyplot as plt
from datetime import datetime
import csv

x = []
y = []
# newline='' is what the csv module expects for files it reads.
with open('EKTE9.txt', 'r', newline='') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        # Unix timestamp -> datetime (local time) -> matplotlib date number.
        x.append(matplotlib.dates.date2num(datetime.fromtimestamp(int(row[0]))))
        y.append(float(row[3]))

# Tick labels show the day of month on one line and HH:MM on the next.
hfmt = matplotlib.dates.DateFormatter('%d\n%H:%M')
plt.plot(x,y, label='Temperatur')
plt.gca().xaxis.set_major_formatter(hfmt)
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
You would first create a datetime object using datetime.fromtimestamp(). This can then be converted to a matplotlib number using date2num(). Finally, you should use a DateFormatter() to help with displaying the x-axis.
import matplotlib
import matplotlib.dates  # import the submodule explicitly; `import matplotlib` alone does not guarantee it
import matplotlib.pyplot as plt
from datetime import datetime
import csv

x = []
y = []
with open('EKTE9.txt', 'r', newline='') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        # datetime.fromtimestamp() interprets the Unix time in the local time zone;
        # date2num() then turns it into the float matplotlib uses for date axes.
        timestamp = datetime.fromtimestamp(int(row[0]))
        x.append(matplotlib.dates.date2num(timestamp))
        y.append(float(row[3]))

# Tick labels show the day of month on one line and HH:MM on the next.
hfmt = matplotlib.dates.DateFormatter('%d\n%H:%M')
plt.plot(x,y, label='Temperatur')
plt.gca().xaxis.set_major_formatter(hfmt)
plt.xlabel('Tid')
plt.ylabel('Temperatur')
plt.title('Grafen viser temperatur under forsøket\n')
plt.legend()
plt.show()
This would then show your two values as follows:
I am having a text file with time and a float value. I have heard that it is possible to plot these two columns using matplotlib. Searched similar threads but could not make it happening. My code and Data are-
import math
import datetime
import matplotlib
import matplotlib.pyplot as plt
import csv

# Asker's original script with its block structure restored.
with open('MaxMin.txt','r') as f_input:
    csv_input = csv.reader(f_input, delimiter=' ', skipinitialspace=True)
    x = []
    y = []
    for cols in csv_input:
        # NOTE: x and y are rebound on every iteration instead of being
        # appended to, and date2num() is handed the raw text of column 0;
        # this is the behaviour the question is about.
        x = matplotlib.dates.date2num(cols[0])
        y = [float(cols[1])]

# naming the x axis
plt.xlabel('Real-Time')
# naming the y axis
plt.ylabel('Acceleration (m/s2)')
# giving a title to my graph
plt.title('Accelerometer reading graph!')
# plotting the points
plt.plot(x, y)
# beautify the x-labels
plt.gcf().autofmt_xdate()
# function to show the plot
plt.show()
And part of the Data in MaxMin.txt
23:28:30.137 10.7695982757
23:28:30.161 10.4071263594
23:28:30.187 9.23969855461
23:28:30.212 9.21066485657
23:28:30.238 9.25117645762
23:28:30.262 9.59227680741
23:28:30.287 9.9773536301
23:28:30.312 10.0128275058
23:28:30.337 9.73353441664
23:28:30.361 9.75064993988
23:28:30.387 9.717339267
23:28:30.412 9.72736788911
23:28:30.440 9.62451269364
I am a beginner in Python and on python 2.7.15 in windows 10 pro(64 bit). I have installed numpy,scipy scikit-learn already. Please help.
Final output graph from the complete data set. Thanks @ImportanceOfBeingErnest
You could use pandas to achieve this, first store your file in a .csv format:
import math
import datetime
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd #### import this library

# header=None: the sample data shown in the question has no header row, so
# without it the first reading would be consumed as column names. Drop the
# argument if your file does start with a header line.
df = pd.read_csv("path_to_file.csv", delimiter=' ', encoding='latin-1', header=None)
# Select columns by position; DataFrame.ix no longer exists in current pandas.
x = df.iloc[:, 0]
y = df.iloc[:, 1]
# naming the x axis
plt.xlabel('Real-Time')
# naming the y axis
plt.ylabel('Acceleration (m/s2)')
# giving a title to my graph
plt.title('Accelerometer reading graph!')
# plotting the points
plt.plot(x, y)
# beautify the x-labels
plt.gcf().autofmt_xdate()
# function to show the plot
plt.show()
If the first column does not have a datetime format, you can convert it like this: df[df.columns[0]] = pd.to_datetime(df.iloc[:, 0])
and you take the hour for example:
# Replace the first column with the hour of each timestamp. The column is
# reassigned by label (not through .iloc) so it is replaced outright.
df[df.columns[0]] = df.iloc[:, 0].map(lambda ts: ts.hour)
The output after running the code was like:
The error you made in the original attempt is actually pretty minor. Instead of appending the values from the loop you redefined them.
Also you would need to use datestr2num instead of date2num, because the string read in is not yet a date.
import matplotlib
import matplotlib.dates  # import the submodule explicitly; `import matplotlib` alone does not guarantee it
import matplotlib.pyplot as plt
import csv

with open('MaxMin.txt','r') as f_input:
    csv_input = csv.reader(f_input, delimiter=' ', skipinitialspace=True)
    x = []
    y = []
    for cols in csv_input:
        # Append to the lists (rather than rebinding them) so every row is kept.
        # datestr2num() parses the time text into a matplotlib date number.
        x.append(matplotlib.dates.datestr2num(cols[0]))
        y.append(float(cols[1]))

# naming the x axis
plt.xlabel('Real-Time')
# naming the y axis
plt.ylabel('Acceleration (m/s2)')
# giving a title to my graph
plt.title('Accelerometer reading graph!')
# plotting the points: circle markers on an axis that treats x as dates
# (same picture plot_date() gave; plot_date() itself is deprecated)
plt.plot(x, y, 'o')
plt.gca().xaxis_date()
# beautify the x-labels
plt.gcf().autofmt_xdate()
# function to show the plot
plt.show()
My recommendation for how to make this easier would be, to use numpy and convert the input to datetime.
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

# Load both whitespace-separated columns as text; unpack=True returns one array per column.
stamps, readings = np.loadtxt('MaxMin.txt', dtype=str, unpack=True)

# Column 0: "HH:MM:SS.fff" strings parsed into datetime objects.
parsed = []
for stamp in stamps:
    parsed.append(datetime.strptime(stamp, "%H:%M:%S.%f"))
x = np.array(parsed)

# Column 1: numeric text converted to floats.
y = readings.astype(float)

plt.plot(x, y)
figure = plt.gcf()
figure.autofmt_xdate()
plt.show()
Concerning the ticking of the axes: In order to have ticks every half a second you can use a MicrosecondLocator with an interval of 500000.
import matplotlib.dates
# ...
# A locator with an interval of 500000 microseconds, i.e. one major tick every half second.
loc = matplotlib.dates.MicrosecondLocator(500000)
x_axis = plt.gca().xaxis
x_axis.set_major_locator(loc)
# The formatter is built from the same locator it labels.
auto_format = matplotlib.dates.AutoDateFormatter(loc)
x_axis.set_major_formatter(auto_format)
I'm very beginner at Python and matplotlib but trying to learn! I would like to use matplotlib to plot some simple data from a CSV containing dates with a frequency. The X axis containing dates and Y containing the frequency. Example data from CSV:
2011/12/15,5
2011/12/11,4
2011/12/19,2
I checked the "matplotlib.sf.net/examples" out but appears all the test data is downloaded from a http get. I would really appreciate if someone could guide me with some example code of how to read in (presumably using CSV reader) and display data in chart.
Thank you!!
Maybe you look for something like:
import csv
import datetime as dt
import matplotlib.pyplot as plt

arch = 'C:\\Python26\\programas\\test.csv'
# Read inside a with-block so the file handle is closed once the rows are loaded.
with open(arch, newline='') as csv_file:
    data = [(dt.datetime.strptime(item, "%Y/%m/%d"), float(value))
            for item, value in csv.reader(csv_file)]
# Tuples sort by their first element, so this orders the rows chronologically.
data.sort()
[x, y] = zip(*data)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y)
ax.grid(True)
# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
plt.show()
I've tried to keep my code as simple as possible and this is by no means elegant, but here you go:
import csv
import matplotlib.pyplot as plt

### Making test CSV file ###
# One [date, frequency] pair per row, so the file has two real columns.
data = [['2011/12/15', 5], ['2011/12/11', 4], ['2011/12/19', 2], ['2011/12/16', 3],
        ['2011/12/20', 8], ['2011/12/14', 4], ['2011/12/10', 10], ['2011/12/9', 7]]
# Text mode with newline='' is what the csv module requires on Python 3.
with open('test.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(data)

### Extract data from CSV ###
dates = []
freq = []
with open('test.csv', 'r', newline='') as n:
    for date_text, count in csv.reader(n):
        dates.append(date_text)
        # Convert to a number so the y axis is numeric rather than categorical.
        freq.append(int(count))

### Do plot ###
# The points are drawn at evenly spaced positions 0..N-1 in file order;
# the date strings are only used as tick labels.
false_x = list(range(len(dates)))
plt.plot(false_x, freq, 'o-')
plt.xticks(false_x, dates, rotation=45)
# plt.axis([xmin, xmax, ymin, ymax]) - sets axes limits on graph
plt.axis([-1, len(dates), 0, max(freq) + 1])
plt.show()
This makes: