Matplotlib figure to PDF without saving - python

I have to create a group of matplotlib figures, which I would like to directly present in a PDF report without saving them as a file.
The data for my plots is stored in a Pandas DataFrame:
Right now I do not know other better option than first save the image and use it later.
I am doing something like that:
import matplotlib.pylab as plt
from reportlab.platypus import BaseDocTemplate, Image
for index, row in myDataFrame.iterrows():
fig = plt.figure()
plt.plot(row['Xvalues'], row['Yvalues'],'o', color='r')
fig.savefig('figure_%s.png' % (row['ID']))
plt.close(fig)
text = []
doc = BaseDocTemplate(pageName, pagesize=landscape(A4))
for f in listdir(myFolder):
if f.endswith('png'):
image1 = Image(f)
text.append(image1)
doc.build(text)

Here is the best solution provided by matplotlib itself:
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
with PdfPages('foo.pdf') as pdf:
#As many times as you like, create a figure fig and save it:
fig = plt.figure()
pdf.savefig(fig)
....
fig = plt.figure()
pdf.savefig(fig)
VoilĂ 
Find a full example here: multipage pdf matplotlib

I think you can save the figure into a buffer using io.BytessIO and use that in platypus. Something like this perhaps?
import io
import matplotlib.pylab as plt
from reportlab.platypus import BaseDocTemplate, Image
buffers = []
for index, row in myDataFrame.iterrows():
fig = plt.figure()
plt.plot(row['Xvalues'], row['Yvalues'],'o', color='r')
mybuffer = io.BytesIO()
fig.savefig(mybuffer, format = 'pdf')
mybuffer.seek(0)
buffers.append(mybuffer)
plt.close(fig)
text = []
doc = BaseDocTemplate(pageName, pagesize=landscape(A4))
doc.build(buffers)

using my package autobasedoc https://pypi.org/project/autobasedoc/ your example would look like that:
from autobasedoc import autorpt as ar
from autobasedoc import autoplot as ap
#ap.autoPdfImg
def my_plot(index, row, canvaswidth=5): #[inch]
fig, ax = ap.plt.subplots(figsize=(canvaswidth,canvaswidth))
fig.suptitle(f"My simple plot {index}", fontproperties=fontprop)
ax.plot(row['Xvalues'], row['Yvalues'],label=f"legendlabel{index}")
return fig
doc = ar.AutoDocTemplate(pageName)
content = []
for index, row in myDataFrame.iterrows():
content.append(my_plot(index, row))
doc.build(content)

Related

How to display audio at the right side of matplotlib

The following code display the image and audio in the top-bottom style:
Here is the test code:
import librosa
import matplotlib.pyplot as plt
import IPython.display as ipd
def plot_it(name, audio, sample_rate):
plt.figure(figsize=(8, 1))
plt.plot(audio)
plt.gca().set_title(name)
plt.show()
ipd.display(ipd.Audio(data=audio, rate=sample_rate))
Is it possible for changing the "top-bottom" style to "left-right" style for displaying the audio at the right side of the plt figure?
You can use a GridspecLayout which is similar to matplotlib's GridSpec. In order to direct to output into the needed grid cells, you can capture it using the Output widget:
import librosa
import matplotlib.pyplot as plt
import IPython.display as ipd
from ipywidgets import Output, GridspecLayout
def plot_it(name, audio, sample_rate):
grid = GridspecLayout(1, 2, align_items='center')
out = Output()
with out:
fig, ax = plt.subplots(figsize=(8, 1))
ax.plot(audio)
ax.set_title(name)
plt.close(fig)
ipd.display(ax.figure)
grid[0, 0] = out
out = Output()
with out:
ipd.display(ipd.Audio(data=audio, rate=sample_rate))
grid[0, 1] = out
ipd.display(grid)
name = 'nutcracker'
filename = librosa.example(name)
y, sr = librosa.load(filename)
plot_it(name, y, sr)
(It is essential to close the figure, otherwise you'll have double output of the figure. This is easier to do this using the OOP than the pyplot interface, that's why I changed your matplotlib code a bit)

Python: Add dynamic text in Matplotlib animation

I made an animation from the list of images saved as numpy arrays.
Then I want to add a text onto the animation like like a subtitle whose text changes for each frame but placing plt.text(some_string) only adds the string at the first iteration and it does not work if I change the passed string in the loop. Below is my attempt. Please note HTML is just for Jupyter Lab.
import matplotlib.animation as animation
from PIL import Image
from IPython.display import HTML
import matplotlib.pyplot as plt
folderName = "hogehoge"
picList = glob.glob(folderName + "\*.npy")
fig = plt.figure()
ims = []
for i in range(len(picList)):
plt.text(10, 10, i) # This does not add the text properly
tmp = Image.fromarray(np.load(picList[i]))
ims.append(plt.imshow(tmp))
ani = animation.ArtistAnimation(fig, ims, interval=200)
HTML(ani.to_jshtml())
You have also to add the text object to the list of artists for each frame:
import matplotlib.animation as animation
from PIL import Image
from IPython.display import HTML
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
ims = []
for i in range(10):
artists = ax.plot(np.random.rand(10), np.random.rand(10))
text = ax.text(x=0.5, y=0.5, s=i)
artists.append(text)
ims.append(artists)
ani = animation.ArtistAnimation(fig, ims, interval=200)
HTML(ani.to_jshtml())

How to read only part of a CSV file?

I have the following code in which I read CSV files and get a graph plotted:
import numpy as np
import matplotlib.pyplot as plt
import scipy.odr
from scipy.interpolate import interp1d
plt.rcParams["figure.figsize"] = (15,10)
def readPV(filename="HE3.csv",d=32.5e-3):
t=np.genfromtxt(fname=filename,delimiter=',',skip_header=1, usecols=0)
P=np.genfromtxt(fname=filename,delimiter=',',skip_header=1, usecols=1)
V=np.genfromtxt(fname=filename,delimiter=',',skip_header=1, usecols=2,filling_values=np.nan)
V=V*np.pi*(d/2)**2
Vi= interp1d(t[~np.isnan(V)],V[~np.isnan(V)],fill_value="extrapolate")
V=Vi(t)
return P,V,t
P,V,t=readPV(filename="HE3.csv")
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(V,P,'ko')
ax.set_xlabel("Volume")
ax.set_ylabel("Pressure")
plt.show()
From this code, the following graph is made:
The CSV file has several data points in one column, separated by commas; I want to know how to pick a range of columns to read, instead of all of them.

How can I set the colors for multiple lines in an NLTK Matplotlib function?

I have an NLTK function that creates a AxesSubplot like this:
# Names ending letters frequency
import nltk
import matplotlib.pyplot as plt
cfd = nltk.ConditionalFreqDist(
(fileid, name[-1])
for fileid in names.fileids()
for name in names.words(fileid))
plt.figure(figsize=(12, 6))
cfd.plot()
And I would like to change the colors of the lines individually. Most solutions I see online generate each line individually with a separate plot line. However the matplotlib .plot() method is called within the ConditionalFreqDist .plot(). Is there an alternative way I can change the colors of the lines? I'd like the female line to be blue and the male line to be green.
NLTK's ConditionalFreqDist.plot method returns a plain matplotlib axes object as you can see here. From this, you can get the lines directly using ax.lines and set the colors using set_color.
I don't have NLTK installed now so I'll just make the axes directly, plot a red and a blue line, and turn these to black and green.
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0, 15, 21)
y0 = 0.6*np.sin(x)
y1 = np.sin(1.2 + 0.5*x)
fig, ax = plt.subplots(1,1)
ax.plot(x, y0, 'r')
ax.plot(x, y1, 'b')
# this is where ConditionalFreqDist will return the axes
# these are the lines you would write
ax.lines[0].set_color('k')
ax.lines[1].set_color('g')
Specifically, for the OP's case it should look like:
import nltk
import matplotlib.pyplot as plt
cfd = nltk.ConditionalFreqDist(
(fileid, name[-1])
for fileid in names.fileids()
for name in names.words(fileid))
plt.figure(figsize=(12, 6))
ax = cfd.plot()
ax.lines[0].set_color('k')
ax.lines[1].set_color('g')
This ended up working, using the suggestions from #tom10 :
import nltk
import matplotlib.pyplot as plt
cfd = nltk.ConditionalFreqDist(
(fileid, name[-1])
for fileid in names.fileids()
for name in names.words(fileid))
fig, ax = plt.subplots(1,1)
cfd.plot()
ax.lines[0].set_color('blue')
ax.lines[1].set_color('green')
fig.set_size_inches(10, 4)
fig

Insert matplotlib images into a pandas dataframe

PURPOSE: I am currently working with rdkit to colour the structures of my molecules according to rdkit.Chem.Draw.SimilarityMaps. Now, I would like to use the matplotlib images SimilarityMaps function to introduce them in a pandas dataframe and export this table in the form of an html file.
CODE: I tried to do that with the following code
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions
df = pd.DataFrame({'smiles':['Nc1nc(NC2CC2)c3ncn([C##H]4C[C#H](CO)C=C4)c3n1','CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O','CCN(CC)CCNC(=O)C1=CC=C(C=C1)NC(=O)C','CC(=O)NC1=CC=C(C=C1)O','CC(=O)Nc1sc(nn1)[S](N)(=O)=O']})
def getSim(smi):
mol = Chem.MolFromSmiles(smi)
refmol = Chem.MolFromSmiles('c1ccccc1')
fp = SimilarityMaps.GetMorganFingerprint(mol, fpType='bv')
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(refmol, mol, SimilarityMaps.GetMorganFingerprint)
return fig
df['map'] = df['smiles'].map(getSim)
df.to_html('/.../test.html')
When I open the file test.html, the map column contains the information "Figure (200x200)". I check if my dataframe map column contains object: it's OK in python but not in html file.
QUESTION: I'm not sure how to get a dataframe with images and I'd like to have the help of the community to clarify this subject.
Thanks in advance
What you see as Figure (200x200) is the __repr__ string of the matplotlib Figure class. It is the text representation of that python object (the same that you would see when doing print(fig)).
What you want instead is to have an actual image in the table. An easy option would be to save the matplotlib figure as png image, create an html tag, <img src="some.png" /> and hence show the table.
import pandas as pd
import numpy as np;np.random.seed(1)
import matplotlib.pyplot as plt
import matplotlib.colors
df = pd.DataFrame({"info" : np.random.randint(0,10,10),
"status" : np.random.randint(0,3,10)})
cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])
def createFigure(i):
fig, ax = plt.subplots(figsize=(.4,.4))
fig.subplots_adjust(0,0,1,1)
ax.axis("off")
ax.axis([0,1,0,1])
c = plt.Circle((.5,.5), .4, color=cmap(i))
ax.add_patch(c)
ax.text(.5,.5, str(i), ha="center", va="center")
return fig
def mapping(i):
fig = createFigure(i)
fname = "data/map_{}.png".format(i)
fig.savefig(fname)
imgstr = '<img src="{}" /> '.format(fname)
return imgstr
df['image'] = df['status'].map(mapping)
df.to_html('test.html', escape=False)
The drawback of this is that you have a lot of images saved somewhere on disk. If this is not desired, you may store the image encoded as base64 in the html file, <img src="data:image/png;base64,iVBORw0KGgoAAAAN..." />.
import pandas as pd
import numpy as np;np.random.seed(1)
import matplotlib.pyplot as plt
import matplotlib.colors
from io import BytesIO
import base64
df = pd.DataFrame({"info" : np.random.randint(0,10,10),
"status" : np.random.randint(0,3,10)})
cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])
def createFigure(i):
fig, ax = plt.subplots(figsize=(.4,.4))
fig.subplots_adjust(0,0,1,1)
ax.axis("off")
ax.axis([0,1,0,1])
c = plt.Circle((.5,.5), .4, color=cmap(i))
ax.add_patch(c)
ax.text(.5,.5, str(i), ha="center", va="center")
return fig
def fig2inlinehtml(fig,i):
figfile = BytesIO()
fig.savefig(figfile, format='png')
figfile.seek(0)
# for python 2.7:
#figdata_png = base64.b64encode(figfile.getvalue())
# for python 3.x:
figdata_png = base64.b64encode(figfile.getvalue()).decode()
imgstr = '<img src="data:image/png;base64,{}" />'.format(figdata_png)
return imgstr
def mapping(i):
fig = createFigure(i)
return fig2inlinehtml(fig,i)
with pd.option_context('display.max_colwidth', -1):
df.to_html('test.html', escape=False, formatters=dict(status=mapping))
The output looks the same, but there are no images saved to disk.
This also works nicely in a Jupyter Notebook, with a small modification,
from IPython.display import HTML
# ...
pd.set_option('display.max_colwidth', -1)
HTML(df.to_html(escape=False, formatters=dict(status=mapping)))

Categories

Resources