rdkit: how to draw high resolution chemical structure

rdkit: how to draw high resolution chemical structure - python

I am using jupyter lab to draw chemical structures. But the output image resolution is too low. How can I improve it?
from rdkit import Chem
from rdkit.Chem import Draw
smiles = 'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O'
m = Chem.MolFromSmiles(smiles)
Draw.MolToImage(m)
Thanks a lot

I have found a solution, more information can be found here
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG
smiles = 'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O'
m = Chem.MolFromSmiles(smiles)
def moltosvg(mol, molSize = (300,300), kekulize = True):
    """Render an RDKit molecule as an SVG string.

    Args:
        mol: RDKit molecule to draw. It is never modified; all work happens
            on a copy.
        molSize: ``(width, height)`` of the SVG canvas.
        kekulize: When true, try to kekulize the copy before drawing.

    Returns:
        The SVG markup with every ``svg:`` prefix removed.
    """
    # Round-trip through the binary form to get an independent copy.
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Kekulization failed; discard the possibly half-modified copy
            # and draw the molecule as given. `Exception` (rather than a bare
            # `except:`) keeps Ctrl-C and SystemExit working.
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        # No coordinates yet: generate a 2D layout for the depiction.
        rdDepictor.Compute2DCoords(mc)
    width, height = molSize[0], molSize[1]
    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(mc)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    # Strip the namespace prefix from the tags (presumably so the notebook
    # renders the markup directly).
    return svg.replace('svg:','')
SVG(moltosvg(m))

For those looking for higher-resolution molecule output and export: the cairosvg library (also available as a command-line program) can export to file types including .svg, .pdf, .png and .eps (https://cairosvg.org/).
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
import cairosvg
import io
def molecule_to_pdf(mol, file_name, width=300, height=300, out_dir="./figs/2Dstruct"):
    """Save a molecule's depiction as a PDF file.

    The molecule is first rendered to SVG with RDKit and the SVG text is then
    converted to PDF by cairosvg.

    Args:
        mol: RDKit molecule to draw.
        file_name: Output file name without the ``.pdf`` extension.
        width: Width of the SVG canvas.
        height: Height of the SVG canvas.
        out_dir: Directory the PDF is written to. Defaults to the location
            that used to be hard-coded; it is created if it does not exist.
    """
    from pathlib import Path

    # Create the target directory up front: otherwise writing the PDF fails
    # on a fresh checkout where the directory does not exist yet.
    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    full_path = target_dir / f"{file_name}.pdf"

    # Render the molecule to SVG text.
    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()

    # Export to pdf
    cairosvg.svg2pdf(bytestring=drawer.GetDrawingText().encode(), write_to=str(full_path))
# Example
m = Chem.MolFromSmiles('Cn1cnc2n(C)c(=O)n(C)c(=O)c12')
molecule_to_pdf(m, "myfav")

Related

Using satpy, Blending multiple satellite image

I want to blend multiple satellite images, but an error occurred. I followed the example in the satpy documentation.
here is code and netcdf file is here : https://drive.google.com/drive/folders/1zp6EBVfjuh41LDRRZo4PJoeGGGn13AKy?usp=sharing
from glob import glob
from satpy import Scene, MultiScene, DataQuery
from satpy.utils import debug_on
# Reproduction script: load one channel from each of three satellite files,
# group the channels under one query, resample every scene to a common area,
# then blend the result and save it as a PNG.
debug_on()
# Name of the target area every scene is resampled to.
areaid = 'worldeqc3km70'
# glob() returns a list, which is what Scene(filenames=...) receives below.
eumetsat = glob('E:/Global/combine_test/MSG4-SEVI-MSG15-0100-NA-20210801000010.306000000Z-20210801001259-4774254.nat')
goes17 = glob('E:/Global/combine_test/OR_ABI-L1b-RadF-M6C13_G17_s20212130000319_e20212130009396_c20212130009445.nc')
gk2a = glob('E:/Global/combine_test/gk2a_ami_le1b_ir105_fd020ge_202108010000.nc')
# One Scene per file, each opened with the reader named for its format.
goes17_scene = Scene(reader="abi_l1b", filenames=goes17)
eumetsat_scene = Scene(reader="seviri_l1b_native", filenames=eumetsat)
gk2a_scene = Scene(reader="ami_l1b", filenames=gk2a)
# Each scene loads a single, differently named channel.
goes17_scene.load(["C13"])
eumetsat_scene.load(['IR_108'])
gk2a_scene.load(["IR105"])
mscn = MultiScene([goes17_scene, eumetsat_scene, gk2a_scene])
#groups = {DataQuery(name='IR_group', wavelength=(9.8, 10.8, 11.8)): ['C13', 'IR105', 'IR_108']}
# Map the three channel names onto one shared query named "IR_group".
groups = {DataQuery(name="IR_group", wavelength=(10, 11, 12)): ['C13', 'IR_108', 'IR105']}
mscn.group(groups)
print(mscn.loaded_dataset_ids)
# reduce_data=False is passed through to the resampling call.
resampled = mscn.resample(areaid, reduce_data=False)
# The RuntimeError quoted below the script is reported for this blend/save step.
blended = resampled.blend()
blended.save_datasets(filename='./test_{area}.png'.format(area=areaid))
Error message:
RuntimeError: None of the requested datasets have been generated or could not be loaded. Requested composite inputs may need to have matching dimensions (eg. through resampling).

As mentioned in the comments this is a known bug that will hopefully be fixed in the next couple weeks. Follow issue 2089 for more information.
The short-term workaround is to make your own "blend" method that handles things the way you expect:
from satpy.multiscene import stack
def my_blend(mscn, common_datasets, blend_function=stack):
    """Blend selected datasets of a MultiScene into a single new Scene.

    Args:
        mscn: MultiScene whose ``scenes`` are searched for each dataset.
        common_datasets: Iterable of dataset identifiers to blend.
        blend_function: Callable that takes the list of matching datasets and
            returns the blended dataset. Defaults to ``stack``.

    Returns:
        A new Scene holding one blended dataset per requested identifier.

    Raises:
        KeyError: If none of the scenes contains a requested identifier.
    """
    new_scn = Scene()
    for ds_id in common_datasets:
        # Only scenes that actually contain the dataset contribute to the blend.
        datasets = [scn[ds_id] for scn in mscn.scenes if ds_id in scn]
        if not datasets:
            # Fail with a clear message instead of handing an empty list to
            # the blend function.
            raise KeyError(f"No scene in the MultiScene contains dataset {ds_id!r}")
        new_scn[ds_id] = blend_function(datasets)
    return new_scn
blended = my_blend(resampled, ["ir_group"])

Python: convert procedural script to class and object (oop) style

I am still learning to code in Python and I am struggling to write code in an object-oriented way. I have written a script using the pytesseract library, and with the extracted words I have tried to make a simple detector using keywords as a filter. I want to redo it in class-and-object form strictly as a learning exercise. I would be highly grateful if anybody could help. Thank you.
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
import os
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from PIL import ImageFilter
import re
# Folder whose images are OCR-ed and classified one by one.
img_path = 'C:/Users/RAJ/realtest/'

# Keyword filters, compiled once instead of on every loop iteration.
# NOTE(review): the trailing '.' in both patterns is a regex wildcard, so
# "GOVT" and "Female" only match when followed by another character.
# Confirm whether a literal dot was intended.
pan_pattern = re.compile(r"INCOME|TAX|Account|GOVT.", re.M|re.I)
aadhaar_pattern = re.compile(r"GOVERNMENT|DOB|Male|Female.", re.M|re.I)

for i in os.listdir(img_path):
    # The context manager closes the image file once this entry is processed.
    with Image.open(os.path.join(img_path, i)) as images:
        plt.imshow(images)
        plt.show()
        images_new = images.convert('LA')
        im_SHARPEN2 = images_new.filter(filter=ImageFilter.SHARPEN)
        # OCR both the sharpened and the unsharpened version and search the
        # combined text.
        extract = pytesseract.image_to_string(im_SHARPEN2, lang = 'eng')
        extract2 = pytesseract.image_to_string(images_new,lang = 'eng')
    final = extract+extract2
    x = pan_pattern.search(final)
    y = aadhaar_pattern.search(final)
    if x is None and y is None:
        print('Not a pan card or adhaar card')
    elif x is not None:
        # A PAN keyword match wins even if the other pattern matched too.
        print('This is a pan card')
    else:
        print('adhaar card detected')

import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
import os
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from PIL import ImageFilter
import re
class is_pan_card():
    """OCR every image in a folder and print which kind of card it looks like.

    Constructing the object does all the work: each entry of ``path`` is
    opened, displayed, OCR-ed and classified by keyword, and one message per
    image is printed.

    Args:
        path: Directory containing the images to inspect.
    """

    # Keyword filters, compiled once for all images.
    # NOTE(review): the trailing '.' in both patterns is a regex wildcard, so
    # "GOVT" and "Female" only match when followed by another character.
    # Confirm whether a literal dot was intended.
    _PAN_KEYWORDS = re.compile(r"INCOME|TAX|Account|GOVT.", re.M | re.I)
    _AADHAAR_KEYWORDS = re.compile(r"GOVERNMENT|DOB|Male|Female.", re.M | re.I)

    def __init__(self,path):
        self.img_path = path
        for i in os.listdir(self.img_path):
            text = self._extract_text(os.path.join(self.img_path, i))
            print(self._classify(text))

    @staticmethod
    def _extract_text(image_file):
        """Show the image and return the OCR text of its two variants."""
        # The context manager closes the image file after OCR.
        with Image.open(image_file) as images:
            plt.imshow(images)
            plt.show()
            images_new = images.convert('LA')
            im_SHARPEN2 = images_new.filter(filter=ImageFilter.SHARPEN)
            extract = pytesseract.image_to_string(im_SHARPEN2, lang = 'eng')
            extract2 = pytesseract.image_to_string(images_new,lang = 'eng')
        return extract + extract2

    @classmethod
    def _classify(cls, text):
        """Return the message describing which keyword set ``text`` matches."""
        x = cls._PAN_KEYWORDS.search(text)
        y = cls._AADHAAR_KEYWORDS.search(text)
        if x is None and y is None:
            return 'Not a pan card or adhaar card'
        if x is not None:
            # A PAN keyword match wins even if the other pattern matched too.
            return 'This is a pan card'
        return 'adhaar card detected'
is_pan =is_pan_card('C:/Users/RAJ/realtest/')

ImageReader method of reportlab library does not work

Calling reportlab's ImageReader('url') (with the PIL library installed) does not work.
my env: Python 3.7.6, Pillow 7.0.0, reportlab 3.5.32 (i tried also different version of PIL and reportlab... same error)
img = ImageReader('https://www.google.it/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png')
my error
Cannot open resource "https://www.google.it/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"
fileName='https://www.google.it/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png' identity=[ImageReader#0x119474090 filename='https://www.google.it/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png']

Solved...Solution using urls images inside tables on reportlab:
from PIL import Image as pillowImage
from django.core.files.storage import default_storage
from io import BytesIO
from reportlab.platypus import Image
# Open the stored file through Django's default storage backend.
cloud_image = default_storage.open('url') # using s3Storage
# Decode it with Pillow, then re-encode it in its own format into memory.
img = pillowImage.open(cloud_image)
img_byte_arr = BytesIO()
img.save(img_byte_arr, format=img.format)
# Build the platypus Image flowable from the in-memory buffer.
# NOTE(review): w, h and cm are not defined in this snippet; presumably the
# desired size and reportlab.lib.units.cm. Confirm against the full code.
result = Image(img_byte_arr, w * cm, h * cm)
....
# Put the image flowable into a one-cell table and add it to the story.
# Table, TableStyle, cm and story are not defined in this snippet.
data1 = [[result]]
t1 = Table(data1, colWidths=(9 * cm))
# NOTE(review): 'LEFT' is a horizontal alignment value; VALIGN normally takes
# TOP/MIDDLE/BOTTOM. Confirm whether 'ALIGN' was intended here.
t1.setStyle(TableStyle([('VALIGN', (0, 0), (-1, -1), 'LEFT'),]))
t1.hAlign = 'LEFT'
story.append(t1)
...

give a bytes to reportlab.lib.utils.ImageReader

I have read that you can pass a bytes-like object to reportlab.lib.utils.ImageReader(). If I pass in a file path it works fine, but I want to use a bytes-like object instead, so that I can keep the plot in memory and not have to keep saving updated plots to disk.
This is where I found the code to convert the image into a string
https://www.programcreek.com/2013/09/convert-image-to-string-in-python/
This is an example of how to use BytesIO as input for ImageReader()
How to draw image from raw bytes using ReportLab?
This function makes a plot and saves it to memory with BytesIO(). `string` is the value I'm going to pass along later.
#imports
import PyPDF2
from io import BytesIO
from reportlab.lib import utils
from reportlab.lib.pagesizes import landscape, letter
from reportlab.platypus import (Image, SimpleDocTemplate,
Paragraph, Spacer)
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch, mm
import datetime
import os
import csv
import io
import base64
import urllib
from django.contrib import admin
from django.forms import model_to_dict
from django.http import HttpResponse
from django.urls import path
from django.views.decorators.csrf import csrf_protect
from django.utils.decorators import method_decorator
from reporting import models, functions, functions2
import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot as plt
import numpy as np
def make_plot(data):
    """Draw a bar chart of ``data`` and return it as base64-encoded PNG.

    Args:
        data: Mapping of label -> count. Keys become the x tick labels and
            values the bar heights, in the mapping's iteration order.

    Returns:
        A ``(data_uri, encoded)`` tuple: ``encoded`` is the PNG as base64
        bytes, and ``data_uri`` is a ``data:image/png;base64,...`` string
        built from the URL-quoted form of the same bytes.
    """
    # Import the submodule explicitly: a plain `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    from urllib.parse import quote

    keys = list(data)
    vals = list(data.values())

    fig, ax = plt.subplots()
    try:
        ind = np.arange(len(keys))  # the x locations for the groups
        width = 0.35  # the width of the bars
        ax.bar(ind - width/2, vals, width)
        ax.set_ylabel('Count')
        ax.set_xticks(ind)
        ax.set_xticklabels(keys)

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Figures created via pyplot stay registered until closed; without
        # this, each call leaves one more figure in memory.
        plt.close(fig)

    string = base64.b64encode(buf.getvalue())
    return 'data:image/png;base64,' + quote(string), string
This is the minimum code to show how the information is moved to where the error occurs.
class ProgressReportAdmin(ReadOnlyAdmin):
    """Admin that builds a plot in the changelist view and exports it to PDF.

    This is a reduced excerpt: ``data`` and ``fname`` are not defined in the
    code shown, and ``changelist_view`` returns nothing here.
    """

    # Base64 PNG bytes of the last plot built by changelist_view.
    # NOTE(review): this lives on the admin instance, so it is shared by every
    # request that instance serves. Confirm that is acceptable.
    current_extra_context = None

    #csrf_protect_m
    def changelist_view(self, request, extra_context=None):
        """Build the plot and keep its base64 payload for a later export."""
        plot = make_plot(data)
        # plot[1] is the second element returned by make_plot.
        self.current_extra_context = plot[1]

    def export(self, request):
        """Pass the stored plot payload to LandscapeMaker and save the PDF."""
        image = self.current_extra_context
        pdf = functions.LandscapeMaker(image, fname, rotate=True)
        pdf.save()
This is where the error occurs, in the scaleImage function
class LandscapeMaker(object):
    """Build a letter-size PDF whose first page draws a logo or plot image.

    Args:
        image_path: The image to draw: a file path, raw image bytes, or
            base64-encoded image bytes.
        filename: Name of the PDF; ``pdf_file`` is set to ``./media/<filename>``.
        rotate: Accepted but not used by the code shown in this excerpt.
    """

    def __init__(self, image_path, filename, rotate=False):
        self.pdf_file = os.path.join('.', 'media', filename)
        self.logo_path = image_path
        self.story = [Spacer(0, 1*inch)]

    def save(self):
        """Build the document into an in-memory buffer."""
        fileObj = BytesIO()
        self.doc = SimpleDocTemplate(fileObj, pagesize=letter,
                                     leftMargin=1*inch)
        # create_pdf is invoked as the first-page callback during the build.
        self.doc.build(self.story,
                       onFirstPage=self.create_pdf)

    def create_pdf(self, canvas, doc):
        """First-page callback: prepare the logo image."""
        logo = self.scaleImage(self.logo_path)

    @staticmethod
    def _as_image_source(img_path):
        """Turn ``img_path`` into something ImageReader can open.

        Non-bytes values (e.g. a file path) are returned unchanged. Bytes are
        wrapped in a ``BytesIO``; if they are valid base64 they are decoded
        first, otherwise they are treated as raw image data.
        """
        import base64
        import binascii

        if not isinstance(img_path, (bytes, bytearray)):
            return img_path
        try:
            # validate=True rejects anything outside the base64 alphabet, so
            # raw binary image data falls through to the except branch.
            raw = base64.b64decode(img_path, validate=True)
        except binascii.Error:
            raw = bytes(img_path)
        return BytesIO(raw)

    def scaleImage(self, img_path, maxSize=None):
        """Open the image with reportlab's ImageReader.

        Base64 bytes are decoded and wrapped in a file-like object first.
        Passing them as-is makes ImageReader treat them as a resource name,
        and wrapping the still-encoded bytes in BytesIO gives PIL data it
        cannot identify.

        ``maxSize`` is not used by the code shown in this excerpt.
        """
        img = utils.ImageReader(self._as_image_source(img_path))
        img.fp.close()
For Error1 I receive:
raise IOError('Cannot open resource "%s"' % name)
img = utils.ImageReader(img_path)
"OSError: Cannot open resource "b'iVBORw0KGgoAAA' etc.,
For Error2 I receive
OSError: cannot identify image file <_io.BytesIO object at 0x7f8e4057bc50>
cannot identify image file <_io.BytesIO object at 0x7f8e4057bc50>
fileName=<_io.BytesIO object at 0x7f8e4057bc50> identity=[ImageReader#0x7f8e43fd15c0]

I think you have to pass buff to ImageReader somehow.
I'm using this function to save and draw the figures I generate with matplotlib and it works perfectly for me.
seek(offset, whence=SEEK_SET) Change the stream position to the given offset. Behaviour depends on the whence parameter. The default value for whence is SEEK_SET.
getvalue() doesn't work except the seek(0)
def save_and_draw(fig, x_img, y_img, width_img=width_img, height_img=height_img):
    """Render a matplotlib figure to PNG in memory and draw it on the canvas.

    NOTE(review): ``self``, ``width_img`` and ``height_img`` are taken from
    the enclosing scope. Presumably this is defined inside a method of a
    class holding a reportlab canvas in ``self.c``; confirm.

    Args:
        fig: Figure to render; it is closed afterwards.
        x_img: X position passed to ``drawImage``.
        y_img: Y position passed to ``drawImage``.
        width_img: Width passed to ``drawImage``.
        height_img: Height passed to ``drawImage``.
    """
    try:
        imgdata = BytesIO()
        fig.savefig(imgdata, format='png')
        # Rewind so the reader starts at the beginning of the PNG data.
        imgdata.seek(0)
        image = ImageReader(imgdata)
        self.c.drawImage(image, x_img, y_img, width_img, height_img)
    finally:
        # Close the figure even if saving or drawing fails.
        plt.close(fig)

How to give chart title to a chart in Python-pptx chart in Chart Area(Not the slide title)

I am trying to add text to the chart title inside the chart area of a PPT slide (not the slide title). I have looked at http://python-pptx.readthedocs.io/en/latest/dev/analysis/cht-chart-title.html to see whether text can be added to my chart, but I couldn't find a solution. Here is my code:
import numpy as np
import pandas as pd
import pyodbc
#import pptx as ppt
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.chart.data import ChartData
from pptx.chart.data import XyChartData
from pptx.enum.chart import XL_CHART_TYPE
from pptx.util import Inches,Pt
from pptx.enum.chart import XL_LABEL_POSITION
from pptx.dml.color import RGBColor
from pptx.dml import fill
from pptx.chart.chart import ChartTitle
from pptx.chart.chart import Chart
# Fetch the month-wise report from SQL Server into a DataFrame.
cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                      "Server=SNAME;"
                      "Database=DNAME;"
                      "Trusted_Connection=yes;")
AvgResponseTimequery = 'EXEC [SLA].[MONTHWISEREPORT]'
df=pd.read_sql(sql=AvgResponseTimequery,con=cnxn)

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\S" being passed through.
getprs = Presentation(r'D:\SLA Pyth\hubiC-06-20-2017 1_10_55\SLAPerformance.pptx')
slide = getprs.slides.add_slide(getprs.slide_layouts[5])
slide.shapes.title.text = 'Key Performance KPIs'

# One clustered-column series: average response time per month.
chart_data = ChartData()
chart_data.categories = df['Month'].values.tolist()
chart_data.add_series('Average Report Response time(Seconds)', tuple(df['Avg Response Time']))
x, y, cx, cy = Inches(0.5), Inches(2), Inches(9), Inches(3)
chart=slide.shapes.add_chart(
    XL_CHART_TYPE.COLUMN_CLUSTERED, x, y, cx, cy, chart_data
).chart

# The title text lives on the ChartTitle's text frame. `chart.chart_title`
# itself cannot be assigned a string, and the setting must be made on this
# `chart` instance, not on the Chart/ChartTitle classes.
chart.has_title = True
chart.chart_title.text_frame.text = "Response Time in Seconds"
# has_text_frame is a property of the chart title, not of the chart.
print(chart.chart_title.has_text_frame)

plot = chart.plots[0]
plot.has_data_labels = True
data_labels = plot.data_labels
chart.series[0].format.fill.solid()
chart.series[0].format.fill.fore_color.rgb = RGBColor(46, 125, 137)
getprs.save(r'D:\SLA REPORT MONTH WISE\SALReport1.pptx')

Possible typo, case-sensitivity. When you do Chart.chart_title you're referring to the class Chart not your chart object. Likewise, ChartTitle.has_text_frame refers to the class ChartTitle not your chart!
After installing this pptx package and debugging (I got error on chart.has_title, etc.), I think you need:
chart.chart_title.has_text_frame=True
chart.chart_title.text_frame.text='Response Time in Seconds'
NOTE: You don't need this line:
chart.chart_title.has_text_frame=True
Setting the text_frame.text will be sufficient.
Here is exact code I used to test. First, create a new presentation with only 1 slide. Remove all shapes/placeholders from that slide and insert 1 chart only. Save & close the presentation.
from pptx import Presentation
from pptx.chart.data import ChartData
from pptx.chart.data import XyChartData
from pptx.enum.chart import XL_CHART_TYPE
from pptx.util import Inches,Pt
from pptx.enum.chart import XL_LABEL_POSITION
from pptx.dml.color import RGBColor
from pptx.dml import fill
from pptx.chart.chart import ChartTitle
from pptx.chart.chart import Chart
# Raw string keeps the backslashes literal; "\d" and "\p" are not valid
# escape sequences in a normal string literal.
file = r'c:\debug\pres.pptx'
pres = Presentation(file)
# The test deck has a single slide whose only shape is the chart.
slide = pres.slides[0]
chart = slide.shapes[0].chart
# Setting the text frame's text is enough to give the chart a title.
chart.chart_title.text_frame.text='my new chart title'
# Save back over the file that was opened.
pres.save(file)
Further, from console, I see these types, indicating that chart.chart_title is not an instance of str object, etc:
>>> type(chart)
<class 'pptx.chart.chart.Chart'>
>>> type(chart.chart_title)
<class 'pptx.chart.chart.ChartTitle'>
Note, the documentation indicates:
Currently python-pptx requires Python 2.6, 2.7, 3.3 or 3.4.
If you are using python 3.6 then perhaps that is why it is not working as expected, it is not supported version of python.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

rdkit: how to draw high resolution chemical structure - python

I am using jupyter lab to draw chemical structures. But the output image resolution is too low. How can I improve it? from rdkit import Chem from rdkit.Chem import Draw smiles = 'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O' m = Chem.MolFromSmiles(smiles) Draw.MolToImage(m) Thanks a lot

Related

Using satpy, Blending multiple satellite image

Python: convert procedural script to class and object (oop) style

ImageReader method of reportlab library does not work

give a bytes to reportlab.lib.utils.ImageReader

How to give chart title to a chart in Python-pptx chart in Chart Area(Not the slide title)

Categories

Resources