# Create an ImageJ gateway with the newest available version of ImageJ.
import imagej
import pathlib
import numpy
ij = imagej.init()
# Load an image.
# The path is built with pathlib and handed to ImageJ's opener as a plain string.
img_path = pathlib.Path('C:/Users/Bernardo/TCC/thyroid/1_1.jpg')
image = ij.io().open(str(img_path))
# Display the loaded image with a grayscale colormap.
ij.py.show(image, cmap='gray')
I want to plot a histogram using pyimagej after reading this image.
Well, you can just use matplotlib:
# Create an ImageJ gateway with the newest available version of ImageJ.
import imagej
import pathlib
import numpy
ij = imagej.init()
# Load an image.
img_path = pathlib.Path('C:/Users/Bernardo/TCC/thyroid/1_1.jpg')
image = ij.io().open(str(img_path))
ij.py.show(image, cmap='gray')
import matplotlib.pyplot as plt
# The object returned by ij.io().open() lives on the Java side and is not a
# NumPy array, so it has no flatten(); convert it to Python first.
pixels = numpy.asarray(ij.py.from_java(image))
# Histogram over every pixel value (all channels together for colour images).
plt.hist(pixels.ravel())
plt.show()
Related
I am new to working with Python and I am using Melissa Dell's package to extract data from a table image. My image looks like this:
enter image description here
And my code, for now, is the following one:
# Notebook cell: install layoutparser with its OCR extras (a shell command, not Python).
pip install layoutparser[ocr]
import layoutparser as lp
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import cv2
from google.cloud.vision_v1 import types
import json
import re
from google.cloud import vision
# Notebook cell: print details of the installed google-cloud-vision package.
pip show google-cloud-vision
# Build a Google Cloud Vision OCR agent from a credentials file, with 'es' as the language.
ocr_agent = lp.GCVAgent.with_credential('mycredebtials.json',
languages = ['es'])
# NOTE(review): cv2.IMREAD_COLOR looks like a flag meant for cv2.imread, yet it is
# passed here as plt.imread's second positional argument - confirm the intended reader.
img = plt.imread(r'D:\pdfDispacher.do_Página_2.jpg', cv2.IMREAD_COLOR)
print(img)
plt.imshow(img)
# Run OCR on the image and keep the full response object.
res = ocr_agent.detect(img, return_response=True)
# Extract the text annotations and a word-level layout from the same response.
texts = ocr_agent.gather_text_annotations(res)
layout = ocr_agent.gather_full_text_annotation(res, agg_level=lp.GCVFeatureType.WORD)
# Visualisation calls; their return values are not assigned to anything.
lp.draw_box(img, layout)
lp.draw_text(img, layout, font_size=12, with_box_on_text=True,
text_box_width=1)
What I need is to tell Python to get all the columns and rows and save them in CSV format, but I have not been able to get this done.
I would really appreciate it if anyone could help me with the next lines.
I am trying to open an .img file. I run the following code:
import matplotlib.pyplot as plt
from planetaryimage import PDS3Image
# NOTE: the parentheses do not load anything; `image` is just the path string.
# PDS3Image is imported above but never used.
image = ('/Users/alyse/ldem_1024_00n_15n_150_180.img')
# imshow is given the string itself rather than pixel data.
plt.imshow(image, cmap='gray')
I get the following error: TypeError: Image data of dtype <U46 cannot be converted to float
You can also use PIL. To install: pip install pillow
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Open inside a context manager so the underlying file handle is released as
# soon as the pixel data has been copied into the NumPy array.
# NOTE(review): this only works if Pillow recognises the file's format -
# confirm that it can actually read this particular .img file.
with Image.open('/Users/alyse/ldem_1024_00n_15n_150_180.img') as image:
    image_gray = image.convert("L")  # "L" is Pillow's 8-bit grayscale mode
    array_gray = np.asarray(image_gray)
plt.imshow(array_gray, cmap="gray")
plt.show()
I've tried to import a png file in Python 3.6 with Jupyter Notebook with no success.
I've seen some examples that don't work, at least not anymore, i.e.
import os,sys
# NOTE(review): a top-level `import Image` is the legacy PIL import form; other
# snippets in this file use `from PIL import Image` - confirm which package is installed.
import Image
jpgfile = Image.open("picture.jpg")
There is no module called Image that I can install with either:
conda install Image
or
pip install Image
Any simple solution would be greatly appreciated!
You can display an image from file in a Jupyter Notebook as follows:
from IPython.display import Image
# Path and filename of the image to show.
img = 'fig31_Drosophila.jpg'
# Build the notebook image object, referencing the file through the `url` argument.
Image(url=img)
where img = 'fig31_Drosophila.jpg' is the path and filename of the image you want. (here, the image is in the same folder as the main script)
alternatively:
from IPython.display import Image
# Path and filename of the image to show.
img = 'fig31_Drosophila.jpg'
# Same as the url variant, but the file is referenced through the `filename` argument.
Image(filename=img)
You can specify optional arguments (for width and height, for instance):
from IPython.display import Image
# Path and filename of the image to show.
img = 'fig31_Drosophila.jpg'
# Optional width/height arguments set the displayed size.
Image(url=img, width=100, height=100)
I have come across this link for face detection and image cropping. I would like to use this script, but I only have cv2 installed: import cv2 works, but import cv does not.
How can I convert the cv functions in the following function to cv2 functions?
def faces_from_pil_image(pil_image):
    """Return a list of (x, y, w, h) tuples for faces detected in the PIL image.

    Uses the legacy ``cv`` bindings and the ``haarcascade_frontalface_alt.xml``
    cascade, loaded from the current working directory.

    Args:
        pil_image: PIL image whose raw bytes are handed to OpenCV as 8-bit,
            3-channel data (assumes a 3-channel image - TODO confirm with
            callers).

    Returns:
        One rectangle tuple per detected face.
    """
    storage = cv.CreateMemStorage(0)
    facial_features = cv.Load('haarcascade_frontalface_alt.xml', storage=storage)
    cv_im = cv.CreateImageHeader(pil_image.size, cv.IPL_DEPTH_8U, 3)
    # Pillow renamed tostring() to tobytes() and later removed the old name;
    # prefer the new one and fall back only on very old PIL releases.
    to_bytes = getattr(pil_image, 'tobytes', None) or pil_image.tostring
    cv.SetData(cv_im, to_bytes())
    faces = cv.HaarDetectObjects(cv_im, facial_features, storage)
    # faces includes a `neighbors` field that we aren't going to use here
    return [f[0] for f in faces]
Either use
import cv2
# Reaches the legacy API through the `cv` submodule of cv2.
# NOTE(review): presumably requires an OpenCV 2.x build that still ships `cv2.cv` - confirm.
storage = cv2.cv.CreateMemStorage(0)
or
# The wildcard import is what brings the name `cv` into scope here.
# NOTE(review): presumably requires an OpenCV 2.x build whose cv2 module exposes `cv` - confirm.
from cv2 import *
storage = cv.CreateMemStorage(0)
I have a remote PDF file that I need to read page by page and keep passing each to an OCR which will give me its OCR text.
import pytesseract
from pyPdf import PdfFileWriter, PdfFileReader
import cStringIO
# Aliased so that the PIL import below does not silently replace wand's Image.
from wand.image import Image as WandImage
import urllib2
import tempfile
from PIL import Image

# Python 2 script: read the whole PDF into memory, then walk it page by page.
remoteFile = urllib2.urlopen(urllib2.Request("file:///home/user/Documents/TestDocs/test.pdf")).read()
memoryFile = cStringIO.StringIO(remoteFile)
try:
    pdfFile = PdfFileReader(memoryFile)
    for pageNum in xrange(pdfFile.getNumPages()):
        currentPage = pdfFile.getPage(pageNum)
        ## somehow convert currentPage to wand type
        ## image and then pass to tesseract-api
        ##
        ## TEMP_IMAGE = some conversion to temp file
        ## pytesseract.image_to_string(Image.open(TEMP_IMAGE))
finally:
    # Release the in-memory buffer even if reading a page fails.
    memoryFile.close()
I thought of using cStringIO or tempfile but I cannot figure out how to use them for this purpose.
How can I solve this issue?
There are a couple of options for doing this. The more compatible way, given the code you supplied, is to store the images temporarily in that directory and then delete them after reading the text using pytesseract. I create a wand-type image to extract each image from the PDF individually, then convert it to a PIL-type image for pytesseract. Here's the code I used for this, with the detected text being written to an array 'text' where each element is the text of one image in the original PDF. I also updated some of your imports to make it compatible with Python 3 (cStringIO->io and urllib2->urllib.request).
import PyPDF2
import os
import pytesseract
from wand.image import Image
from PIL import Image as PILImage
import urllib.request
import io
import tempfile

# Download the PDF once; PyPDF2 is only used to find out how many pages it has.
with urllib.request.urlopen('file:///home/user/Documents/TestDocs/test.pdf') as response:
    pdf_read = response.read()
pdf_im = PyPDF2.PdfFileReader(io.BytesIO(pdf_read))

# One scratch image file, overwritten for every page and removed at the end.
# mkstemp returns an open descriptor; close it so wand can write to the path.
temp_fd, tempFile_Location = tempfile.mkstemp(suffix='.png')
os.close(temp_fd)

text = []
try:
    for p in range(pdf_im.getNumPages()):
        # The "[p]" suffix selects a single page of the PDF.
        with Image(filename='file:///home/user/Documents/TestDocs/test.pdf' + '[' + str(p) + ']') as img:
            with Image(image=img) as converted:  # Need second with to convert SingleImage object from wand to Image
                converted.save(filename=tempFile_Location)
        # Close the PIL handle before the next overwrite / final removal.
        with PILImage.open(tempFile_Location) as page_image:
            text.append(pytesseract.image_to_string(page_image))
finally:
    os.remove(tempFile_Location)
Alternatively, if you want to avoid creating and deleting a temporary file for each image you can use numpy and OpenCV to extract the image as a blob, convert it to a numpy array and then turn it into a PIL image for pytesseract to perform OCR on (reference)
import PyPDF2
import os
import pytesseract
from wand.image import Image
from PIL import Image as PILImage
import urllib.request
import io
import numpy as np
import cv2

# Download the PDF once; PyPDF2 is only used to find out how many pages it has.
with urllib.request.urlopen('file:///home/user/Documents/TestDocs/test.pdf') as response:
    pdf_read = response.read()
pdf_im = PyPDF2.PdfFileReader(io.BytesIO(pdf_read))

text = []
for p in range(pdf_im.getNumPages()):
    # The "[p]" suffix selects a single page of the PDF.
    with Image(filename=('file:///home/user/Documents/TestDocs/test.pdf') + '[' + str(p) + ']') as img:
        # Ask for a PNG blob explicitly: without a format the blob is written
        # in the source (PDF) format, which cv2.imdecode cannot decode.
        img_buffer = np.frombuffer(img.make_blob('png'), dtype=np.uint8)
    retval = cv2.imdecode(img_buffer, cv2.IMREAD_GRAYSCALE)
    if retval is None:
        # imdecode signals failure by returning None rather than raising.
        raise ValueError('could not decode page %d as an image' % p)
    text.append(pytesseract.image_to_string(PILImage.fromarray(retval)))