I would like to display the text from an image and then have the coordinates determined for each word in the text. For this I use the following code:
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import json
import math
import PIL
from PIL import ImageDraw
import matplotlib.pyplot as plt
# Build the pretrained OCR pipeline (text detection + text recognition).
model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

# Run OCR on the input image.
bildpfad = "C:/Users/b2/Documents/Analysetext_1.png"
bild = DocumentFile.from_images(bildpfad)
ergebnis = model(bild)

# Export the result as a nested dict and store it as JSON.
output = ergebnis.export()
with open("C:/Users/b2/Documents/docTR_OCR_output.json", "w") as f:
    # The with-statement closes the file on exit; no explicit close() needed.
    json.dump(output, f, indent=1)

# Display the recognised words on top of the input pages.
ergebnis.show(bild)
# Geometric coordinates as stored in the export: walk blocks -> lines -> words
# of the first page and print every word together with its geometry.
for block in output['pages'][0]["blocks"]:
    for line in block["lines"]:
        for word in line["words"]:
            print("{}: {}".format(word["geometry"], word["value"]))
# Each geometry is printed as ((x_min, y_min), (x_max, y_max)); the flat
# [x_min, x_max, y_min, y_max] form is what convert_coordinates returns.
# Pixel (absolute) coordinates
def convert_coordinates(geometry, page_dim):
    """Scale a word geometry by the page dimensions and round it outwards.

    Args:
        geometry: Pair ``((x_min, y_min), (x_max, y_max))``.
        page_dim: Pair whose first entry scales the y values and whose
            second entry scales the x values (presumably (height, width)
            as exported by docTR; verify against the export).

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as integers.
    """
    len_y, len_x = page_dim[0], page_dim[1]
    (x_min, y_min), (x_max, y_max) = geometry[0], geometry[1]
    # Minima are floored and maxima are ceiled so that rounding can only
    # enlarge the box, never cut into the word.
    return [
        math.floor(x_min * len_x),
        math.ceil(x_max * len_x),
        math.floor(y_min * len_y),
        math.ceil(y_max * len_y),
    ]
def get_coordinates(output):
    """Collect the converted bounding box of every word on the first page.

    Walks blocks -> lines -> words of ``output['pages'][0]``, converts each
    word geometry with ``convert_coordinates`` using that page's
    ``"dimensions"`` entry, and prints each box next to its word.

    Args:
        output: Exported OCR result (nested dict with a ``'pages'`` list).

    Returns:
        List of ``[x_min, x_max, y_min, y_max]`` boxes, one per word, in
        traversal order.
    """
    page = output['pages'][0]
    page_dim = page["dimensions"]
    text_coordinates = []
    for block in page["blocks"]:
        for line in block["lines"]:
            for word in line["words"]:
                converted_coordinates = convert_coordinates(word["geometry"], page_dim)
                # The word text lives under "value" (singular); the former
                # "values" lookup raised KeyError.
                print("{}: {}".format(converted_coordinates, word["value"]))
                text_coordinates.append(converted_coordinates)
    return text_coordinates
# Converted boxes for every word of the first page, echoed for inspection.
graphical_coordinates = get_coordinates(output)
print(graphical_coordinates)
# Plot the converted coordinates
def draw_bounds(bild, bound):
    """Draw a blue outline on ``bild`` for every box in ``bound``.

    Args:
        bild: Image handed to ``ImageDraw.Draw``; it is drawn on in place.
        bound: Iterable of ``[x_min, x_max, y_min, y_max]`` boxes.

    Returns:
        The same ``bild`` object that was passed in.
    """
    draw = ImageDraw.Draw(bild)
    for box in bound:
        x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
        # Visit the four corners in order and end on the first one so the
        # polyline closes into a rectangle.
        outline = [
            x_min, y_min,
            x_max, y_min,
            x_max, y_max,
            x_min, y_max,
            x_min, y_min,
        ]
        draw.line(outline, fill='blue', width=2)
    return bild
# Reload the source image, draw the word boxes on it and plot the result.
bild = PIL.Image.open(bildpfad)
result_image = draw_bounds(bild, graphical_coordinates)
plt.figure(figsize=(15, 15))
plt.imshow(result_image)
# When run as a plain script the figure only appears once show() is called.
plt.show()
At the beginning I had the following error:
OSError: cannot load library 'gobject-2.0-0': error 0x7e. Additionally, ctypes.util.find_library() did not manage to locate a library called 'gobject-2.0-0'
I was then able to fix this error and now the following is displayed:
ModuleNotFoundError: No module named 'weasyprint.text.ffi'; 'weasyprint.text' is not a package
The whole output is:
Traceback (most recent call last):
File "c:/Users/b2/Documents/Übungen/Distanzberechnungen/Textdetection_Übung_3.py", line 3, in <module>
from doctr.io import DocumentFile
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\__init__.py", line 1, in <module>
from . import datasets, io, models, transforms, utils
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\datasets\__init__.py", line 3, in <module>
from .generator import *
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\datasets\generator\__init__.py", line 4, in <module>
from .tensorflow import *
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\datasets\generator\tensorflow.py", line 8, in <module>
from .base import _CharacterGenerator, _WordGenerator
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\datasets\generator\base.py", line 11, in <module>
from doctr.io.image import tensor_from_pil
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\io\__init__.py", line 2, in <module>
from .html import *
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\doctr\io\html.py", line 8, in <module>
from weasyprint import HTML
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\weasyprint\__init__.py", line 315, in <module>
from .css import preprocess_stylesheet # noqa isort:skip
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\weasyprint\css\__init__.py", line 25, in <module>
from . import computed_values, counters, media_queries
File "C:\Users\b2\anaconda3\envs\Python_3_6\lib\site-packages\weasyprint\css\computed_values.py", line 9, in <module>
from ..text.ffi import ffi, pango, units_to_double
ModuleNotFoundError: No module named 'weasyprint.text.ffi'; 'weasyprint.text' is not a package
I hope you can help me to fix this error. Thank you in advance
Related
OK, so I tried this:
import cv2 as cv
# Load the frozen TensorFlow graph together with its text graph description.
cvNet = cv.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')

img = cv.imread('example.jpg')
if img is None:
    # cv.imread returns None instead of raising when the file cannot be
    # read; fail here with a clear message rather than on img.shape below.
    raise FileNotFoundError("could not read image 'example.jpg'")
rows, cols = img.shape[:2]

# Feed the image to the network as a 300x300 blob with R and B swapped.
cvNet.setInput(cv.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False))
cvOut = cvNet.forward()

for detection in cvOut[0, 0, :, :]:
    score = float(detection[2])
    if score > 0.3:
        # Entries 3..6 are scaled by the image width/height to get the
        # rectangle corners in pixels.
        left = detection[3] * cols
        top = detection[4] * rows
        right = detection[5] * cols
        bottom = detection[6] * rows
        cv.rectangle(img, (int(left), int(top)), (int(right), int(bottom)), (23, 230, 210), thickness=2)

# Show the annotated image and wait for a key press.
cv.imshow('img', img)
cv.waitKey()
and it gives me an error like this:
Traceback (most recent call last):
File "C:\WINDOWS\System32\numpy_init_.py", line 140, in
from . import _distributor_init
File "C:\WINDOWS\System32\numpy_distributor_init.py", line 34, in
from . import mklinit
ImportError: cannot import name 'mklinit' from partially initialized module 'numpy' (most likely due to a circular import) (C:\WINDOWS\System32\numpy_init.py)
Traceback (most recent call last):
File "C:\Users\phat\Downloads\start.py", line 1, in
import cv2 as cv
File "C:\Users\phat\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\cv2_init.py", line 5, in
from .cv2 import *
ImportError: numpy.core.multiarray failed to import
Can anyone help me?
I have the following code:
https://github.com/marcomusy/vedo/blob/master/examples/basic/colorlines.py
"""Color lines by a scalar"""
from vedo import *
# Sample 25 points and join them into a black line.
pts1 = [(sin(i / 8), cos(i / 8), i / 5) for i in range(25)]
l1 = Line(pts1).c('black')

# Second line: a clone of the first with rotateZ(180) and shift(1, 0, 0) applied.
l2 = l1.clone().rotateZ(180).shift(1, 0, 0)

# Make up some scalar values from the point-wise difference of the two lines.
offsets = l1.points() - l2.points()
dist = mag(offsets)

# The trick is to treat the "body" of a line as a cell, so cells can be
# coloured exactly as for any other polygonal mesh.
lines = (
    Lines(l1, l2)
    .lw(4)
    .cmap('Accent', dist, on='cells')
)
lines.addScalarBar(title='distance')
# Alternative scalar bar, e.g.:
# lines.addScalarBar3D(title='distance').scalarbar.rotateX(90).pos(1,1,2)

show(l1, l2, lines, __doc__, axes=1, bg2='lightblue', viewup='z')
However, this gives me the following error:
Traceback (most recent call last):
File "vedo.py", line 2, in <module>
from vedo import *
File "/Users/prikshetsharma/Documents/clotorch/src/clotorch/flight/vedo.py", line 4, in <module>
pts1 = [(sin(x/8), cos(x/8), x/5) for x in range(25)]
File "/Users/prikshetsharma/Documents/clotorch/src/clotorch/flight/vedo.py", line 4, in <listcomp>
pts1 = [(sin(x/8), cos(x/8), x/5) for x in range(25)]
NameError: name 'sin' is not defined
However, `sin` is imported from numpy in the `__init__.py` file of vedo, so it should be defined in this file.
I have a section of code, but it says that my float can't be interpreted as an integer. I don't know how to change this, though - does anyone have an idea?
import glob

import matplotlib.pyplot as plt
import numpy as np
import pims  # to import the video.
import scipy.ndimage as nd  # for Gaussian filter.
import trackpy as tp  # for object tracking functions.
from pandas import DataFrame  # to build our table of data from the video.
from pims import pipeline  # a decorator to allow dynamic loading of videos
# Index applied to every frame: rows 100-999, columns 500-1249, third index 0.
crop = (
    slice(100, 1000),
    slice(500, 1250),
    0,
)
# Cut-off used when the inverted, filtered frame is binarised.
thresh = 130
# Paths matched by the glob pattern; each one is processed in turn.
x = glob.glob('/Users/charlieargent/Extended Report /Potassium nitrate 1 hour//')
def readvid(x):
    """Open the video at path ``x`` with ``pims.Video`` and return it."""
    return pims.Video(x)
# Restored decorator: the "#pipeline" marker was a mangled "@pipeline".
@pipeline
def crop_frame(frame):
    """Return ``frame`` indexed with the module-level ``crop`` tuple."""
    return frame[crop]
# Restored decorator: the "#pipeline" marker was a mangled "@pipeline".
@pipeline
def filter_frame(frame):
    """Crop ``frame`` and smooth it with a Gaussian filter (sigma 2 per axis)."""
    # Call gaussian_filter from scipy.ndimage directly; the ndimage.filters
    # sub-namespace is deprecated.
    return nd.gaussian_filter(crop_frame(frame), [2, 2])
# Restored decorator: the "#pipeline" marker was a mangled "@pipeline".
@pipeline
def binary_inv_frame(frame):
    """Invert the filtered frame and binarise it to 0 / 255 as uint8."""
    inverted = 255 - filter_frame(frame)
    # Values of the inverted frame above the threshold become 255, all others 0.
    return ((inverted > thresh) * 255).astype('uint8')
# Track the features of every matched video and write its MSD table to CSV.
for i in x:
    find_cells = tp.batch(binary_inv_frame(readvid(i)), 21, minmass=200, invert=False)
    tracks = tp.link_df(find_cells, 25, memory=0)
    tracks_long = tp.filter_stubs(tracks, 5)
    msds = tp.imsd(tracks_long, 1, 25)
    # Split the path once and keep its last three components.
    parts = i.split("/")
    y = parts[-3], parts[-2], parts[-1]
    # NOTE(review): y is a tuple, so the file name is the tuple's repr
    # followed by ".csv" - confirm that this naming is intended.
    msds.to_csv("{}.csv".format(y))
Error:
File "", line 1, in
runfile('/Users/charlieargent/Extended Report /Analysis/untitled1.py', wdir='/Users/charlieargent/Extended Report /Analysis')
File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/charlieargent/Extended Report /Analysis/untitled1.py", line 45, in
find_cells = tp.batch( binary_inv_frame(readvid(i)), 21, minmass = 200, invert = False )
File "/anaconda3/lib/python3.7/site-packages/slicerator/init.py", line 687, in process
return Pipeline(proc_func, *ancestors)
File "/anaconda3/lib/python3.7/site-packages/slicerator/init.py", line 415, in init
self._len = len(ancestors[0])
TypeError: 'float' object cannot be interpreted as an integer
I'm trying to run a script like the one below in Atom (or in PyCharm):
import numpy as np
def bar(num_elems):
    """Assemble the global M and K matrices from ``num_elems`` 2x2 elements.

    Every element adds the same 2x2 matrices ``m`` and ``k`` onto the
    diagonal, with neighbouring elements overlapping in one row/column, so
    both results are ``(num_elems + 1) x (num_elems + 1)``.

    Args:
        num_elems: Number of elements (positive integer).

    Returns:
        Tuple ``(M, K)`` of the assembled arrays. Earlier the function
        built both matrices and then discarded them.
    """
    restrained_dofs = 0

    # Per-element matrices.
    m = np.array([[2, 1], [1, 2]]) / (6. * num_elems)
    k = np.array([[1, -1], [-1, 1]]) * num_elems

    size = num_elems + 1
    M = np.zeros((size, size))
    K = np.zeros((size, size))

    # Add each element's block in place rather than allocating a full
    # zero matrix per element just to add a 2x2 block.
    for i in range(num_elems):
        M[i:i + 2, i:i + 2] += m
        K[i:i + 2, i:i + 2] += k

    # Drop the row and the column of every restrained degree of freedom
    # (no-op while restrained_dofs is 0).
    for dof in range(restrained_dofs):
        for axis in (0, 1):
            M = np.delete(M, dof, axis=axis)
            K = np.delete(K, dof, axis=axis)

    return M, K


bar(1)
and I receive the following terminal output:
Traceback (most recent call last):
File "D:/Programowanie/Python/bar_elem.py", line 1, in <module>
import numpy as np
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\__init__.py", line 142, in <module>
from . import add_newdocs
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\add_newdocs.py", line 13, in <module>
from numpy.lib import add_newdoc
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\lib\__init__.py", line 8, in <module>
from .type_check import *
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\lib\type_check.py", line 11, in <module>
import numpy.core.numeric as _nx
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\core\__init__.py", line 35, in <module>
from . import _internal # for freeze programs
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\core\_internal.py", line 12, in <module>
from numpy.compat import basestring, unicode
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\compat\__init__.py", line 14, in <module>
from . import py3k
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\site-packages\numpy\compat\py3k.py", line 14, in <module>
from pathlib import Path
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\pathlib.py", line 13, in <module>
from urllib.parse import quote_from_bytes as urlquote_from_bytes
File "C:\Users\Admin\AppData\Local\Programs\Python\Python37\lib\urllib\parse.py", line 597, in <module>
_asciire = re.compile('([\x00-\x7f]+)')
AttributeError: module 're' has no attribute 'compile'
But the same script works correctly when using Spyder3 or in the Terminal.
I'm running the script with the Script package in Atom, but other packages give the same result.
Python 3.7.1,
Numpy 1.15.4
The problem was that the script was placed in a folder named Python; after moving it to another location, everything works fine.
I need to find the p-value for a multiple linear regression. However, I cannot use `import statsmodels.formula.api as sm`; it throws an error for me :( Is there a way to find the p-value without using statsmodels? This is what I have so far:
import numpy
from scipy.stats import pearsonr
from scipy import stats  # t and F distributions for the significance tests

data = numpy.genfromtxt("2-2.4.txt", delimiter=",")
x = data[:, 6]
x2 = data[:, 7]
x3 = data[:, 8]
x4 = data[:, 9]
y = data[:, -1]

# pearsonr accepts exactly two samples, so it cannot take four predictors at
# once. Fit y = b0 + b1*x + b2*x2 + b3*x3 + b4*x4 by ordinary least squares
# and test the fit directly instead.
design = numpy.column_stack((numpy.ones(len(y)), x, x2, x3, x4))
n_obs, n_params = design.shape
dof_model = n_params - 1
dof_resid = n_obs - n_params
if dof_resid <= 0:
    raise ValueError("need more observations than regression coefficients")

xtx_inv = numpy.linalg.pinv(design.T.dot(design))
coefficients = xtx_inv.dot(design.T).dot(y)
residuals = y - design.dot(coefficients)
ss_resid = residuals.dot(residuals)
ss_total = ((y - y.mean()) ** 2).sum()

# Per-coefficient two-sided t-test (order: intercept, x, x2, x3, x4).
sigma2 = ss_resid / dof_resid
std_errors = numpy.sqrt(sigma2 * numpy.diag(xtx_inv))
t_values = coefficients / std_errors
p_values = 2 * stats.t.sf(numpy.abs(t_values), dof_resid)

# Overall fit: r is the multiple correlation coefficient and p is the
# p-value of the F-test that all slope coefficients are zero.
r_squared = max(1.0 - ss_resid / ss_total, 0.0)
r = numpy.sqrt(r_squared)
f_value = (r_squared / dof_model) / ((1.0 - r_squared) / dof_resid)
p = stats.f.sf(f_value, dof_model, dof_resid)
The error I get from statsmodels when importing:
File "C:\Python27\lib\site-packages\statsmodels\formula\__init__.py", line 4, in <module>
from formulatools import handle_formula_data
File "C:\Python27\lib\site-packages\statsmodels\formula\formulatools.py", line 1, in <module>
import statsmodels.tools.data as data_util
File "C:\Python27\lib\site-packages\statsmodels\tools\__init__.py", line 1, in <module>
from tools import add_constant, categorical
File "C:\Python27\lib\site-packages\statsmodels\tools\tools.py", line 14, in <module>
from pandas import DataFrame
File "C:\Python27\lib\site-packages\pandas\__init__.py", line 39, in <module>
from pandas.core.api import *
File "C:\Python27\lib\site-packages\pandas\core\api.py", line 10, in <module>
from pandas.core.groupby import Grouper
File "C:\Python27\lib\site-packages\pandas\core\groupby.py", line 18, in <module>
from pandas.core.frame import DataFrame
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 39, in <module>
from pandas.core.series import Series
File "C:\Python27\lib\site-packages\pandas\core\series.py", line 2995, in <module>
import pandas.tools.plotting as _gfx # noqa
File "C:\Python27\lib\site-packages\pandas\tools\plotting.py", line 134, in <module>
if _mpl_ge_1_5_0():
File "C:\Python27\lib\site-packages\pandas\tools\plotting.py", line 129, in _mpl_ge_1_5_0
return (matplotlib.__version__ >= LooseVersion('1.5') or
File "C:\Python27\lib\distutils\version.py", line 296, in __cmp__
return cmp(self.version, other.version)
AttributeError: 'unicode' object has no attribute 'version'