Pytesseract really slow - python

I'm trying to read text from MS Teams and use that text to generate keyboard input.
Right now, I work with the threading module to have one thread for the input and one thread for the image_to_string. Following is the function for the image_to_string.
def imToString():
    """Continuously OCR a fixed screen region and publish the text in ``message``.

    Runs forever. Each pass grabs the screen rectangle (177, 850)-(283, 881),
    converts it to grayscale, runs Tesseract on it (German, ``--psm 6``) and
    stores the recognised text in the module-level ``message``. Progress and
    the time spent in the OCR call are printed on every pass.
    """
    global message
    print("Image getting read")
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Users\\gornicec\\AppData\\Local\\Programs\\Tesseract-OCR\\tesseract.exe'
    while True:
        print("preIMGgrab")
        cap = ImageGrab.grab(bbox=(177, 850, 283, 881))
        # np.array() of the grabbed PIL image is in RGB channel order
        # (assuming an RGB-mode grab), so RGB2GRAY is the matching conversion;
        # BGR2GRAY would swap the red and blue weights.
        grayCap = cv2.cvtColor(np.array(cap), cv2.COLOR_RGB2GRAY)
        print("postIMGgrab")
        t = time.perf_counter()
        print("preMSG" + str(t))
        message = pytesseract.image_to_string(
            grayCap,
            lang='deu', config='--psm 6')
        print(str(message) + "was read" + str(time.perf_counter() - t))
I don't know why, but it takes about 8 seconds to read an image that's only about 1000 pixels big. I need this to take at most 2 seconds. I'll add the whole code at the end. If there is any way to make it faster, or to do it differently, please tell me.
WHOLE CODE:
import numpy as np
import time
import pytesseract
from win32gui import GetWindowText, GetForegroundWindow
import win32api
import cv2
import pyautogui
from PIL import ImageGrab
import threading
from ahk import AHK
import keyboard
# Shared text buffer; Controls() declares it global and checks it for keywords.
message = ""
# AutoHotkey wrapper; Controls() sends key presses through ahk.key_press().
ahk = AHK(executable_path='C:\\Program Files\\AutoHotkey\\AutoHotkey.exe')
def Controls():
    """Poll the shared ``message`` text and press the key for each keyword found.

    Runs forever, once every 0.5 seconds. The lower-cased text is searched
    for the words below and the mapped key is pressed through ``ahk`` for
    every word that occurs; several keys may be pressed in one pass.
    """
    # (keyword, key) pairs, checked in this order.
    key_for_word = (
        ("vorne", 'w'),
        ("hinten", 's'),
        ("links", 'a'),
        ("rechts", 'd'),
        ("greif", 'space'),
    )
    while True:
        print("message")
        # Read the shared text once so every keyword check in this pass sees
        # the same value, even if another thread replaces it meanwhile.
        text = message.lower()
        for word, key in key_for_word:
            if word in text:
                ahk.key_press(key)
        time.sleep(0.5)
#IMGTOSTRING---
# Run the key-press loop and the OCR loop concurrently, one thread each.
controls = threading.Thread(target=Controls)
controls.start()
# imToString is the OCR function listed separately earlier in the question.
grab = threading.Thread(target=imToString)
grab.start()

pytesseract is not suited for a large number of images, or for images that are already in memory: it writes them to a file and then passes the file path to the Tesseract CLI. If you want to improve the performance of your script, try using a library that works directly with the Tesseract API,
like this: https://pypi.org/project/tess-py-api/

Related

Python : how to have preview of DSLR camera

I came here as I am lost.
I have a script that shows a preview on screen before a picture is taken. The code uses 2 cameras (which doesn't make much sense, I know):
Pi camera : for preview
DSLR camera : for taking the pictures
The script works as expected. The issue is that the Pi camera's zoom is not aligned with the DSLR's.
I know it is possible to show the DSLR preview on screen, which would be better, but I don't know how to do it. As I am new to Python, I read as much as I could. I read about a solution using VLC but didn't understand the specifics. I read the gphoto2 help but got lost in the technical parts. Hence I'd highly appreciate it if someone could help me out or point me in the right direction.
The DSLR camera is a Canon EOS D60 which should be compatible for screen preview.
I attached below the code
from picamera import PiCamera
import time
from time import sleep
import logging
import sys
import gphoto2 as gp
import locale
# Number of pictures TakePicture() takes per call.
photonr = 1
# Base countdown in seconds: the first picture counts down from countdown + 1.
countdown = 5
# Path prefix (with trailing slash) under which captured JPEGs are saved.
FolderRaw='/home/pi/'
def TakePicture():
    """Run a countdown on the Pi-camera preview, then shoot with the DSLR.

    Takes ``photonr`` pictures. For each one the Pi camera shows a
    fullscreen preview with a countdown overlay, the gphoto2 camera captures
    an image, and the file is copied to
    ``FolderRaw + <date>_<rafale>_<index>.jpg``.

    Side effects: increments the module-level counter ``rafale`` (creating
    it on the first call) and leaves the last image base name in the global
    ``image``. Both cameras are released even if a capture fails.
    """
    global rafale
    global image
    try:
        rafale += 1
    except NameError:
        # First call: the counter does not exist yet.
        rafale = 1
    camera1 = PiCamera()
    try:
        locale.setlocale(locale.LC_ALL, '')
        logging.basicConfig(
            format='%(levelname)s: %(name)s: %(message)s', level=logging.WARNING)
        # NOTE(review): the returned object is kept in a local as in the
        # original; presumably it must stay referenced while logging is
        # active -- confirm against the gphoto2 binding's documentation.
        callback_obj = gp.check_result(gp.use_python_logging())
        camera2 = gp.Camera()
        camera2.init()
        try:
            for i in range(1, photonr + 1):
                preview = camera1.start_preview()
                preview.fullscreen = True
                picture_time = time.strftime("%Y-%m-%d")
                image = picture_time + '_' + str(rafale) + '_' + str(i)
                # Longer countdown before the first picture, 3 s for the rest.
                countdown2 = countdown + 1 if i == 1 else 3
                for counter in range(countdown2, 0, -1):
                    camera1.annotate_text = str(counter)
                    camera1.annotate_text_size = 120
                    sleep(1)
                camera1.annotate_text = ''
                print('Capturing image')
                file_path = camera2.capture(gp.GP_CAPTURE_IMAGE)
                print('Camera file path: {0}/{1}'.format(file_path.folder, file_path.name))
                target = FolderRaw + image + '.jpg'
                print('Copying image to', target)
                camera_file = camera2.file_get(
                    file_path.folder, file_path.name, gp.GP_FILE_TYPE_NORMAL)
                camera_file.save(target)
                camera1.stop_preview()
                print ("Raw picture taken and saved on " + image )
        finally:
            camera2.exit()
    finally:
        camera1.close()
# Script entry point: run one capture session when the file is executed.
TakePicture()
Thank you beforehand for any tips/direction/help

Detect beep sound from Audio file using python

Detect beep sound from Audio file using python
I found this code somewhere, but it does not detect beeps correctly:
it also reports a beep for audio that does not contain one.
from moviepy.editor import *
import matplotlib.pyplot as plt
import cv2
#from time import sleep
import sounddevice as sd
from scipy.io import wavfile
import numpy as np
# Extract the audio track of a video and scan it for loud samples, which are
# reported as beeps.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements inside the loop cannot be determined with certainty from here.
filename = 'C:/Users/YahyaSirguroh/Downloads/output.mp4'
video = VideoFileClip(filename)
audio = video.audio
duration = video.duration
audio.write_audiofile("audio.wav")
#sleep(0.3)
# samplerate and data are read back from the WAV file but not used below.
samplerate, data = wavfile.read('audio.wav')
# Number of sample points examined per second of video.
step = 30
# Collects every audio frame that was inspected.
audio_signal = []
# cnt counts loud frames seen; flag counts iterations while cnt is non-zero.
cnt = 0
flag = 0
text = ''
for t in range(int(duration*step)):
# Convert the sample index into a timestamp in seconds.
t = t/step
if cnt:
flag+=1
if t > audio.duration or t > video.duration: break
audio_frame = audio.get_frame(t) #numpy array representing mono/stereo values
audio_signal.extend(list(audio_frame))
# A frame counts as loud only when exactly two of its values exceed 0.6
# (presumably both stereo channels -- confirm).
# NOTE(review): this is a pure amplitude test, so any sufficiently loud sound
# passes it; likely the reason beeps are reported where there are none.
if (audio_frame>0.6).sum()==2:
cnt+=1
if cnt>=2:
print('beep detected at %5.2f' %(t))
text = 'beep detected at %d' %(np.round(t))
# Reset both counters once flag reaches 4.
if flag>=4:
cnt=0
flag=0

after using pyinstaller to convert py to exe it doesn't work

I want to monitor one area of the screen, but after converting the script to an exe it doesn't work.
No error message is shown when I run it from cmd to look for errors.
When I start it, it just blinks for a few minutes and then blacks out.
It works well in Python.
Please help.
Here is the code.
import multiprocessing
from re import A
from PIL import Image
import pytesseract
import time
import threading
from PIL import ImageGrab
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
import pyautogui
import keyboard
import winsound
from multiprocessing import Process
def _read_region_text():
    """Capture ``region1`` to the screenshot file and return its OCR text."""
    path = "C:\\Users\\youngseb\\Desktop\\screenshot1.png"
    pyautogui.screenshot(path, region = region1)
    # Close the file handle before the next capture overwrites the same file.
    with Image.open(path) as img1:
        return pytesseract.image_to_string(img1, lang='kor+eng')


def screenshot():
    """OCR the watched region twice, 5 s apart, and beep if the text is unchanged.

    Prints the whitespace-split words of each reading. When both readings
    are equal, a 2000 Hz beep of 500 ms is played and "ERROR" is printed;
    otherwise the function waits 0.5 s. A timer then schedules the next run
    in 5 seconds.

    NOTE(review): indentation was lost in the paste; the timer is assumed to
    be scheduled unconditionally at the end of the function -- confirm.
    """
    A1 = _read_region_text()
    print(A1.split())
    time.sleep(5)
    A2 = _read_region_text()
    print(A2.split())
    if A1 == A2:
        winsound.Beep(2000, 500)
        print("ERROR")
    else:
        time.sleep(0.5)
    threading.Timer(5, screenshot).start()
# Screen region watched by screenshot(), passed as pyautogui's ``region``
# argument. Defined at module level, before the guard, so it exists both in
# the parent and in a child process that re-imports this module.
region1 = (572,333,35,15)

if __name__ == '__main__' :
    # Needed when the script is frozen into a Windows executable (e.g. with
    # PyInstaller): without it, each spawned child re-runs the main program
    # instead of the process target.
    multiprocessing.freeze_support()
    P1 = Process(target=screenshot)
    P1.start()
    P1.join()

Game bot error (python chrome dinousor game error)

I followed a tutorial (Click me!) and I get unexpected results. My code:
from PIL import ImageGrab, ImageOps
import pyautogui
import time
from numpy import *
class Cordinates():
    """Screen coordinates (x, y) used by the game bot."""

    # Point clicked by restartGame().
    replayBtn = (960,355)
    # Reference point from which imageGrab() derives its capture box.
    dinosaur = (784,375)
    #770x360, 770x365
def restartGame():
    """Click the replay button position stored in ``Cordinates.replayBtn``."""
    pyautogui.click(Cordinates.replayBtn)
def pressSpace():
    """Hold the space key for 0.05 s, print "Jump", then release the key."""
    pyautogui.keyDown('space')
    time.sleep(0.05)
    print("Jump")
    pyautogui.keyUp('space')
def imageGrab():
    """Grab a screen box derived from the dinosaur position and return a checksum.

    The capture is converted to grayscale and the result of its
    ``getcolors()`` call is turned into an array.

    Returns:
        The sum of all entries of that array.
    """
    # NOTE(review): ImageGrab.grab takes (left, top, right, bottom). Here the
    # third value is derived from dinosaur[1] (the y coordinate) and the
    # fourth is the constant 10, so with the current coordinates right < left
    # and bottom < top. Left unchanged -- confirm the intended box.
    box = (Cordinates.dinosaur[0]+435, Cordinates.dinosaur[1]+25,
           Cordinates.dinosaur[1]+335, 10)
    image = ImageGrab.grab(box)
    grayImage = ImageOps.grayscale(image)
    a = array(grayImage.getcolors())
    return a.sum()
# Bot loop: restart the game, then keep polling imageGrab().
# NOTE(review): indentation was lost in this paste; which of the two sleeps
# belongs to the if-branch and which to the loop body is unclear from here.
def main():
restartGame()
while True:
# 1447 is the imageGrab() checksum compared against; presumably the value of
# an obstacle-free capture -- confirm.
if imageGrab()!=1447:
#pressSpace()
# Prints the function object itself (no call parentheses), which is why the
# debug output is "<function imageGrab at ...>" rather than a number.
print(imageGrab)
time.sleep(0.1)
time.sleep(2)
# Start the bot when the script is run.
main()
and the print i added for debug gives me
<function imageGrab at 0x079CBD68>
What can i fix to make this work?
here
if imageGrab()!=1447:
#pressSpace()
print(imageGrab)
time.sleep(0.1)
You wrote print(imageGrab), which refers to the function as if it were a variable; you need to call it:
print(imageGrab())
You printed the function object, not the result of calling the function.

killed 9 while using python and opencv2 to read multiple video files into a list

The overall problem I'm trying to tackle is I want to find out if videoA is a subset of videoB. So if videoA is 2 seconds clip and videoB is 40s, do those two seconds occur in those 40s. To do this I'm reading video files with VideoCapture, and saving each frame of my video in a dictionary. But while reading videos I see this:
Videos/viper_SpineyBitterGoatOhMyDog.mp4
Videos/tsm_theoddone_CoyThirstyAlmondDoggo.mp4
Videos/wingsofdeath_HelplessOilyYogurtGOWSkull.mp4
Videos/rinnieriot_AbstruseEnjoyableOkapiCopyThis.mp4
Videos/imls_UnusualHelplessDogFloof.mp4
Videos/4.mp4
Videos/solorenektononly_LaconicFamousEggplantTwitchRaid.mp4
Videos/gripex90_CheerfulNeighborlyJayTheThing.mp4
Videos/tarzaned_GrossFlirtyMooseFutureMan.mp4
Videos/imaqtpie_LightPleasantPhoneSquadGoals.mp4
Killed: 9
from moviepy.editor import *
import imageio
import json
from moviepy.video.io.VideoFileClip import VideoFileClip
import random
import cv2
import os
import threading
from skimage.measure import compare_ssim
import argparse
import imutils
# Decode every frame of every downloaded video and keep them all in a dict
# keyed by file path.
# NOTE(review): Python 2 syntax (print statement); indentation was lost in
# this paste, so loop nesting is inferred, not shown.
start = datetime.datetime.now()
videoPaths = self.getListOfDownloadedVideos()
# Maps video path -> list of frames read from that video.
videosByFrame = {}
for path in videoPaths:
print path
images = []
vidcap = cv2.VideoCapture(path)
success = True
while success:
#print "Before READ"
success,image = vidcap.read()
#print "After READ"
# The frame is appended before success is re-checked, so the result of the
# final failed read is stored as well.
images.append(image)
# Every decoded frame stays referenced from this dict.
# NOTE(review): memory therefore grows with the total frame count of all
# videos -- a plausible cause of the "Killed: 9" shown above; confirm.
videosByFrame[path] = images
cv2.destroyAllWindows()
vidcap.release()
Is there something in vidcap.read() that would cause it to kill my python script?

Categories

Resources