Convert Linux Python multiprocessing to Windows

I would like to use this Linux Python script on Windows.
How should I rewrite it? The part that needs rewriting is the multiprocessing part.
from __future__ import print_function
from collections import Counter
import glob
import multiprocessing
import os
import re
import sys
import time

import pandas as pd  # needed for pd.DataFrame below

def create_data(filepath):
    ...
    return values

filepaths = glob.glob('*/*.txt')
num_tasks = len(filepaths)

p = multiprocessing.Pool()
results = p.imap(create_data, filepaths)

while True:
    completed = results._index  # _index is a private attribute of the imap iterator
    print("\r--- Completed {:,} out of {:,}".format(completed, num_tasks), end='')
    sys.stdout.flush()
    time.sleep(1)
    if completed == num_tasks:
        break

p.close()
p.join()

df_full = pd.DataFrame(list(results))
print()
Thanks for your help.
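A minimal sketch of the usual Windows-compatible layout (an illustration, not the asker's final script): on Windows, multiprocessing uses the spawn start method, which re-imports the main module in every worker process, so the Pool must only be created inside an if __name__ == '__main__': guard. The sketch below assumes Python 3 and the same create_data worker as above, and replaces the polling of the private results._index attribute with consuming the imap iterator directly.
import glob
import multiprocessing
import sys

import pandas as pd

def create_data(filepath):
    # same worker logic as in the question (elided there with "...")
    ...

def main():
    filepaths = glob.glob('*/*.txt')
    num_tasks = len(filepaths)

    results = []
    with multiprocessing.Pool() as pool:
        # imap yields results in order as they complete, so progress can be
        # reported by counting what has been consumed so far.
        for i, value in enumerate(pool.imap(create_data, filepaths), 1):
            results.append(value)
            print("\r--- Completed {:,} out of {:,}".format(i, num_tasks), end='')
            sys.stdout.flush()
    print()
    return pd.DataFrame(results)

if __name__ == '__main__':
    df_full = main()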

Related

Multiprocessing using GetPass

I am running my multiprocessing script while using the getpass function in Python (import getpass), and I keep getting an error message. I am running this code as a .py file from the Command Prompt on Windows 10.
(error message screenshot)
The following is my code:
import time
import multiprocessing
from multiprocessing import Pool
from multiprocessing import freeze_support
import getpass
import jaydebeapi
import pandas as pd
import numpy as np
from multiprocessing import Process, freeze_support, set_start_method

class multiprocess:
    def __init__(self):
        pass

    def test(self, Batch_test):
        pw_2 = getpass.getpass(prompt="Password", stream=False)
        PML = jaydebeapi.connect('com.ibm.db2.jcc.DB2Driver',
                                 'jdbc:db2://he3qlxvtdbs957.fhlmc.com:50001/PMLFDB2',
                                 ['f408195', pw_2], 'C:/JDBC/db2jcc.jar')
        PML = PML.cursor()

        Batch_query = "select id_hstrcl_data_bch_load_frst_evnt as btch_strt, id_hstrcl_data_bch_load_last_evnt as btch_end from UDBADM.hstrcl_data_bch_load WHERE ID_HSTRCL_DATA_BCH_LOAD BETWEEN 1 and 2"
        PML.execute(Batch_query)
        Batch_records = PML.fetchall()
        Batch_records = pd.DataFrame(Batch_records)

        for ind in Batch_test:
            print(ind)
            first_evnt = Batch_records.iloc[ind, 0]
            last_evnt = Batch_records.iloc[ind, 1]
            PML_loan_Query = "select CAST(b.id_lpa_alt_loan AS INT) AS id_lpa_alt_loan from udbadm.pml_lst_cmpltd_trans_mtch a join udbadm.lpa_altv_loan_idtn b on a.id_evnt = b.id_evnt where b.cd_lpa_alt_loan_idtn = 'HewlettPackardGeneratedTransaction' and a.id_evnt between ? and ?"
            PML.execute(PML_loan_Query, (first_evnt, last_evnt))
            loan_records = PML.fetchall()

        return loan_records

    def run(self):
        processes = []
        for i in range(2):
            p = multiprocessing.Process(target=self.test, args=(i,))
            processes.append(p)
        for p in processes:
            p.start()

if __name__ == '__main__':
    a = multiprocess()
    a.run()
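The error screenshot is not reproduced above, but a common cause with this pattern is that getpass.getpass() runs inside the child processes: multiprocessing replaces a child's standard input with os.devnull, so an interactive prompt there cannot work. One frequently suggested rework (a minimal sketch under that assumption, with the jaydebeapi/DB2 work reduced to a placeholder) is to prompt once in the parent and pass the password to each worker as an argument:
import getpass
import multiprocessing

def worker(batch_index, password):
    # Placeholder for the jaydebeapi/DB2 work; the password arrives as a plain
    # argument, so no interactive prompt is needed inside the child process.
    print(f"batch {batch_index}: connecting with a password of length {len(password)}")

if __name__ == '__main__':
    pw = getpass.getpass(prompt="Password: ")   # prompt once, in the parent
    processes = []
    for i in range(2):
        p = multiprocessing.Process(target=worker, args=(i, pw))
        processes.append(p)
    for p in processes:
        p.start()
    for p in processes:
        p.join()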

After using PyInstaller to convert .py to .exe, it doesn't work

I want to monitor one area of the screen, but after converting the script it doesn't work.
There is no message when I run it from cmd to look for errors.
When I start it, it just blinks for a few minutes and then blacks out.
It works fine in Python.
Please help.
Here is the code.
import multiprocessing
from re import A
from PIL import Image
import pytesseract
import time
import threading
from PIL import ImageGrab

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

import pyautogui
import keyboard
import winsound
from multiprocessing import Process

def screenshot():
    pyautogui.screenshot("C:\\Users\\youngseb\\Desktop\\screenshot1.png", region=region1)
    img1 = Image.open("C:\\Users\\youngseb\\Desktop\\screenshot1.png")
    A1 = pytesseract.image_to_string(img1, lang='kor+eng')
    # print(T)
    strings = A1.split()
    print(strings)

    time.sleep(5)

    pyautogui.screenshot("C:\\Users\\youngseb\\Desktop\\screenshot1.png", region=region1)
    img1 = Image.open("C:\\Users\\youngseb\\Desktop\\screenshot1.png")
    A2 = pytesseract.image_to_string(img1, lang='kor+eng')
    strings = A2.split()
    print(strings)

    if A1 == A2:
        winsound.Beep(2000, 500)
        print("ERROR")
    else:
        time.sleep(0.5)

    threading.Timer(5, screenshot).start()

if __name__ == '__main__':
    P1 = Process(target=screenshot)
    P1.start()
    P1.join()

region1 = (572, 333, 35, 15)
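One thing to check for a PyInstaller build that uses multiprocessing on Windows: multiprocessing.freeze_support() has to be called straight after the if __name__ == '__main__': line, otherwise the frozen executable's spawned children can end up re-running the whole program, and a console window that flashes and closes is a typical symptom. A minimal sketch of the adjusted entry point (here screenshot is just a placeholder for the OCR loop above):
import multiprocessing
from multiprocessing import Process

def screenshot():
    # placeholder for the OCR/monitoring function shown above
    print("monitoring one screen region...")

if __name__ == '__main__':
    multiprocessing.freeze_support()  # required for frozen Windows executables

    P1 = Process(target=screenshot)
    P1.start()
    P1.join()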

Logic put in a Python class runs indefinitely

I have logic for bulk-calculating image hashes.
Script 1:
import dhash
import glob
from PIL import Image
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

PATH = '*.jpg'

def makehash(t):
    filename, d = t
    with Image.open(filename) as image:
        image.draft('L', (32, 32))
        row, col = dhash.dhash_row_col(image)
        d[filename] = dhash.format_hex(row, col)

def main():
    with Manager() as manager:
        d = manager.dict()
        with ProcessPoolExecutor() as executor:
            executor.map(makehash, [(jpg, d) for jpg in glob.glob(PATH)])
        print(d)

if __name__ == '__main__':
    main()
For around 10,000 JPEGs, it runs for less than a minute. However, if I put the logic into a class, it runs indefinitely:
import numpy
import cv2
import glob
import os
import dhash
from timeit import default_timer as timer
from datetime import timedelta
from wand.image import Image
from itertools import chain
from alive_progress import alive_bar
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

FOLDER_DUPLICATE = 'duplicate'

def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))

def create_dir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def print_elapsed(sec):
    print("Elapsed Time: ", timedelta(seconds=sec))

class ImgToolkit:
    file_list = {}

    def __init__(self):
        # initialize something
        with Manager() as manager:
            self.file_list = manager.dict()
            print("loaded")

    def find_duplicates(self):
        if os.path.exists(FOLDER_DUPLICATE) and listdir_nohidden(FOLDER_DUPLICATE):
            print("ERROR: Duplicate folder exists and not empty. Halting")
        else:
            start = timer()
            print("Phase 1 - Hashing")
            imgs = glob.glob('*.jpg')

            def get_photo_hashes_pillow(t):
                filename, self.file_list = t
                with Image.open(filename) as image:
                    image.draft('L', (32, 32))
                    row, col = dhash.dhash_row_col(image)
                    self.file_list[filename] = dhash.format_hex(row, col)

            with ProcessPoolExecutor() as executor:
                executor.map(get_photo_hashes_pillow, [(jpg, self.file_list) for jpg in imgs])

            print(self.file_list)
            end = timer()
            print_elapsed(end - start)
And I use the class as follows:
from imgtoolkit import imgtoolkit

if __name__ == '__main__':
    kit = imgtoolkit.ImgToolkit()
    kit.find_duplicates()
What did I miss? I am quite new to Python.
UPDATE
I found that the function get_photo_hashes_pillow never gets called: I put a print() on the first line of the function and it never prints. But why?
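Two things stand out, which may explain both the hang and the print() never firing: the Manager is created in a with block inside __init__, so it is shut down as soon as __init__ returns and self.file_list then points at a dead manager; and get_photo_hashes_pillow is defined inside a method, so it cannot be pickled by ProcessPoolExecutor, and because the map results are never consumed, the pickling failure stays silent. A minimal sketch that keeps the class interface but mirrors the working Script 1 (module-level worker, Manager kept alive for the duration of the call; names are illustrative):
import glob
import dhash
from PIL import Image
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

def hash_one(t):
    # Module-level worker: picklable, so ProcessPoolExecutor can ship it to workers.
    filename, d = t
    with Image.open(filename) as image:
        image.draft('L', (32, 32))
        row, col = dhash.dhash_row_col(image)
        d[filename] = dhash.format_hex(row, col)

class ImgToolkit:
    def find_duplicates(self):
        imgs = glob.glob('*.jpg')
        with Manager() as manager:          # keep the manager alive while hashing
            d = manager.dict()
            with ProcessPoolExecutor() as executor:
                # Consume the iterator so worker exceptions are not silently dropped.
                list(executor.map(hash_one, [(jpg, d) for jpg in imgs]))
            file_list = d.copy()            # copy out before the manager shuts down
        print(file_list)
        return file_list

if __name__ == '__main__':
    ImgToolkit().find_duplicates()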

Continue script after schedule.until() in Python

I would like my Python script to continue after the schedule.every(x).seconds.until(x).do(job), but my program keeps running the job until the x time and doesn't keep going afterwards.
Here is my program:
import requests
import json
import pickle
import schedule
import sys
import time
import pandas as pd
from datetime import datetime, timedelta
from threading import Timer
import matplotlib
from datetime import datetime
import matplotlib.pyplot as plt
from colorama import Fore, Back, Style
import math
import numpy as np
import os

os.environ["PATH"] += os.pathsep + '/Library/TeX/texbin'

key = 'MY_API_KEY'
adress = 'https://api.openweathermap.org/data/2.5/weather'
params = {'appid': key, 'q': 'Lausanne', 'units': 'metric'}

def somme(tab):
    s = 0
    for i in range(len(tab)):
        s = s + tab[i]
    return s

def moyenne(tab):
    return somme(tab) / len(tab)

tab = []
tab2 = []

def function(tab, tab2):
    response = requests.get(adress, params=params)
    weather = response.json()
    temp = weather['main']['temp']
    print(temp)
    tab.append(temp)
    now = datetime.now()
    time = now.strftime("%H:%M:%S")
    tab2.append(time)
    print(tab)
    print(tab2)
    print(moyenne(tab))

function(tab, tab2)

schedule.every(10).seconds.until("20:00").do(function, tab, tab2)

while True:
    schedule.run_pending()
    time.sleep(1)

fig = plt.figure(1, figsize=(9, 7))
ax = fig.add_subplot()
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.plot(tab2, tab, 'b.')
plt.show()
I would like my program to plot the graph after the schedule is done. Is that possible?
Thank you for your help!
EDIT:
Thanks to Tim Roberts for the answer in the comments!
I just have to change my loop to: while datetime.now().hour < 20:
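For reference, this is how the end of the script looks with that change applied (a minimal sketch that assumes the imports, tab/tab2, and the schedule setup above are unchanged): the loop exits once it is 20:00, and execution falls through to the plotting code.
# Run pending jobs until 20:00, then fall through to the plotting code.
while datetime.now().hour < 20:
    schedule.run_pending()
    time.sleep(1)

fig = plt.figure(1, figsize=(9, 7))
ax = fig.add_subplot()
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.plot(tab2, tab, 'b.')
plt.show()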

How to download images while numbering (with multiprocessing)

I want to save the files in the order of the list (like bbb.jpg -> 001.jpg, aaa.jpg -> 002.jpg, ...).
Because of alphabetical ordering, the files are not saved the way I want (like aaa.jpg, bbb.jpg, ccc.jpg, ...).
There is also a way to sort the files chronologically, but that is not possible when using multiprocessing either.
So my question is: how can I save the files in the order, or with the names, that I want?
Here is my code.
from urllib.request import Request, urlopen
import urllib.request
import os
import os.path
import re
import time
from multiprocessing import Pool
import multiprocessing
from functools import partial

mylist = ['https://examsite.com/bbb.jpg',
          'https://examsite.com/aaa.jpg',
          'https://examsite.com/ddd.jpg',
          'https://examsite.com/eee.jpg',
          'https://examsite.com/ccc.jpg']

def image_URL_download(path, html):
    originNames = f"{html}".split('/')[-1]
    PathandNames = path + str(originNames)
    req = urllib.request.Request(html, headers={'User-Agent': 'Mozilla/5.0'})
    data = urllib.request.urlopen(req).read()
    with open(PathandNames, 'wb') as savefile2:
        savefile2.write(data)
    print(f"download {originNames}")

if __name__ == "__main__":
    start = time.time()
    path = './down'
    pool = multiprocessing.Pool(processes=4)
    img_down = partial(image_URL_download, path)
    pool.map(img_down, mylist)
    pool.close()
    pool.join()
    print("DONE! time :", time.time() - start)
Here is a full example that downloads a bunch of images (thumbnails, here) from Wikimedia Commons. It saves them numbered 000.jpg, 001.jpg, etc. (in /tmp, but of course adjust as needed). Bonus: it displays an animated progress bar during download, courtesy of tqdm:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

tld = 'https://commons.wikimedia.org'
url = '/wiki/Category:Images'

soup = BeautifulSoup(requests.get(urljoin(tld, url)).content)
imglist = [x.get('src') for x in soup.find_all('img', src=True)]
imglist = [urljoin(tld, x) for x in imglist if x.endswith('.jpg')]

def load_img(i_url):
    i, url = i_url
    img = requests.get(url).content
    with open(f'/tmp/{i:03d}.jpg', 'wb') as f:
        f.write(img)
    return True

def load_all(imglist):
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(
            executor.map(load_img, enumerate(imglist)),
            total=len(imglist), unit=' images'))
    return results

results = load_all(imglist)
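The same indexing idea carries over to the multiprocessing.Pool code from the question: enumerate the URL list so each worker receives its list position together with the URL, and build the filename from that position. A minimal sketch under that assumption (image_URL_download_numbered is an illustrative name; it reuses mylist from the question):
import os
import time
import urllib.request
from multiprocessing import Pool

mylist = ['https://examsite.com/bbb.jpg',
          'https://examsite.com/aaa.jpg',
          'https://examsite.com/ddd.jpg',
          'https://examsite.com/eee.jpg',
          'https://examsite.com/ccc.jpg']

def image_URL_download_numbered(index, url):
    # The list position, not the URL, decides the filename: 000.jpg, 001.jpg, ...
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    data = urllib.request.urlopen(req).read()
    with open(os.path.join('./down', f'{index:03d}.jpg'), 'wb') as f:
        f.write(data)
    print(f"downloaded {index:03d}.jpg <- {url}")

if __name__ == '__main__':
    start = time.time()
    os.makedirs('./down', exist_ok=True)
    with Pool(processes=4) as pool:
        # starmap unpacks each (index, url) pair into the worker's two arguments
        pool.starmap(image_URL_download_numbered, enumerate(mylist))
    print("DONE! time:", time.time() - start)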
