Multiprocessing using getpass - Python

When I run my multiprocessing script, which uses the getpass function (import getpass), I keep getting an error. I am running the code as a .py file from the Command Prompt on Windows 10.
(error message screenshot, not reproduced here)
The following is my code:
import time
import multiprocessing
from multiprocessing import Pool
from multiprocessing import freeze_support
import getpass
import jaydebeapi
import pandas as pd
import numpy as np
from multiprocessing import Process, freeze_support, set_start_method
class multiprocess:

    def __init__(self):
        pass

    def test(self, Batch_test):
        pw_2 = getpass.getpass(prompt="Password", stream=False)
        PML = jaydebeapi.connect('com.ibm.db2.jcc.DB2Driver', 'jdbc:db2://he3qlxvtdbs957.fhlmc.com:50001/PMLFDB2',
                                 ['f408195', pw_2], 'C:/JDBC/db2jcc.jar')
        PML = PML.cursor()
        Batch_query = "select id_hstrcl_data_bch_load_frst_evnt as btch_strt, id_hstrcl_data_bch_load_last_evnt as btch_end from UDBADM.hstrcl_data_bch_load WHERE ID_HSTRCL_DATA_BCH_LOAD BETWEEN 1 and 2"
        PML.execute(Batch_query)
        Batch_records = PML.fetchall()
        Batch_records = pd.DataFrame(Batch_records)
        for ind in Batch_test:
            print(ind)
            first_evnt = Batch_records.iloc[ind, 0]
            last_evnt = Batch_records.iloc[ind, 1]
            PML_loan_Query = "select CAST(b.id_lpa_alt_loan AS INT) AS id_lpa_alt_loan from udbadm.pml_lst_cmpltd_trans_mtch a join udbadm.lpa_altv_loan_idtn b on a.id_evnt = b.id_evnt where b.cd_lpa_alt_loan_idtn = 'HewlettPackardGeneratedTransaction' and a.id_evnt between ? and ?"
            PML.execute(PML_loan_Query, (first_evnt, last_evnt))
            loan_records = PML.fetchall()
        return loan_records

    def run(self):
        processes = []
        for i in range(2):
            p = multiprocessing.Process(target=self.test, args=(i,))
            processes.append(p)
        for p in processes:
            p.start()

if __name__ == '__main__':
    a = multiprocess()
    a.run()
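getpass reads the password from standard input, and worker processes spawned by multiprocessing on Windows do not inherit an interactive console, so calling getpass.getpass inside the worker typically fails. A minimal sketch of the usual workaround, assuming the database logic can be simplified away: prompt once in the parent and hand the password to each worker as an argument.
import getpass
import multiprocessing

def test(pw, batch):
    # the worker receives the password as a plain argument;
    # the DB2 connection from the question would be opened here
    print(batch, len(pw))

if __name__ == '__main__':
    pw_2 = getpass.getpass(prompt="Password")  # prompt once, in the parent process
    processes = []
    for i in range(2):
        p = multiprocessing.Process(target=test, args=(pw_2, i))
        processes.append(p)
    for p in processes:
        p.start()
    for p in processes:
        p.join()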

Related

Logic put in a Python class runs indefinitely

I have logic for bulk-calculating image hashes.
Script 1
import dhash
import glob
from PIL import Image
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
PATH = '*.jpg'
def makehash(t):
    filename, d = t
    with Image.open(filename) as image:
        image.draft('L', (32,32))
        row, col = dhash.dhash_row_col(image)
        d[filename] = dhash.format_hex(row, col)

def main():
    with Manager() as manager:
        d = manager.dict()
        with ProcessPoolExecutor() as executor:
            executor.map(makehash, [(jpg, d) for jpg in glob.glob(PATH)])
        print(d)

if __name__ == '__main__':
    main()
For around 10,000 JPEGs, it runs for less than a minute. However, if I put the logic into a class, it runs indefinitely:
import numpy
import cv2
import glob
import os
import dhash
from timeit import default_timer as timer
from datetime import timedelta
from wand.image import Image
from itertools import chain
from alive_progress import alive_bar
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
FOLDER_DUPLICATE = 'duplicate'
def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))

def create_dir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def print_elapsed(sec):
    print("Elapsed Time: ", timedelta(seconds=sec))

class ImgToolkit:
    file_list = {}

    def __init__(self):
        # initialize something
        with Manager() as manager:
            self.file_list = manager.dict()
            print("loaded")

    def find_duplicates(self):
        if os.path.exists(FOLDER_DUPLICATE) and listdir_nohidden(FOLDER_DUPLICATE):
            print("ERROR: Duplicate folder exists and not empty. Halting")
        else:
            start = timer()
            print("Phase 1 - Hashing")
            imgs = glob.glob('*.jpg')

            def get_photo_hashes_pillow(t):
                filename, self.file_list = t
                with Image.open(filename) as image:
                    image.draft('L', (32, 32))
                    row, col = dhash.dhash_row_col(image)
                    self.file_list[filename] = dhash.format_hex(row, col)

            with ProcessPoolExecutor() as executor:
                executor.map(get_photo_hashes_pillow, [(jpg, self.file_list) for jpg in imgs])
            print(self.file_list)
            end = timer()
            print_elapsed(end-start)
And I use the class as follows:
from imgtoolkit import imgtoolkit

if __name__ == '__main__':
    kit = imgtoolkit.ImgToolkit()
    kit.find_duplicates()
What did I miss? I am quite new to Python.
UPDATE
I found that the function get_photo_hashes_pillow never gets called; I put a print() call on its first line and nothing is printed. But why?
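A likely cause, under the assumption that pickling is the problem: get_photo_hashes_pillow is defined inside a method, and nested functions cannot be pickled, so every task the executor submits fails. Because executor.map is lazy and its results are never consumed, the failure is silent. The Manager in __init__ also exits its with block as soon as __init__ returns, invalidating the proxy dict. A hedged restructuring along the lines of the working Script 1:
import glob
import dhash
from PIL import Image
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

def get_photo_hashes_pillow(t):
    # module-level function: picklable, so worker processes can run it
    filename, d = t
    with Image.open(filename) as image:
        image.draft('L', (32, 32))
        row, col = dhash.dhash_row_col(image)
        d[filename] = dhash.format_hex(row, col)

class ImgToolkit:
    def find_duplicates(self):
        imgs = glob.glob('*.jpg')
        with Manager() as manager:  # keep the manager alive while mapping
            d = manager.dict()
            with ProcessPoolExecutor() as executor:
                # list(...) forces the lazy iterator so worker errors surface here
                list(executor.map(get_photo_hashes_pillow, [(jpg, d) for jpg in imgs]))
            self.file_list = dict(d)  # copy out before the manager shuts down
        print(self.file_list)

if __name__ == '__main__':
    ImgToolkit().find_duplicates()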

Code works differently in VS Code and Jupyter Notebook with multiprocessing

I have code which runs differently in VS Code and Jupyter Notebook.
from multiprocessing.pool import ThreadPool
import functools

def my_func():
    for i in range(4):
        time.sleep(5)
        print(datetime.now().time())
I run the function as
def smap(f):
    return f()
f_inc = functools.partial(my_func)
pool = ThreadPool(3)
res = pool.map(smap, [f_inc])
Expected output (VS Code):
02:08:10.288722
02:13:10.288722
02:18:10.288722
02:23:10.288722
But the output I actually get is:
02:08:10.288
02:08:10.298
02:08:10.299
02:08:10.2998
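For reference, a self-contained version of the fragment with the imports it omits and a main guard, which should behave the same whether run from VS Code, a terminal, or a notebook cell; with time.sleep(5) the four timestamps should each be about five seconds apart:
import time
import functools
from datetime import datetime
from multiprocessing.pool import ThreadPool

def my_func():
    for i in range(4):
        time.sleep(5)  # pause five seconds between prints
        print(datetime.now().time())

def smap(f):
    return f()

if __name__ == '__main__':
    f_inc = functools.partial(my_func)
    with ThreadPool(3) as pool:
        pool.map(smap, [f_inc])  # map blocks until my_func finishes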

Python Multiprocessing within Jupyter Notebook does not work

I am new to the multiprocessing module in Python and work in Jupyter notebooks.
When I try to run the following code in the notebook I keep getting AttributeError: Can't get attribute 'load' on <module '__main__' (built-in)>
When I run it as a file there is no output; it just keeps loading.
import pandas as pd
import datetime
import urllib
import requests
from pprint import pprint
import time
from io import StringIO
from multiprocessing import Process, Pool
symbols = ['AAP']
start = time.time()
dflist = []
def load(date):
    if date is None:
        return
    url = "http://regsho.finra.org/FNYXshvol{}.txt".format(date)
    try:
        df = pd.read_csv(url, delimiter='|')
        if any(df['Symbol'].isin(symbols)):
            stocks = df[df['Symbol'].isin(symbols)]
            print(stocks.to_string(index=False, header=False))
            # Save stocks to mysql
        else:
            print(f'No stock found for {date}')
    except urllib.error.HTTPError:
        pass

pool = []
numdays = 365
start_date = datetime.datetime(2019, 1, 15)  # year - month - day
datelist = [
    (start_date - datetime.timedelta(days=x)).strftime('%Y%m%d') for x in range(0, numdays)
]
pool = Pool(processes=16)
pool.map(load, datelist)
pool.close()
pool.join()
print(time.time() - start)
What can I do to run this code directly from the notebook without issues?
One way to do it:
1. Move the load function out into its own module, for example worker.py.
2. import worker and use worker.load.
3. Then run:
from multiprocessing import Pool
import datetime
import worker

if __name__ == '__main__':
    numdays = 365
    start_date = datetime.datetime(2019, 1, 15)  # year - month - day
    datelist = [
        (start_date - datetime.timedelta(days=x)).strftime('%Y%m%d')
        for x in range(0, numdays)
    ]
    pool = Pool(processes=16)
    pool.map(worker.load, datelist)
    pool.close()
    pool.join()
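And a sketch of what worker.py could contain: simply the load function and its imports lifted out of the notebook (module name and contents assumed from the question):
# worker.py (hypothetical module holding the worker function)
import urllib.error
import pandas as pd

symbols = ['AAP']

def load(date):
    if date is None:
        return
    url = "http://regsho.finra.org/FNYXshvol{}.txt".format(date)
    try:
        df = pd.read_csv(url, delimiter='|')
        if any(df['Symbol'].isin(symbols)):
            stocks = df[df['Symbol'].isin(symbols)]
            print(stocks.to_string(index=False, header=False))
        else:
            print(f'No stock found for {date}')
    except urllib.error.HTTPError:
        pass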

Convert Linux Python multiprocessing to Windows

I would like to use this Linux Python script in Windows Python.
How should I rewrite it? The part that needs rewriting is the multiprocessing part.
from __future__ import print_function
from collections import Counter
import glob
import multiprocessing
import os
import re
import sys
import time
import pandas as pd
def create_data(filepath):
    ...
    return values

filepaths = glob.glob('*/*.txt')
num_tasks = len(filepaths)
p = multiprocessing.Pool()
results = p.imap(create_data, filepaths)
while True:
    completed = results._index
    print("\r--- Completed {:,} out of {:,}".format(completed, num_tasks), end='')
    sys.stdout.flush()
    time.sleep(1)
    if completed == num_tasks:
        break
p.close()
p.join()
df_full = pd.DataFrame(list(results))
print()
Thanks for your help.
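On Windows, multiprocessing starts workers with the spawn method, which re-imports the script in every child, so a Pool created at module level is re-executed recursively. A hedged sketch of the usual restructuring, with the pool setup guarded by if __name__ == '__main__' (the body of create_data is a placeholder, since the original is elided):
import glob
import multiprocessing

def create_data(filepath):
    # placeholder body; the real logic is elided in the question
    with open(filepath) as f:
        return len(f.read())

if __name__ == '__main__':  # required on Windows: children re-import this module
    filepaths = glob.glob('*/*.txt')
    num_tasks = len(filepaths)
    with multiprocessing.Pool() as p:
        results = list(p.imap(create_data, filepaths))
    print("--- Completed {:,} out of {:,}".format(len(results), num_tasks))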

Trying to use multiprocessing to fill an array in python

I have code like this:
import numpy as np
from multiprocessing import Process, Pool

x = 3
y = 3
z = 10
ar = np.zeros((x, y, z))
para = []
process = []

def local_func(section):
    print "section %s" % str(section)
    ar[2,2,section] = 255
    print "value set %d", ar[2,2,section]

pool = Pool(1)
run_list = range(0,10)
list_of_results = pool.map(local_func, run_list)
print ar
The value in ar was not changed with multithreading. What might be wrong?
Thanks
You're using multiple processes here, not multiple threads. Because of that, each instance of local_func gets its own separate copy of ar. You can use a custom Manager to create a shared numpy array, which you can pass to each child process and get the results you expect:
import numpy as np
from functools import partial
from multiprocessing import Process, Pool
import multiprocessing.managers
x = 3
y = 3
z = 10

class MyManager(multiprocessing.managers.BaseManager):
    pass

MyManager.register('np_zeros', np.zeros, multiprocessing.managers.ArrayProxy)

para = []
process = []

def local_func(ar, section):
    print "section %s" % str(section)
    ar[2,2,section] = 255
    print "value set %d", ar[2,2,section]

if __name__ == "__main__":
    m = MyManager()
    m.start()
    ar = m.np_zeros((x,y,z))
    pool = Pool(1)
    run_list = range(0,10)
    func = partial(local_func, ar)
    list_of_results = pool.map(func, run_list)
    print ar
Well, multi-threading and multi-processing are different things.
With multi-threading, threads share access to the same array.
With multi-processing, each process has its own copy of the array.
multiprocessing.Pool is a process pool, not a thread pool.
If you want a thread pool, use multiprocessing.pool.ThreadPool:
Replace:
from multiprocessing import Pool
with:
from multiprocessing.pool import ThreadPool as Pool
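For illustration, a hedged sketch of the question's snippet with that one-line swap applied (and Python 3 print calls); because threads share the process's memory, the writes to ar persist:
import numpy as np
from multiprocessing.pool import ThreadPool as Pool

x, y, z = 3, 3, 10
ar = np.zeros((x, y, z))

def local_func(section):
    ar[2, 2, section] = 255  # threads share memory, so this write is visible afterwards

if __name__ == '__main__':
    with Pool(1) as pool:
        pool.map(local_func, range(0, 10))
    print(ar[2, 2])  # all ten entries are now 255.0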
