Adding a ProgressBar to a multithreaded Python script - python
I am trying to add a progress bar to my script, but I have not succeeded, I think because the script is multi-threaded, or maybe because the progress bar has to run in a separate thread. I found plenty of solutions on Stack Overflow (for example, the tqdm library), but I could not get any of them to work, and I am also confused about where exactly the progress-bar code has to go to make it work.
This is my code:
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool
# Path of the input file; each of its lines becomes one work item.
start_raw = "myfile"
# Number of worker threads used for the pool created in Main().
threads = 10

# Load all lines up front. Bytes that are not valid ASCII are dropped
# (errors='ignore') instead of raising a decode error.
with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    # splitlines() already returns a list, so no extra list() copy is needed.
    lists = f.read().splitlines()
def myfunction(x):
    """Print one item, ignoring a failed write.

    x: the value to print (one line of the input file when called via the pool).
    Returns None.
    """
    try:
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(x)
    except (UnicodeError, IOError):
        # Best-effort output: a line the console cannot encode, or a broken
        # output stream, must not abort the remaining items.
        pass
def Main():
    """Run myfunction over every entry of ``lists`` on a pool of worker threads.

    The pool size comes from the module-level ``threads`` setting. Nothing is
    returned. Ordinary errors do not propagate to the caller; they are printed
    as a traceback so a failed run is visible.
    """
    import traceback  # local import: only needed on the error path

    pool = None
    try:
        pool = Pool(int(threads))
        pool.map(myfunction, lists)
    except Exception:
        # Report the problem rather than hiding it: silently passing here would
        # make a bad ``threads`` value or a pool failure look like success.
        traceback.print_exc()
    finally:
        # Always release the worker threads, even when map() failed.
        if pool is not None:
            pool.close()
            pool.join()
# Start the threaded run only when executed as a script, not on import.
if __name__ == '__main__':
    Main()
I have tried this solution (https://stackoverflow.com/a/45276885/9746396):
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool
import tqdm
# Path of the input file; each of its lines becomes one work item.
start_raw = "myfile"
# Number of worker threads used for the pool created in Main().
threads = 1

# Load all lines up front. Bytes that are not valid ASCII are dropped
# (errors='ignore') instead of raising a decode error.
with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    # splitlines() already returns a list, so no extra list() copy is needed.
    lists = f.read().splitlines()
def myfunction(x):
    """Print one item, ignoring a failed write.

    x: the value to print (one line of the input file when called via the pool).
    Returns None.
    """
    try:
        print(x)
    except (UnicodeError, IOError):
        # Best-effort output: a line the console cannot encode, or a broken
        # output stream, must not abort the remaining items.
        pass
def Main():
    """Run myfunction over every entry of ``lists`` on a pool of worker threads.

    The pool size comes from the module-level ``threads`` setting. Nothing is
    returned (the function has no return value). Ordinary errors do not
    propagate to the caller; they are printed as a traceback so a failed run
    is visible.
    """
    import traceback  # local import: only needed on the error path

    pool = None
    try:
        pool = Pool(int(threads))
        pool.map(myfunction, lists)
    except Exception:
        # Report the problem rather than hiding it: silently passing here would
        # make a bad ``threads`` value or a pool failure look like success.
        traceback.print_exc()
    finally:
        # Always release the worker threads, even when map() failed.
        if pool is not None:
            pool.close()
            pool.join()
if __name__ == '__main__':
    # NOTE(review): this is the attempt that fails, kept as posted so the
    # snippet still reproduces the error under discussion. `Main()` is *called*
    # right here; Main has no return statement, so imap receives None as its
    # worker function, which raises "'NoneType' object is not callable".
    with Pool(2) as p:
        r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
But the code does not run, and I get an exception (TypeError: 'NoneType' object is not callable):
0%| | 0/30 [00:00<?, ?it/s]Traceback (most recent call last):
File "file.py", line 35, in <module>
r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
File "C:\mypath\Python\Python38-32\lib\site-packages\tqdm\std.py", line 1118, in __iter__
for obj in iterable:
File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 865, in next
raise value
File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
TypeError: 'NoneType' object is not callable
0%| | 0/30 [00:00<?, ?it/s]
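I presume you wanted to pass myfunction instead of Main to imap, consistent with the first example.
When you write Main() inside r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30)), Python calls Main immediately and passes its return value as the first argument to imap. Main has no return statement, so that value is None, which is why the worker fails with "'NoneType' object is not callable".
You should pass the function itself, without the parentheses: r = list(tqdm.tqdm(p.imap(Main, range(30)), total=30)). Keep in mind that imap calls the function once per item, with that item as its argument, so the function must accept one parameter; Main takes none, which is another reason myfunction is the one to pass.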
Related
python3 subprocess with pickle channeling
from david beazley paper(https://www.dabeaz.com/usenix2009/concurrent/Concurrent.pdf, pg62), i am trying to run codes : # channel.py import pickle class Channel(object): def __init__(self,out_f,in_f): self.out_f = out_f self.in_f = in_f def send(self,item): pickle.dump(item,self.out_f) self.out_f.flush() def recv(self): return pickle.load(self.in_f) # child.py import channel import sys ch = channel.Channel(sys.stdout,sys.stdin) while True: item = ch.recv() ch.send(("child",item)) # parent.py import channel import subprocess p = subprocess.Popen(['python','child.py'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) ch = channel.Channel(p.stdin,p.stdout) • Using the child worker ch.send("Hello World") Hello World When I try this code, I got this error Traceback (most recent call last): File "child.py", line 6, in <module> item = ch.recv() File "C:\Users\bluesky\backtest\channel.py", line 16, in recv return pickle.load(self.in_f) TypeError: a bytes-like object is required, not 'str' what i am trying to get is that sending my data to child and get back processed data to parent process, is there any way to fix it
Multiprocessing using partial() throws ForkingPickler error
I am trying to crawl abstracts from PubMed and filtering them using regex via python. To speed things up, I wanted to use pythons multiprocessing pool. My code looks like the following: import multiprocessing as mp from functools import partial from typing import List, Tuple def beautify_abstract(abstract: str, regex: str): import re result: str = "" last_start = 0 matches = re.finditer(regex, abstract, re.MULTILINE) for matchNum, match in enumerate(matches, start=1): result += abstract[last_start:match.start()] result += "<b>" result += abstract[match.start():match.end()] result += "</b>" last_start = match.end() result += abstract[last_start:] return result def get_doi(pim: str, regex: str): from Bio import Entrez from Bio.Entrez import efetch import re from metapub.convert import pmid2doi Entrez.email = "Your.Name.Here#example.org" print(f"Processing {pim}") abstract_handle = efetch(db="pubmed", id=pim, retmode='text', rettype='all') abstract = abstract_handle.read() abstract_handle.close() if re.search(regex, abstract, re.MULTILINE) is not None: docsum_handle = efetch(db="pubmed", id=pim, retmode='text', rettype='docsum').read() docsum = docsum_handle.read() try: doi = pmid2doi(pim) except: doi = "UNKNOWN" return f"{doi}" return "" def get_pim_with_regex_list(keywords: List[str]) -> List[str]: from Bio import Entrez Entrez.email = "Your.Name.Here#example.org" searchterm = " ".join(keywords) pims = [] handle = Entrez.esearch(db="pubmed", retstart=0, retmax=0, term=searchterm, idtype="acc") record = Entrez.read(handle) handle.close() count = int(record['Count']) if count > 100000: retmax = 100000 else: retmax = count retstart = 0 while retstart < count: handle = Entrez.esearch(db="pubmed", retstart=retstart, retmax=retmax, term=searchterm, idtype="acc") record = Entrez.read(handle) handle.close() for pim in record['IdList']: pims.append(pim) retstart += retmax return pims if __name__ == '__main__': keywords = ["keyword1", "keyword2"] pim_list = 
get_pim_with_regex_list(keywords) regex = "keyword1 keyword2" worker_fn = partial(get_doi, regex=regex) pool = mp.Pool(mp.cpu_count()) entries = pool.map(worker_fn, pim_list) pool.close() pool.join() When I run the given code, I get the following error: Traceback (most recent call last): File "/usr/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap self.run() File "/usr/lib/python3.9/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/usr/lib/python3.9/multiprocessing/pool.py", line 114, in worker task = get() File "/usr/lib/python3.9/multiprocessing/queues.py", line 368, in get return _ForkingPickler.loads(res) TypeError: __new__() missing 2 required positional arguments: 'tag' and 'attributes' Process ForkPoolWorker-4: Traceback (most recent call last): File "/usr/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap self.run() File "/usr/lib/python3.9/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/usr/lib/python3.9/multiprocessing/pool.py", line 114, in worker task = get() File "/usr/lib/python3.9/multiprocessing/queues.py", line 368, in get return _ForkingPickler.loads(res) TypeError: __new__() missing 2 required positional arguments: 'tag' and 'attributes' I did some digging into multiprocessing with python and found out that only python native types are supported as parameters (enforced by the ForkingPickler). Assuming that str is a native type, the code should work... Currently, I am completely lost and have no idea what may be the problem. As suggested, I uploaded a minimal (sequential) working example here Is there any way to fix this problem or at least diagnose the real issue here?
Invalid handle when calling Windows API from Python 3
The following code works well in Python 2: import ctypes def test(): OpenSCManager = ctypes.windll.advapi32.OpenSCManagerA CloseServiceHandle = ctypes.windll.advapi32.CloseServiceHandle handle = OpenSCManager(None, None, 0) print(hex(handle)) assert handle, ctypes.GetLastError() assert CloseServiceHandle(handle), ctypes.GetLastError() test() It does not work in Python 3: 0x40d88f90 Traceback (most recent call last): File ".\test1.py", line 12, in <module> test() File ".\test1.py", line 10, in test assert CloseServiceHandle(handle), ctypes.GetLastError() AssertionError: 6 6 means invalid handle. It seems that in addition, the handles retrieved in Python 2 are smaller numbers, such as 0x100ffc0. It isn't something specific with CloseServiceHandle. This handle cannot be used with any service function. Both Python versions are 64 bit native Windows Python.
You should use argtypes and restype otherwise all argument default to int and are truncated in 64-bit. Also you shouldn't call GetLastError directly but use ctypes.get_last_error()which cache the last error code (there might have been windows APIs called by the interpreter after you perform a call, you can't be sure). Here's a working example: #!/usr/bin/env python3 # -*- coding: utf-8 -*- import sys import ctypes def test(): advapi32 = ctypes.WinDLL("advapi32", use_last_error=True) OpenSCManager = advapi32.OpenSCManagerA OpenSCManager.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_ulong] OpenSCManager.restype = ctypes.c_void_p CloseServiceHandle = advapi32.CloseServiceHandle CloseServiceHandle.argtypes = [ctypes.c_void_p] CloseServiceHandle.restype = ctypes.c_long handle = OpenSCManager(None, None, 0) if not handle: raise ctypes.WinError(ctypes.get_last_error()) print(f"handle: {handle:#x}") result = CloseServiceHandle(handle) if result == 0: raise ctypes.WinError(ctypes.get_last_error()) def main(): test() if __name__ == "__main__": sys.exit(main())
Try using ctypes.windll.Advapi32 instead of ctypes.windll.advapi32
How do I read this stringified javascript variable into Python?
I'm trying to read _pageData from https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1 into Python 2.7.11 so that I can process it, using this code: #!/usr/bin/env python # -*- coding: utf-8 -*- """ Testing _pageData processing. """ import urllib2 import re import ast import json import yaml BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1' def main(): """ Do the business. """ response = urllib2.urlopen(BASE_URL, None) results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read()) first_result = results[0] # These all fail data = ast.literal_eval(first_result) # data = yaml.load(first_result) # data = json.loads(first_result) if __name__ == '__main__': main() but get the following error: Traceback (most recent call last): File "./temp.py", line 24, in <module> main() File "./temp.py", line 19, in main data = ast.literal_eval(first_result) File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 49, in literal_eval node_or_string = parse(node_or_string, mode='eval') File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 37, in parse return compile(source, filename, mode, PyCF_ONLY_AST) File "<unknown>", line 1 
[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, 
FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n ^ SyntaxError: invalid syntax var _pageData is in this format: "[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,
40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n" I've tried replacing the \" and \n and decoding the \uXXXX before using, without success. I've also tried replacing ,, with ,"", and ,'', without success. Thank you.
It seems like there are three kinds of syntactic errors in your string: a "," followed by a ","; a "[" followed by a ","; and a "," followed by a "]". Assuming that those are supposed to be null elements (or ''?), you can just replace those in the original string -- exactly like you did for the ",," case, but you missed the others. Also, you have to do the ",," replacement twice, because matches cannot overlap: a single pass over ",,,," fills only every other gap. Then, you can load the JSON string with json.loads.
>>> s = "your messed up json string"
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r"\[\s*,", "[ null,", s)
>>> s = re.sub(r",\s*\]", ", null]", s)
>>> json.loads(s)
I started off using ast.literal.eval(...) because I was under the (mistaken?) impression that javascript arrays and Python lists were mutually compatible, so all I had to do was destringify _pageData. However, I hadn't noticed that Python doesn't like ,, true, false or [,. Fixing them does the trick (thank you #Two-Bit Alchemist and #tobias_k) So, the following appears to work: #!/usr/bin/env python # -*- coding: utf-8 -*- """ Testing _pageData processing. """ import urllib2 import re import ast import json import yaml BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1' def main(): """ Do the business. """ response = urllib2.urlopen(BASE_URL, None) results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read()) first_result = results[0] first_result = first_result.replace(',,,,,,', ',None,None,None,None,None,') first_result = first_result.replace(',,,,,', ',None,None,None,None,') first_result = first_result.replace(',,,,', ',None,None,None,') first_result = first_result.replace(',,,', ',None,None,') first_result = first_result.replace(',,', ',None,') first_result = first_result.replace('[,', '[None,') first_result = first_result.replace('\\"', '\'') first_result = first_result.replace('\\n', '') first_result = first_result.replace('true', 'True') first_result = first_result.replace('false', 'False') data = ast.literal_eval(first_result) for entry in data: print entry if __name__ == '__main__': main()
TypeError: Popen not iterable
Am re-writing a program from previous question, and I am having trouble with it. Please see code: #!/usr/bin/python import subprocess,time, timeit from multiprocessing import Process, Queue import re, os, pprint, math from collections import defaultdict Dict = {} identifier = "" hexbits = [] count = defaultdict(int) def __ReadRX__(RX_info): lines = iter(RX_info.stdout.readline, "") try: start = time.clock() for line in lines: if re.match(r"^\d+.*$",line): splitline = line.split() del splitline[1:4] identifier = splitline[1] count[identifier] += 1 end = time.clock() timing = round((end - start) * 10000, 100) dlc = splitline[2] hexbits = splitline[3:] Dict[identifier] = [dlc, hexbits, count[identifier],int(timing)] start = end except keyboardinterrupt: pass procRX = subprocess.Popen('receivetest -f=/dev/pcan32'.split(), stdout=subprocess.PIPE) if __name__ == '__main__': munchCan = Process(target=__ReadRX__, args=(procRX)) munchCan.start() munchCan.join() print Dict When trying to run the code I get the following error: File "./cancheck2.py", line 36, in <module> munchCan = Process(target=__ReadRx__, args=(procRX)) File "/usr/lib/python2.7/multiprocessing/process.py", line 104, in __init__ self._args = tuple(args) TypeError: 'Popen' objec is not iterable This code worked before I seperated the subprocess and set up __ReadRX__ as a seperate process. Coudl anyone explain what is happening as I don't quite understand?
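(procRX) doesn't create a tuple; the parentheses only group the expression, so args is the Popen object itself and Process fails when it calls tuple(args) on it. A one-element tuple needs a trailing comma: use args=(procRX,).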