I have a bot(query, key) function to post data, a dicts(query, answer) function to wrap the return result, and query_pipe(query_list) to process a list of query requests. But when I run that in a multiprocessing.Process, I find that bot(query, key) returns nothing. Here's my code.
def bot(query, key):
    """Post *query* to the chat API and wrap the reply.

    Returns a {'query', 'answer'} dict on success ('failed' as the
    answer when the API reports a non-success code), or the string
    '500 Error' if the request or JSON parsing raises.
    NOTE(review): mixing dict and str return types forces callers to
    type-check the result; consider dicts(query, '500 Error') instead.
    """
    data = {
        'key': key,
        'info': query,
        'userid': 'wechat-robot',
    }
    try:
        # `url` is expected to be defined at module level -- TODO confirm.
        page = requests.post(url, data=data)
        body = page.json()  # parse the response once instead of twice
        if body['code'] == '100000':
            return dicts(query, body['text'])
        return dicts(query, 'failed')
    except Exception:
        # Best-effort sentinel kept from the original contract.
        return '500 Error'
def dicts(query, answer):
    """Bundle a query string and its answer into a result mapping."""
    wrapped = dict(query=query, answer=answer)
    return wrapped
def query_pipe(query_list):
    """Load API keys from keys.txt and run the first query through bot().

    NOTE(review): despite taking a whole list, only query_list[0] is
    processed with keys_pool[0] -- presumably a stub; confirm before
    extending to the full list.
    """
    with open('keys.txt', 'r') as f:
        # One key per line; comprehension replaces the readlines loop
        # and the unused `idx` counter from the original.
        keys_pool = [line.strip('\n') for line in f]
    print(bot(query_list[0], keys_pool[0]))
# Run the pipeline in a child process.  Output appears on the child's
# stdout; if nothing prints, check that the child did not die on an
# unhandled exception (e.g. keys.txt missing in its working directory).
p = Process(target=query_pipe, args=(query_data,))
p.start()
p.join()  # block until the child process finishes
But when I run query_pipe(query_list) directly, without multiprocessing.Process, it prints the correct output. I feel confused, so any hint would be highly appreciated.
Related
I've a function for connecting to network devices(Netmiko).
I'm catching AuthenticationException from netmiko, changing user creds and trying again.
Main function:
# Iterate every device.  enumerate() fixes the off-by-one of
# zip(range(1, num_devices), devices), which silently dropped the last
# device (range(1, n) yields only n-1 values), and avoids shadowing the
# builtin `round`.  Assumes num_devices == len(devices) -- TODO confirm.
for position, device in enumerate(devices, start=1):
    dev_class = d2c(device)
    print(f"Device {dev_class.Hostname}: {dev_class.MgmtIp}, {position} of {len(devices)}")
    devParsed = connectionMethods.Prep(dev_class, playVars)
    services = connectionMethods.TestSSH(devParsed, user)
    print(services)  # None here means TestSSH took a path with no return
def TestSSH(device, user, notes_in=None, num_tries=0, timeout=10):
    """SSH to *device*, run its question commands, and collect 'server' IPs.

    Returns a (dict_out, status) tuple: status 200 on success, 400 when
    the user declines a retry after an auth failure, 401 when the retry
    itself failed.  On NetmikoAuthenticationException with num_tries == 0
    the user may supply new credentials and the call recurses once.

    BUG FIX: the original discarded the recursive retry's result and had
    no return on the success path, so callers received None (the exact
    symptom described in the question).
    """
    try:
        dict_out = {}
        notes = {'notes': []}
        if notes_in:
            notes['notes'].append(notes_in)
        notes['notes'].append(f"Number of tries = {num_tries}")
        print(f"""************************************************\n"""
              f"""Connecting to device : {device.device.Hostname}\n"""
              f"""ip address : {device.device.MgmtIp}\n"""
              f"""Tried to connect before {num_tries} times.\n"""
              f"""Using {user.username} to connect\n"""
              f"""************************************************\n""")
        logging.basicConfig(filename='debug.log', level=logging.DEBUG)
        logger = logging.getLogger("netmiko")
        Conn = {'device_type': device.device.driver,
                'ip': device.device.MgmtIp,
                'username': user.username,
                'password': user.password,
                'conn_timeout': timeout,
                }
        # Matches 'server <dotted-quad>' lines; group(2) is the address.
        pattern = re.compile(r'(server\s+)(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})')
        net_connect = ConnectHandler(**Conn)
        if not net_connect.check_enable_mode():
            net_connect.enable()
        for command in device.questions:
            output_list = []
            list_out = []
            for k, v in command.items():
                output = net_connect.send_command(v)
                print(f"""Output from device :
{output}""")
                for line in output.splitlines():
                    result = pattern.search(line)
                    if result:
                        output_list.append(result.group(2))
                num_servers = len(output_list)
                print(f"Number of lines returned {num_servers}")
                if num_servers > 0:
                    # enumerate-style numbering; `round` renamed to avoid
                    # shadowing the builtin.
                    for idx, line in zip(range(1, num_servers + 1), output_list):
                        list_out.append(f"server{idx} : {line}")
                    list_parsed = ','.join(list_out)
                    print(list_parsed)
                else:
                    list_parsed = "No servers configured"
                dict_out.update({k: list_parsed})
        # BUG FIX: the original fell off the end here, implicitly
        # returning None even when every command succeeded.
        return dict_out, 200
    except NetmikoAuthenticationException as e:
        exception_type = type(e).__name__
        print(f"Error type = {exception_type}")
        if num_tries == 0:
            if input("Retry with a new token?:").lower() == "y":
                retry_user = User()
                retry_user.GetToken()
                # BUG FIX: propagate the retry's result -- the original
                # dropped it and fell through, so the caller got None.
                return TestSSH(device, retry_user, notes_in="admin login",
                               num_tries=1, timeout=10)
            else:
                notes['notes'].append(f"{exception_type}, retries : {num_tries}")
                notes['notes'] = ','.join(notes.get('notes'))
                dict_out.update(notes)
                print(f""" Error from device {device.device.Hostname}:
{dict_out.get('notes')}""")
                return dict_out, 400
        else:
            notes['notes'].append(f"{exception_type}, retries : {num_tries}")
            notes['notes'] = ','.join(notes.get('notes'))
            dict_out.update(notes)
            print(dict_out)
            print(f""" Error from device {device.device.Hostname}:
{dict_out.get('notes')}""")
            print(type(dict_out))
            return dict_out, 401
What has me scratching my head is this:
If num_tries == 0 and I choose not to try another account, return is as I would expect:
Retry with a new token?:n
Error from device xxx.xxx.xxx:
NetmikoAuthenticationException, retries : 0
({'notes': 'NetmikoAuthenticationException, retries : 0'}, 400)
If num_tries is gt == 0:
Error type = NetmikoAuthenticationException
{'notes': 'admin login,Number of tries = 1,NetmikoAuthenticationException, retries : 1'}
Error from device xxx.xxx.xxx:
admin login,Number of tries = 1,NetmikoAuthenticationException, retries : 1
<class 'dict'>
None
I can't figure out why the returned dictionary is None, when it clearly is not None when I print it just before returning it.
Any suggestion how to troubleshoot the issue are welcome.
Python version Python 3.9.6
Br,
Heikki
I would hope to produce:
({'notes': 'NetmikoAuthenticationException, retries : 1'}, 401)
The reason you get None printed here (print(services)) is that you also call TestSSH() from within itself and discard that inner call's result; the outer call then exits the function by default. That is, it's as if Python has written a return None for you at the end of the function.
You could add a return yourself, but I don't know if this will help:
# Propagate the recursive call's result instead of discarding it.
return TestSSH(device, retry_user, notes_in= "admin login",num_tries=1, timeout=10)
I'm currently trying to loop over roughly 9000 .txt files in python to extract data and add them to a joined pandas data frame. The .txt data is stored in bytes, so in order to access it I was told to use a decoder. Because I'm interested in preserving special characters, I would like to use the UTF-8 decoder, but I'm getting the following error when trying to do so:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 3131: invalid start byte
For some reason, the code works just fine when using a 'ISO-8859-1' decoder, but this obviously messes up all special characters. Does anyone know how to fix this? I'm pasting my code below! Also, the decoding works for the first ~1600 .txt files in my dataset, but for the rest it doesn't.
# Decode each document from bytes to str, writing back into the frame.
# BUG FIX: `corpus.iloc[index][0] = ...` is chained indexing and may
# assign into a temporary copy (pandas SettingWithCopy), silently
# leaving the frame unchanged; `.iat[row, col]` assigns in place.
decode_counter = 0
for index, raw in enumerate(corpus[0]):
    decode_counter += 1
    # strict errors kept on purpose: a UnicodeDecodeError still flags
    # files that are not really UTF-8 (e.g. cp1252/latin-1 exports).
    corpus.iat[index, 0] = raw.decode('UTF-8')
The corpus variable contains the name of the .txt file as an index, and the contents of the individual .txt files in a column named 0. Thank you very much!
Maybe you could try every codec available in your environment and check which result fits best.
Here is a way of doing that:
import os, codecs, encodings
from collections import OrderedDict
from typing import Union
from cprinter import TC
from input_timeout import InputTimeout
class CodecChecker:
    """Try every codec shipped with this Python installation against a
    file or a bytes object and collect the per-codec results so a human
    can pick the encoding that fits best."""

    def __init__(self):
        # All codecs discovered in the stdlib `encodings` package.
        self.encodingdict = self.get_codecs()
        # Filled by try_open_file() / try_convert_bytes().
        self.results = OrderedDict()

    def get_codecs(self):
        """Return {codec_name: {'object': CodecInfo}} for every codec
        module found in the stdlib encodings package directory."""
        dir = encodings.__path__[0]
        codec_names = OrderedDict()
        for filename in os.listdir(dir):
            if not filename.endswith(".py"):
                continue
            name = filename[:-3]
            try:
                codec_names[name] = OrderedDict({"object": codecs.lookup(name)})
            except Exception as Fehler:
                # Some modules under encodings/ are helpers, not codecs;
                # lookup() fails for those and they are skipped.
                pass
        return codec_names

    def try_open_file(self, path: str, readlines: int = 0):
        """Decode the file at *path* with every codec (strict errors).

        readlines == 0 reads the whole file at once; otherwise at most
        *readlines* lines are kept per codec.  Per-codec outcome (the
        decoded text, or the exception message on failure) is stored in
        self.results; returns self so calls can be chained.
        """
        self.results = OrderedDict()
        results = OrderedDict()
        if readlines == 0:
            for key, item in self.encodingdict.items():
                results[key] = {"strict_encoded": [], "strict_bad": True}
                try:
                    with open(path, encoding=key) as f:
                        data = f.read()
                    results[key]["strict_encoded"].append(data)
                    results[key]["strict_bad"] = False
                except Exception as fe:
                    # Record the failure message in place of the text.
                    results[key]["strict_encoded"].append(str(fe))
                    continue
        else:
            for key, item in self.encodingdict.items():
                results[key] = {"strict_encoded": [], "strict_bad": True}
                try:
                    with open(path, encoding=key) as f:
                        for ini, line in enumerate(f.readlines()):
                            if ini == readlines:
                                break
                            # line[:-1] drops the trailing newline.
                            results[key]["strict_encoded"].append(line[:-1])
                    results[key]["strict_bad"] = False
                except Exception as fe:
                    results[key]["strict_encoded"].append(str(fe))
                    continue
        self.results = results.copy()
        return self

    def try_convert_bytes(self, variable: bytes):
        """Decode the bytes *variable* with every codec in all three
        error modes (strict / ignore / replace); results land in
        self.results.  Returns self for chaining."""
        self.results = OrderedDict()
        results = OrderedDict()
        modes = ["strict", "ignore", "replace"]
        for key, item in self.encodingdict.items():
            results[key] = {
                "strict_encoded": [],
                "strict_bad": True,
                "ignore_encoded": [],
                "ignore_bad": True,
                "replace_encoded": [],
                "replace_bad": True,
            }
            for mo in modes:
                try:
                    # CodecInfo.decode returns a (text, length) tuple.
                    results[key][f"{mo}_encoded"].append(
                        item["object"].decode(variable, mo)
                    )
                    results[key][f"{mo}_bad"] = False
                except Exception as Fe:
                    results[key][f"{mo}_encoded"].append(str(Fe))
        self.results = results.copy()
        return self

    def print_results(
        self, pause_after_interval: Union[int, float] = 0, items_per_interval: int = 0
    ):
        """Pretty-print self.results with colored headings; optionally
        pause every *items_per_interval* codecs for up to
        *pause_after_interval* seconds (InputTimeout)."""
        counter = 0
        for key, item in self.results.items():
            if pause_after_interval != 0 and items_per_interval != 0:
                if items_per_interval == counter and counter > 0:
                    i = InputTimeout(
                        timeout=pause_after_interval,
                        input_message=f"Press any key to continue or wait {pause_after_interval} seconds",
                        timeout_message="",
                        defaultvalue="",
                        cancelbutton=None,
                        show_special_characters_warning=None,
                    ).finalvalue
                    counter = 0
            print(
                f'\n\n\n{"Codec".ljust(20)}: {str(TC(key).bg_cyan.fg_black)}'.ljust(100)
            )
            # -- strict results: present for both file and bytes runs --
            if "strict_bad" in item and "strict_encoded" in item:
                print(f'{"Mode".ljust(20)}: {TC("strict").fg_yellow.bg_black}')
                if item["strict_bad"] is False:
                    # Tuple entries come from try_convert_bytes (text, length).
                    if isinstance(item["strict_encoded"][0], tuple):
                        if item["strict_bad"] is False:
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{item['strict_encoded'][0][1]}''').fg_purple.bg_black}\n{'Converted'.ljust(20)}: {TC(f'''{item['strict_encoded'][0][0]}''').fg_green.bg_black}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {item['strict_encoded'][0][0]!r}"""
                                )
                        if item["strict_bad"] is True:
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{"None"}''').fg_red.bg_black}\n{'Converted'.ljust(20)}: {TC(f'''{item['strict_encoded'][0]}''').fg_red.bg_black}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {item['strict_encoded'][0][0]!r}"""
                                )
                    # String entries come from try_open_file (lines/whole text).
                    if isinstance(item["strict_encoded"][0], str):
                        if item["strict_bad"] is False:
                            itemlen = len("".join(item["strict_encoded"]))
                            concatitem = "\n" + "\n".join(
                                [
                                    f"""Line: {str(y).ljust(14)} {str(f'''{x}''')}"""
                                    for y, x in enumerate(item["strict_encoded"])
                                ]
                            )
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{itemlen}''').fg_purple.bg_black}\n{'Converted'.ljust(20)}: {concatitem}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {concatitem!r}"""
                                )
                        if item["strict_bad"] is True:
                            concatitem = TC(
                                " ".join(item["strict_encoded"])
                            ).fg_red.bg_black
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{"None"}''').fg_red.bg_black}\n{'Converted'.ljust(20)}: {concatitem}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {concatitem!r}"""
                                )
                    print("")
            # -- ignore results: only populated by try_convert_bytes --
            if "ignore_bad" in item and "ignore_encoded" in item:
                print(f'{"Mode".ljust(20)}: {TC("ignore").fg_yellow.bg_black}')
                if item["ignore_bad"] is False:
                    if isinstance(item["ignore_encoded"][0], tuple):
                        if item["ignore_bad"] is False:
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{item['ignore_encoded'][0][1]}''').bg_black.fg_lightgrey}\n{'Converted'.ljust(20)}: {TC(f'''{item['ignore_encoded'][0][0]}''').bg_black.fg_lightgrey}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {item['ignore_encoded'][0][0]!r}"""
                                )
                print("")
            # -- replace results: only populated by try_convert_bytes --
            if "replace_bad" in item and "replace_encoded" in item:
                print(f'{"Mode".ljust(20)}: {TC("replace").fg_yellow.bg_black}')
                if item["replace_bad"] is False:
                    if isinstance(item["replace_encoded"][0], tuple):
                        if item["replace_bad"] is False:
                            try:
                                print(
                                    f"""{'Length'.ljust(20)}: {TC(f'''{item['replace_encoded'][0][1]}''').bg_black.fg_lightgrey}\n{'Converted'.ljust(20)}: {TC(f'''{item['replace_encoded'][0][0]}''').bg_black.fg_lightgrey}"""
                                )
                            except Exception:
                                print(
                                    f"""Problems during printing! Raw string: {item['replace_encoded'][0][0]!r}"""
                                )
            counter = counter + 1
        return self
if __name__ == "__main__":
    # Demo: write a small UTF-8-SIG file, then show how every installed
    # codec decodes it -- first line-limited, then whole-file, then the
    # bytes-based API with a deliberately mis-decoded round trip.
    teststuff = b"""This is a test!
Hi there!
A little test! """
    testfilename = "test_utf8.tmp"
    with open("test_utf8.tmp", mode="w", encoding="utf-8-sig") as f:
        f.write(teststuff.decode("utf-8-sig"))
    codechecker = CodecChecker()
    codechecker.try_open_file(testfilename, readlines=2).print_results(
        pause_after_interval=1, items_per_interval=10
    )
    codechecker.try_open_file(testfilename).print_results()
    codechecker.try_convert_bytes(teststuff.decode("cp850").encode()).print_results(
        pause_after_interval=1, items_per_interval=10
    )
Or you simply run a script to replace all messed up characters. Since I am a German teacher, I have this problem frequently (encoding problems due to Umlaut). Here is a script to replace all characters (too big to post the script here): https://github.com/hansalemaos/LatinFixer/blob/main/__init__.py
I'm trying to call the extract function and the extract_url function within a function, and I get a NameError: name 'endpoint' and name 'agg_key' are not defined. I'm doing this so I can call a script from another script without needing to run the command line. How would I go about doing this?
Function I'm trying to call:
def scrape_all_products(URL, endpoint=None, agg_key=None, args=None):
    """Run the product extraction pipeline for one shop.

    BUG FIX: the original referenced `endpoint`, `agg_key` and `args`
    without defining them (NameError) -- they are locals of other
    functions, so they must be passed in explicitly.  Defaults keep the
    original one-argument call compatible, but all three are required
    in practice and are validated up front.
    """
    if endpoint is None or agg_key is None or args is None:
        raise TypeError(
            'scrape_all_products() needs endpoint, agg_key and args; '
            'they are not module globals')
    extract(endpoint, agg_key, page_range=None)
    extract_url(args)
Functions I'm calling:
def extract(endpoint, agg_key, page_range=None):
    """Page through `endpoint?page=N` and aggregate the *agg_key* lists.

    Starts at page 1 and stops at the first page that has no items
    under *agg_key* or falls outside *page_range* (an inclusive
    (first, last) tuple; None means all pages).  Redirects are handled
    by rewriting `endpoint` to the redirected scheme/host/path.
    Returns the concatenated item list; raises on HTTP errors or on a
    non-JSON content type.
    """
    r_list = list(range(page_range[0], page_range[1] + 1)) if page_range else []
    # Hoisted out of the loop: the env var cannot change between pages.
    timeout = int(os.environ.get('REQUEST_TIMEOUT', 0)) or 10
    page = 1
    agg_data = []
    while True:
        page_endpoint = endpoint + f'?page={str(page)}'
        response = requests.get(page_endpoint, timeout=timeout)
        response.raise_for_status()
        if response.url != page_endpoint:  # to handle potential redirects
            p_endpoint = urlparse(response.url)  # parsed URL
            endpoint = p_endpoint.scheme + '://' + p_endpoint.netloc + p_endpoint.path
        # `!=` replaces the original `not x == y` (PEP 8 idiom).
        if response.headers['Content-Type'] != 'application/json; charset=utf-8':
            raise Exception('Incorrect response content type')
        data = response.json()
        page_has_products = agg_key in data and len(
            data[agg_key]) > 0
        page_in_range = page in r_list or page_range is None
        # break loop if empty or want first page
        if not page_has_products or not page_in_range:
            break
        agg_data += data[agg_key]
        page += 1
    return agg_data
Other function:
def extract_url(args):
    """Scrape one shop URL and optionally save the result to disk.

    Builds the `{url}/{agg_key}.json` endpoint (products, or
    collections when args.collections is set), extracts all pages via
    extract(), and writes the data under args.dest_path.  Returns a
    status dict with 'endpoint_attempted', 'collected_at', 'success',
    'error' and, on success, the data under agg_key plus 'file_path'.
    """
    p = format_url(args.url, scheme='https', return_type='parse_result')
    formatted_url = p.geturl()
    agg_key = 'products'
    if args.collections:
        agg_key = 'collections'
    fp = os.path.join(
        args.dest_path, f'{p.netloc}.{agg_key}.{args.output_type}')
    if args.file_path:
        # Explicit file name overrides the derived <host>.<key> name.
        fp = os.path.join(
            args.dest_path, f'{args.file_path}.{args.output_type}')
    endpoint = f'{formatted_url}/{agg_key}.json'
    ret = {
        'endpoint_attempted': endpoint,
        'collected_at': str(datetime.now()),
        'success': False,
        'error': ''
    }
    try:
        data = extract(endpoint, agg_key, args.page_range)
    except Exception as err:
        # HTTPError, JSONDecodeError and everything else were handled
        # identically in the original's three except arms; one clause
        # preserves that behavior without the duplication.
        ret['error'] = str(err)
    else:
        ret['success'] = True
        ret[agg_key] = data
    if ret['success']:
        ret['file_path'] = str(fp)
        save_to_file(fp, data, args.output_type)
    return ret
The scrape_all_products function only knows about variables created inside of that function and variables passed to it (which in this case is URL). endpoint and agg_key were both created inside of a different function. You have to pass those variables to scrape_all_products the same way you are passing URL. So do:
def scrape_all_products(URL, endpoint, agg_key, args):
And then you would have to appropriately modify anywhere scrape_all_products is called.
I would like an exception to be thrown so I can complete coverage for a few lines.
def __query_items_from_db(self, my_id: str) -> list:
    """Fetch the rows whose MY_ID key equals *my_id*.

    Returns the raw DynamoDB query response, or None when the query
    raises a ClientError (the error is printed, not re-raised).
    """
    query_kwargs = {
        'KeyConditionExpression': '#id = :id',
        'ExpressionAttributeValues': {':id': my_id},
        'ExpressionAttributeNames': {'#id': 'MY_ID'},
    }
    try:
        return self.table.query(**query_kwargs)
    except ClientError as exc:
        print('__query_items_from_db', exc)
        return None
This code works and won't throw an error as I have other code that sets up the table and and seeds data.
Here's what I tried to get the error to throw:
@mock_dynamodb2
def test_should_handle_an_error():
    """Force __query_items_from_db down its ClientError path.

    BUG FIX: the original raised ClientError() by hand inside
    pytest.raises, which only proved pytest works and never executed
    the code under test (and ClientError() cannot even be built without
    its required arguments).  Instead, let boto3 raise for real by
    querying a table that the mocked account does not have.
    """
    db_resource = boto3.resource('dynamodb')  # no table created, on purpose
    module = CoverageReport(db_resource)
    actual_result = module._CoverageReport__query_items_from_db('1')
    # The method swallows the ClientError and returns None.
    assert actual_result is None
Turns out I was thinking about this the wrong way. I forced an error by not creating the table before the test executes, so the query targets a non-existent table and raises for real. Now I can check that my result is None.
def test_should_handle_an_error():
    """__query_items_from_db returns None when the table does not exist."""
    # NOTE(review): this likely still needs @mock_dynamodb2 (or moto set
    # up in a fixture) so it never touches real AWS -- confirm.
    db_resource = boto3.resource('dynamodb')
    module = CoverageReport(db_resource)
    actual_result = module._CoverageReport__query_items_from_db('testtesttest')
    # `is None` is the idiomatic identity check (PEP 8); `== None` was flagged.
    assert actual_result is None
I am getting an unexpected error when using this. The first section is from a script that I found online, and I am trying to use it to pull a particular section identified in the PDF's outline. Everything works fine, except right at output.write(outputfile1) it says:
PdfReadError: multiple definitions in dictionary.
Anybody else run into this? Please forgive all the unnecessary prints at the end. :)
import pyPdf
import glob
class Darrell(pyPdf.PdfFileReader):
    """PdfFileReader extension mapping outline (bookmark) titles to
    0-based page numbers.  Python 2 code (pyPdf, iteritems)."""

    def getDestinationPageNumbers(self):
        """Return {outline title: page number} for every destination in
        the document outline ('???' when the page id cannot be resolved)."""
        def _setup_outline_page_ids(outline, _result=None):
            # Recursively collect {(id, title): page object id} from the
            # (possibly nested) outline list.
            if _result is None:
                _result = {}
            for obj in outline:
                if isinstance(obj, pyPdf.pdf.Destination):
                    _result[(id(obj), obj.title)] = obj.page.idnum
                elif isinstance(obj, list):
                    _setup_outline_page_ids(obj, _result)
            return _result

        def _setup_page_id_to_num(pages=None, _result=None, _num_pages=None):
            # Walk the /Pages tree; leaf /Page objects are numbered in
            # document order via the running _num_pages list length.
            if _result is None:
                _result = {}
            if pages is None:
                _num_pages = []
                pages = self.trailer["/Root"].getObject()["/Pages"].getObject()
            t = pages["/Type"]
            if t == "/Pages":
                for page in pages["/Kids"]:
                    _result[page.idnum] = len(_num_pages)
                    _setup_page_id_to_num(page.getObject(), _result, _num_pages)
            elif t == "/Page":
                _num_pages.append(1)
            return _result

        outline_page_ids = _setup_outline_page_ids(self.getOutlines())
        page_id_to_page_numbers = _setup_page_id_to_num()
        result = {}
        # Python 2 dict API (iteritems); '???' marks unresolved ids.
        for (_, title), page_idnum in outline_page_ids.iteritems():
            result[title] = page_id_to_page_numbers.get(page_idnum, '???')
        return result
# Python 2 script: for every PDF in the working directory, copy the
# pages between the "CATEGORY 1" and "CATEGORY 2" bookmarks into a new
# <prefix>Category1_data.pdf file.
for fileName in glob.glob("*.pdf"):
    output = pyPdf.PdfFileWriter()
    print fileName
    pdf = Darrell(open(fileName, 'rb'))
    template = '%-5s %s'
    print template % ('page', 'title')
    # List every bookmark with its 1-based page number.
    for p,t in sorted([(v,k) for k,v in pdf.getDestinationPageNumbers().iteritems()]):
        print template % (p+1,t)
    # NOTE(review): startpg/endpg stay unbound if a PDF lacks either
    # bookmark (NameError below), and range(startpg, endpg) excludes
    # endpg itself -- confirm both are intended.
    for p,t in sorted([(v,k) for k,v in pdf.getDestinationPageNumbers().iteritems()]):
        if t == "CATEGORY 1":
            startpg = p+1
            print p+1,'is the first page of Category 1.'
        if t == "CATEGORY 2":
            endpg = p+1
            print p+1,'is the last page of Category 1.'
    print startpg, endpg
    pagenums = range(startpg,endpg)
    print pagenums
    for i in pagenums:
        output.addPage(pdf.getPage(i))
    # Drop the last 13 characters of the source name to build the output name.
    fileName2 = "%sCategory1_data.pdf" % (str(fileName[:-13]))
    print "%s has %s pages." % (fileName2,output.getNumPages())
    outputfile1 = file(r"%s" % (fileName2), 'wb')  # Python 2 file() builtin
    output.write(outputfile1)
    outputfile1.close()
I know it might be too late for you, but for anyone else who will stumble here to look for the answer:
I had the same problem today and solved it by setting:
# strict=False makes the reader log a warning instead of raising
# PdfReadError on malformed PDF dictionaries.
export_reader = PdfFileReader(filename, strict=False)
If you are just merging, then use:
# strict=False: tolerate malformed PDF dictionaries while merging,
# downgrading the error to a warning.
merger = PdfFileMerger(strict=False)
This way, you will get only a warning, rather than an exception.