I would like an exception to be thrown so I can complete coverage for a few lines.
def __query_items_from_db(self, my_id: str) -> list:
    """Query the table for the items whose ``MY_ID`` key equals ``my_id``.

    Returns whatever ``self.table.query`` returns. If the call raises
    ``ClientError`` the error is printed (not re-raised) and ``None`` is
    returned instead.
    """
    # '#id' and ':id' are placeholders resolved through the two mappings below.
    query_kwargs = {
        'KeyConditionExpression': '#id = :id',
        'ExpressionAttributeValues': {':id': my_id},
        'ExpressionAttributeNames': {'#id': 'MY_ID'},
    }
    try:
        return self.table.query(**query_kwargs)
    except ClientError as err:
        print('__query_items_from_db', err)
        return None
This code works and won't throw an error, as I have other code that sets up the table and seeds data.
Here's what I tried to get the error to throw:
@mock_dynamodb2
def test_should_handle_an_error():
    """The query helper returns None when the table query raises ClientError."""
    from unittest.mock import Mock

    db_resource = create_mock_table()
    module = CoverageReport(db_resource)

    # ClientError needs an error response and an operation name; calling it
    # with no arguments raises TypeError instead of the error we want.
    forced_error = ClientError(
        {'Error': {'Code': 'ResourceNotFoundException',
                   'Message': 'forced by test'}},
        'Query',
    )
    # Make the exact call guarded by the try/except fail.
    # NOTE(review): assumes `table` is a plain, assignable attribute of
    # CoverageReport -- confirm against the class.
    module.table = Mock()
    module.table.query.side_effect = forced_error

    # No pytest.raises here: the method catches the error itself, so the
    # observable outcome is the None return value.
    actual_result = module._CoverageReport__query_items_from_db('1')

    assert actual_result is None
    module.table.query.assert_called_once()
Any ideas?
Turns out I was thinking about this the wrong way. I forced an error by not creating the table before the test executes so I can't "query" a non-existent table. Now I can check that my result is None.
def test_should_handle_an_error():
    """The query helper returns None when the table was never created."""
    # Deliberately no table setup: querying a non-existent table fails, which
    # exercises the error branch of the helper.
    db_resource = boto3.resource('dynamodb')
    module = CoverageReport(db_resource)
    actual_result = module._CoverageReport__query_items_from_db('testtesttest')
    # Identity check: None is a singleton, and `==` can be overridden.
    assert actual_result is None
Related
As the title says, I want to check the syntax of a query just before executing it in Python.
For example, a function named check_syntax:
# Desired behaviour of the hypothetical check_syntax() helper.
correct_sql = "select * from table;"
wrong_sql = "selecccct * from table;"
check_syntax(correct_sql)  # expected result: True
check_syntax(wrong_sql)  # expected result: False
I wonder, is there any library in Python that can help? I have looked into sqlparse, but it doesn't help.
pyparsing includes a SELECT statement parser in its examples directory. You could use it like this:
from pyparsing import Optional, ParseException
from select_parser import select_stmt
def check_syntax(s):
    """Return True if ``s`` parses as a SELECT statement, otherwise False.

    The statement may end with an optional ``;``. On a parse failure the
    explanation produced by the parser is printed before returning False.
    """
    statement = select_stmt + Optional(';')
    try:
        # parseAll=True rejects trailing text that the grammar did not consume.
        statement.parseString(s, parseAll=True)
    except ParseException as parse_error:
        print(parse_error.explain())
        return False
    return True
# Demonstration: one well-formed and one misspelled SELECT statement.
correct_sql = "select * from table;"
wrong_sql = "selecccct * from table;"
print(check_syntax(correct_sql))  # prints True
print(check_syntax(wrong_sql))  # prints the parse explanation, then False
This might be un-ideal but you could do:
def check_syntax(request: str) -> bool:
    """Return True if ``cursor.execute(request)`` succeeds, False if it raises.

    Note that the statement is really executed, and every kind of database
    error (not only a syntax error) yields False.

    :param request: SQL text handed to ``cursor.execute``.
    :return: True on success, False on any ``Exception``.
    """
    try:
        cursor.execute(request)
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return False
    return True
The problem is that this does not "only check" the syntax: the statement is actually executed. What you could do instead is the same thing, but inspect the error e (except Exception as e) and return False only if the error code is 102: IncorrectSyntax.
Here is something that could help to parse the exception messages of pyodbc, but depending on what library you use, you'd have to change the class.
Today I have spent a lot of time slowly learning Postgres, and I have been writing code that does some work with the database, such as insert, select, etc.
I have realized that most of my code is copy-paste when it comes to connecting and disconnecting. I know some people do not like that, and it also depends on what I am doing, so before people get mad at me: do not take this as bad code, but rather as something I would like to improve, of course <3
What I have done so far is:
import psycopg2
import psycopg2.extras
from loguru import logger
# Keyword arguments unpacked into psycopg2.connect() by the functions below.
# NOTE(review): the "TEST" values look like placeholders -- confirm.
DATABASE_CONNECTION = {
    "host": "TEST",
    "database": "TEST",
    "user": "TEST",
    "password": "TEST"
}
def register_datas(store, data):
    """
    Register a data to database

    Inserts one row ``(store, data["name"])`` into ``public.store_items``.

    :param store: value stored in the ``store`` column
    :param data: mapping that must contain a ``"name"`` key
    :return: True if the insert affected at least one row, False otherwise
        (including when any error occurs; the error is logged)
    """
    ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
    ps_cursor = ps_connection.cursor()
    ps_connection.autocommit = True
    sql_insert_query = "INSERT INTO public.store_items (store, name) VALUES (%s, %s);"
    try:
        ps_cursor.execute(sql_insert_query, (store, data["name"]))
        return bool(ps_cursor.rowcount)
    except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
        logger.exception("Error: %s" % error)
        ps_connection.rollback()
        return False
    finally:
        # Single cleanup path: runs on success, on handled errors, and on
        # anything that escapes the handler above.
        ps_cursor.close()
        ps_connection.close()
def get_all_keywords(keywords):
    """
    Get all keywords

    Selects the ``keyword`` column of ``public.keywords`` for the rows whose
    ``filter_type`` equals the given value.

    :param keywords: value compared against the ``filter_type`` column
    :return: list of keyword values; an empty list when any error occurs
        (the error is logged)
    """
    ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
    ps_cursor = ps_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
    sql_select_query = "SELECT keyword FROM public.keywords WHERE filter_type = %s;"
    try:
        ps_cursor.execute(sql_select_query, (keywords,))
        # Rows come from a DictCursor, so columns are addressable by name.
        return [row["keyword"] for row in ps_cursor]
    except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
        logger.exception("Error: %s" % error)
        ps_connection.rollback()
        return []
    finally:
        # Single cleanup path for both the success and the error case.
        ps_cursor.close()
        ps_connection.close()
def check_if_store_exists(store):
    """
    Check if the store exists in database

    :param store: value compared against ``public.store_config.store``
    :return: True if a matching row exists, otherwise False; False is also
        returned when any error occurs (the error is logged)
    """
    ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
    ps_cursor = ps_connection.cursor()
    sql_select_query = "SELECT store FROM public.store_config WHERE store = %s;"
    try:
        ps_cursor.execute(sql_select_query, (store,))
        return ps_cursor.fetchone() is not None
    except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
        logger.exception("Error: %s" % error)
        ps_connection.rollback()
        # Was `[]`: keep the return type boolean on every path (still falsy).
        return False
    finally:
        # Single cleanup path for both the success and the error case.
        ps_cursor.close()
        ps_connection.close()
and I do see that I have same code where I do:
ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
ps_cursor = ps_connection.cursor()
...
...
...
...
ps_cursor.close()
ps_connection.close()
return data
To shorten this code, I wonder if it's possible to write a function that connects -> lets me do the query/execution -> closes the connection, and then returns the data I want to return?
Context manager
There is a pattern built-in Python. It is named context manager. It has two purposes:
Do operations before and after some logic in with block.
Catch errors inside with block and allow to handle it in a custom way.
To create a context manager, you can go in either of two ways. One (I like it more) is to define a class satisfying the following protocol:
__enter__(self)
__exit__(self, exc_type, exc_value, traceback)
Any class that satisfies the protocol can be used in a with statement and works as a context manager. In keeping with Python's duck-typing principles, the interpreter "knows" how to use the class in a with statement.
Example:
class QuickConnection:
    """Context manager that opens a database connection and yields a cursor.

    On entry the ``with`` block receives a ``DictCursor``. On exit the
    transaction is committed if the block finished normally and rolled back
    if it raised; the cursor and connection are closed either way. Errors
    raised inside the block are never suppressed.
    """

    def __init__(self):
        self.ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
        # The cursor must come from this instance's connection; a bare
        # `ps_connection` name does not exist in this scope.
        self.ps_cursor = self.ps_connection.cursor(
            cursor_factory=psycopg2.extras.DictCursor)

    def __enter__(self):
        return self.ps_cursor

    def __exit__(self, err_type, err_value, traceback):
        try:
            if err_type is None:
                # Without a commit the work done in the block is discarded
                # when the connection is closed.
                self.ps_connection.commit()
            else:
                self.ps_connection.rollback()
        finally:
            self.ps_cursor.close()
            self.ps_connection.close()
        # Explicit False: let any exception from the block propagate.
        return False
The return value of the __exit__ method matters. If it returns True, any error raised inside the with block is suppressed. If it returns False, the error is re-raised once __exit__ finishes. It is better to keep the return value of __exit__ explicit, since the feature itself is not that obvious.
Or use contextlib.contextmanager decorator
from contextlib import contextmanager
@contextmanager
def quick_connection():
    """Open a database connection and yield a ``DictCursor`` for a with block.

    The transaction is committed when the block finishes normally and rolled
    back (then re-raised) when it raises. The cursor and the connection are
    closed in both cases.
    """
    ps_connection = psycopg2.connect(**DATABASE_CONNECTION)
    ps_cursor = ps_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
    try:
        yield ps_cursor
        # Reached only if the with block did not raise; without a commit the
        # work is discarded when the connection is closed.
        ps_connection.commit()
    except Exception:  # don't do this, catch specific errors instead.
        ps_connection.rollback()
        raise
    finally:
        ps_cursor.close()
        ps_connection.close()
Usage
# The cursor is only valid inside the block; it is closed on exit.
with QuickConnection() as ps_cursor:
    data_tuple = (store, data["name"])
    ps_cursor.execute(sql_update_query, data_tuple)
    # Read rowcount inside the block, while the cursor is still open.
    has_registered = ps_cursor.rowcount
With the context manager, you can reuse a connection, be sure that it is closed. Also, you can catch and handle errors related to your DB operations in the context manager. Usage of context managers is compact and readable, I believe in the end this is the goal.
I'm trying to call the extract function and the extract_url function within a function. I get name error: name 'endpoint' and name 'agg_key' is not defined. I'm doing this so I can call a script from another script so I don't need to run the command line. How would I go about doing this?
Function I'm trying to call:
def scrape_all_products(URL):
    """Call ``extract`` and then ``extract_url``; ``URL`` itself is not used."""
    # NOTE(review): `endpoint`, `agg_key` and `args` are neither parameters
    # nor locals of this function. Unless they exist as module-level names,
    # the calls below raise NameError.
    extract(endpoint, agg_key, page_range=None)
    extract_url(args)
Functions I'm calling:
def extract(endpoint, agg_key, page_range=None):
    """Collect the ``agg_key`` items from successive pages of ``endpoint``.

    Requests ``<endpoint>?page=1``, ``?page=2``, ... and concatenates the list
    found under ``agg_key`` in each JSON body. Stops at the first page that
    has no items under ``agg_key`` or whose number is outside ``page_range``.

    :param endpoint: base URL; ``?page=<n>`` is appended for every request
    :param agg_key: key of the JSON body that holds the list to aggregate
    :param page_range: optional ``(first, last)`` pair, inclusive; ``None``
        means no page limit
    :return: list with the items of all accepted pages
    :raises requests.HTTPError: from ``raise_for_status`` on an error status
    :raises Exception: if the Content-Type is not
        ``application/json; charset=utf-8``
    """
    # A range gives O(1) membership tests without materialising a list.
    allowed_pages = (
        range(page_range[0], page_range[1] + 1) if page_range else range(0)
    )
    # Loop-invariant: REQUEST_TIMEOUT env var, falling back to 10 when it is
    # unset or 0.
    timeout = int(os.environ.get('REQUEST_TIMEOUT', 0)) or 10
    # NOTE(review): paging always starts at 1, so a page_range whose first
    # page is greater than 1 stops on the first iteration and returns [].
    page = 1
    agg_data = []
    while True:
        page_endpoint = endpoint + f'?page={page}'
        response = requests.get(page_endpoint, timeout=timeout)
        response.raise_for_status()
        if response.url != page_endpoint:  # to handle potential redirects
            p_endpoint = urlparse(response.url)  # parsed URL
            # Follow the redirect target for the remaining pages.
            endpoint = p_endpoint.scheme + '://' + p_endpoint.netloc + p_endpoint.path
        if response.headers['Content-Type'] != 'application/json; charset=utf-8':
            raise Exception('Incorrect response content type')
        data = response.json()
        page_has_products = agg_key in data and len(data[agg_key]) > 0
        page_in_range = page_range is None or page in allowed_pages
        # break loop if empty or want first page
        if not page_has_products or not page_in_range:
            break
        agg_data += data[agg_key]
        page += 1
    return agg_data
Other function:
def extract_url(args):
    """Extract one store's products or collections and save them to a file.

    Reads ``url``, ``collections``, ``dest_path``, ``file_path``,
    ``output_type`` and ``page_range`` from ``args``.

    :param args: object exposing the attributes listed above
    :return: dict with ``endpoint_attempted``, ``collected_at``, ``success``
        and ``error``; on success it also holds the data under the aggregate
        key and the ``file_path`` that was written
    """
    parsed = format_url(args.url, scheme='https', return_type='parse_result')
    formatted_url = parsed.geturl()

    agg_key = 'collections' if args.collections else 'products'

    # An explicit file_path replaces the default "<netloc>.<agg_key>" stem.
    file_stem = args.file_path if args.file_path else f'{parsed.netloc}.{agg_key}'
    fp = os.path.join(args.dest_path, f'{file_stem}.{args.output_type}')

    endpoint = f'{formatted_url}/{agg_key}.json'
    ret = {
        'endpoint_attempted': endpoint,
        'collected_at': str(datetime.now()),
        'success': False,
        'error': ''
    }

    try:
        data = extract(endpoint, agg_key, args.page_range)
    except Exception as err:
        # Covers HTTP errors and JSON decoding errors as well: every failure
        # is reported through the 'error' field instead of being raised.
        ret['error'] = str(err)
    else:
        ret['success'] = True
        ret[agg_key] = data
        ret['file_path'] = str(fp)
        save_to_file(fp, data, args.output_type)
    return ret
The scrape_all_products function only knows about variables created inside of that function and variables passed to it (which in this case is URL). endpoint and agg_key were both created inside of a different function. You have to pass those variables to scrape_all_products the same way you are passing URL. So do:
def scrape_all_products(URL, endpoint, agg_key, args):
And then you would have to appropriately modify anywhere scrape_all_products is called.
I have a set of data from json that I open and compare with another set of data from csv.
assert data1_json1 == data1_csv1
assert data2_json1 == data2_csv2
assert data3_json1 == data3_csv3
assert data4_json1 == data4_csv4
.......
assert data1_json2 == data1_csv10
assert data2_json2 == data2_csv11
assert data3_json2 == data3_csv12
assert data4_json2 == data4_csv13
.......
It works well until I need a detailed response. I can use try/except for one set of data, but I don't know how to use it for all the asserts without code duplication.
# Re-raise a failed comparison with both compared values attached.
try:
    assert data1_json == data1_csv
except AssertionError:
    # The two strings become the two arguments of the new AssertionError.
    raise AssertionError('Json: ' + data1_json, 'CSV: ' + data1_csv)
You can send the message along with assert.
For example,
# The expression after the comma is the assertion message; it is only
# evaluated when the comparison fails.
try:
    assert data1_json == data1_csv, "Json: "+data1_json+" CSV: "+data1_csv
    assert data2_json == data2_csv, "Json: "+data2_json+" CSV: "+data2_csv
    ...
except AssertionError as e:
    # Only the first failing assert is reported; the ones after it are skipped.
    print(e)
That e will display "Json: "+data1_json+" CSV: "+data1_csv
Even more simply, pass the values as a tuple and retrieve them from the AssertionError:
# Attach both compared values to the assertion as a tuple, then unpack them
# from the exception when a comparison fails.
try:
    assert data1_json == data1_csv, (data1_json, data1_csv)
    assert data2_json == data2_csv, (data2_json, data2_csv)
    ...
except AssertionError as e:
    # The assertion message is the exception's only argument. Exceptions are
    # not subscriptable in Python 3, so read it from e.args.
    json_value, csv_value = e.args[0]
    # An f-string also works when the compared values are not strings.
    print(f"Json: {json_value} CSV: {csv_value}")
I have a bot(query, key) function to post data, dicts(query, answer) to wrap the returned result, and query_pipe(query_list) to process a list of query requests. But when I run that in multiprocessing.Process, I found that bot(query, key) returns nothing. Here's my code.
def bot(query, key):
    """Post ``query`` to the bot API and wrap the reply.

    :param query: text sent as the ``info`` field
    :param key: API key sent as the ``key`` field
    :return: ``dicts(query, <reply text>)`` when the response code is
        ``'100000'``, ``dicts(query, 'failed')`` for any other code, or the
        string ``'500 Error'`` if the request or the decoding raised
    """
    data = {
        'key': key,
        'info': query,
        'userid': 'wechat-robot',
    }
    try:
        page = requests.post(url, data=data)
        # Decode the body once instead of once per field access.
        payload = page.json()
        # NOTE(review): the code is compared as a string; if the API returns
        # an integer this never matches -- confirm against the API.
        if payload['code'] == '100000':
            return dicts(query, payload['text'])
        return dicts(query, 'failed')
    except Exception:
        # Still best-effort, but record the cause instead of hiding it.
        import logging
        logging.getLogger(__name__).exception('bot request failed')
        return '500 Error'
def dicts(query, answer):
    """Wrap a query and its answer in a ``{'query': ..., 'answer': ...}`` dict."""
    return dict(query=query, answer=answer)
def query_pipe(query_list):
    """Send the first query of ``query_list`` to the bot and print the reply.

    API keys are read from ``keys.txt`` (one per line); the first key is used.

    :param query_list: sequence of queries; only element 0 is sent
    """
    with open('keys.txt', 'r') as f:
        # Drop the line terminators; other whitespace is kept as-is.
        keys_pool = [line.strip('\n') for line in f]
    print(bot(query_list[0], keys_pool[0]))
# Guard the launch: with the "spawn" start method the child process
# re-imports this module, and unguarded start-up code would run again there.
if __name__ == '__main__':
    p = Process(target=query_pipe, args=(query_data,))
    p.start()
    # Wait for the child so its output appears before the parent exits.
    p.join()
But when I run query_pipe(query_list) directly, without multiprocessing.Process, it prints the correct output. I feel so confused, so any hint would be highly appreciated.