I've written a script to pipe data from the Kustomer API into our database. Although it works fine, it's a bit messy, and I was wondering if there's a more elegant solution. I define the row of results I'm pushing through as a dictionary and then push it to MySQL, but the messy part comes when some of these values aren't always available in the JSON.
This has resulted in a try/except statement for each data point that may or may not be missing.
Is there a better way of doing this? Code below.
# Build one flat row (record_data) from a Kustomer conversation `record` and
# hand it to self.db.insert_update. Fields read directly in the dict literal
# are treated as required; optional fields start as None and are filled in by
# the guarded lookups that follow.
try:
record_data = {
'id': record['id'],
# NOTE(review): str() of a datetime only carries a ".ffffff" suffix when the
# microseconds are non-zero, so [:-7] would cut into the time itself for a
# ".000Z" timestamp -- confirm the API never returns zero microseconds.
'created_at': str(datetime.strptime(record['attributes']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7],
'last_activity_at': str(datetime.strptime(record['attributes']['lastActivityAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7],
'first_marked_done': None,
'last_marked_done': None,
# First assigned team, or None when the list is empty.
'assigned_team': record['attributes']['assignedTeams'][0] if record['attributes']['assignedTeams'] != [] else None,
'conversation_type': None,
'conversation_category': None,
'conversation_subcategory': None,
'message_count': record['attributes']['messageCount'],
'note_count': record['attributes']['noteCount'],
'satisfaction': record['attributes']['satisfaction'],
'status': None,
# 1/0 flags: whether the channel name appears in the channels list.
'email': 1 if len(list(filter(lambda x: x == 'email', record['attributes']['channels']))) > 0 else 0,
'chat': 1 if len(list(filter(lambda x: x == 'chat', record['attributes']['channels']))) > 0 else 0,
'priority': record['attributes']['priority'],
# 'out' maps to 'outbound'; every other value maps to 'in'.
'direction': 'outbound' if record['attributes']['direction'] == 'out' else 'in',
'nlp_score': None,
'nlp_sentiment': None,
'waiting_for': None,
'sla_breach': None,
'sla_status': None,
'breached_sla': None,
'breached_at': None
}
# Optional: status.
try:
record_data['status'] = record['attributes']['status']
except KeyError:
pass
# Optional: custom fields. All three assignments share one try, so a missing
# 'typeStr' also skips the category fields even if 'categoryTree' is present.
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
record_data['conversation_category'] = str(record['attributes']['custom']['categoryTree']).split('.')[0]
record_data['conversation_subcategory'] = str(record['attributes']['custom']['categoryTree']).split('.')[1] if len(str(record['attributes']['custom']['categoryTree']).split('.')) > 1 else None
except KeyError:
pass
# NOTE(review): reads the same 'typeStr' key as conversation_type above --
# possibly a copy/paste slip; confirm the intended source field.
try:
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
# Optional: done timestamps. They share one try, so a missing 'firstDone'
# also skips 'lastDone'.
try:
record_data['first_marked_done'] = str(datetime.strptime(record['attributes']['firstDone']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
record_data['last_marked_done'] = str(datetime.strptime(record['attributes']['lastDone']['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
except KeyError:
pass
# Optional: SLA data. sla_breach is 0 only when 'breached' is exactly False.
try:
record_data['sla_breach'] = 0 if record['attributes']['sla']['breached'] is False else 1
record_data['sla_status'] = record['attributes']['sla']['status']
if record_data['sla_breach'] == 1:
try:
record_data['breached_sla'] = record['attributes']['sla']['breach']['metric']
# NOTE(review): stored as the raw API value here, whereas the fallback below
# reformats firstBreachAt through strptime -- confirm the expected format.
record_data['breached_at'] = record['attributes']['sla']['breach']['at']
except KeyError:
# Fallback when there is no 'breach' entry: look for the metric whose
# 'breachAt' equals the summary's 'firstBreachAt'.
for m in record['attributes']['sla']['metrics']:
try:
if record['attributes']['sla']['metrics'][m]['breachAt'] == record['attributes']['sla']['summary']['firstBreachAt']:
record_data['breached_sla'] = m
record_data['breached_at'] = str(datetime.strptime(record['attributes']['sla']['summary']['firstBreachAt'], '%Y-%m-%dT%H:%M:%S.%fZ'))[:-7]
except KeyError:
pass
except KeyError:
# Any missing SLA key resets the breach flag to 0.
record_data['sla_breach'] = 0
print(record_data)
self.db.insert_update(KustomerConversations(**record_data))
# A KeyError that escapes the guarded lookups above (e.g. a required field is
# missing) is swallowed here, so that record is neither printed nor inserted.
except KeyError:
pass
First you should try, where possible, to use dict.get with a default value specified. Next you can consider contextmanager to make your code significantly cleaner. Consider this:
# Baseline for comparison: every optional field gets its own try/except so
# that one missing key does not prevent the remaining assignments.
try:
record_data['status'] = record['attributes']['status']
except KeyError:
pass
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
try:
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
except KeyError:
pass
try:
record_data['first_marked_done'] = record['attributes']['firstDone']['createdAt']
except KeyError:
pass
Now rewritten, you can ensure consistent error handling without repeating logic:
from contextlib import contextmanager


@contextmanager
def error_handling():
    """Suppress ``KeyError`` raised inside the managed ``with`` block.

    Intended for optional lookups: when a key is missing, the statement in
    the ``with`` body is abandoned and execution continues after the block.
    Any other exception type propagates unchanged.

    The ``@contextmanager`` decorator is required: without it this is a plain
    generator function and ``with error_handling():`` fails because the
    generator object does not implement the context-manager protocol.
    """
    try:
        yield
    except KeyError:
        # A missing key only means the optional field is absent; skip it.
        pass
# One with-block per optional field: a KeyError raised by a lookup abandons
# only that assignment, leaving the target key at its previous value.
with error_handling():
record_data['status'] = record['attributes']['status']
with error_handling():
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
with error_handling():
record_data['waiting_for'] = record['attributes']['custom']['typeStr']
with error_handling():
record_data['first_marked_done'] = record['attributes']['firstDone']['createdAt']
You can define an arbitrary number of functions like error_handling for various rules you wish to apply.
You can use a function that returns an element from nested dicts and doesn't raise an exception if it doesn't exist.
Like this quick draft:
def get_nested_dict_value(src_dict, *nested_keys, **kwargs):
    """
    Get a value from nested dicts by a series of keys, with a default value.

    Example:
    instead of:
        x = data['a']['b']['c']['d']
    use
        x = get_nested_dict_value(data, 'a', 'b', 'c', 'd')
    or, if you need some non-None default value, add default=xxx kwarg:
        x = get_nested_dict_value(data, 'a', 'b', 'c', 'd', default=0)

    The default is returned as soon as a key is missing OR an intermediate
    value is not a mapping (e.g. ``None``, a number or a string), so a
    partially populated structure never raises. With no keys at all, the
    source object itself is returned.

    :param src_dict: the outermost mapping to walk.
    :param nested_keys: keys to follow, outermost first.
    :param kwargs: only ``default`` is read (value returned when the path
        cannot be followed; ``None`` if omitted).
    :return: the value found at the end of the path, or the default.
    """
    # Local import keeps this helper self-contained wherever it is pasted.
    from collections.abc import Mapping

    default = kwargs.get("default", None)
    pointer = src_dict
    for key in nested_keys:
        # Checking the type first avoids a TypeError on None/int values and
        # accidental substring matching when the intermediate is a string.
        if not isinstance(pointer, Mapping) or key not in pointer:
            return default
        pointer = pointer[key]
    return pointer
So, instead of:
try:
record_data['conversation_type'] = record['attributes']['custom']['typeStr']
except Exception:
pass
You just type:
record_data['conversation_type'] = get_nested_dict_value(record, 'attributes', 'custom', 'typeStr')
The different naming conventions on the input and output sides make it hard to beat the clarity of explicit assignments. Preserving the exact semantics of your version (e.g., that it doesn't assign conversation_category in the absence of a typeStr even if categoryTree is available) excludes certain choices (like making a data structure to loop over with a try/except on each access); you might be able to do better with more assumptions about your input data.
Nonetheless, in addition to the dict.get already mentioned, you can use builtins (any, or, and dict) and introduce a helper function and a few temporary variables to make the code much more readable:
def ptime(s):
    """Reformat a ``'%Y-%m-%dT%H:%M:%S.%fZ'`` timestamp as ``'YYYY-MM-DD HH:MM:SS'``.

    The result is produced with ``strftime`` rather than by slicing
    ``str(datetime)``: ``str()`` omits the fractional part entirely when the
    microseconds are zero, so a fixed ``[:-7]`` slice would then cut into the
    time and leave only one digit of the hour.

    :param s: timestamp string with fractional seconds and a literal ``Z``.
    :return: the same instant, truncated to whole seconds.
    :raises ValueError: if *s* does not match the expected format.
    """
    return datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d %H:%M:%S')
# Build one flat row from a Kustomer conversation `record` and upsert it.
# Required fields are read directly (a missing one raises KeyError, which the
# outer handler turns into "skip this record"); optional fields default to
# None / 0 and are filled in by the tentative blocks below.
try:
    attr = record['attributes']
    cust = attr.get('custom', {})  # defer KeyErrors into the below
    channels = attr['channels']
    teams = attr['assignedTeams']
    record_data = dict(
        id=record['id'],
        created_at=ptime(attr['createdAt']),
        last_activity_at=ptime(attr['lastActivityAt']),
        first_marked_done=None,
        last_marked_done=None,
        # Guard the empty list explicitly: indexing [0] on [] raises
        # IndexError, which the KeyError handlers here would not catch.
        assigned_team=teams[0] if teams else None,
        conversation_type=None,
        conversation_category=None,
        conversation_subcategory=None,
        message_count=attr['messageCount'],
        note_count=attr['noteCount'],
        satisfaction=attr['satisfaction'],
        status=attr.get('status'),
        email=int('email' in channels),
        chat=int('chat' in channels),
        priority=attr['priority'],
        direction='outbound' if attr['direction'] == 'out' else 'in',
        nlp_score=None,
        nlp_sentiment=None,
        # NOTE(review): same source key as conversation_type -- kept from the
        # original script; confirm the intended field.
        waiting_for=cust.get('typeStr'),
        sla_breach=0,
        sla_status=None,
        breached_sla=None,
        breached_at=None,
    )

    # The category fields are only filled in when 'typeStr' is present too,
    # mirroring the original script.
    try:
        record_data['conversation_type'] = cust['typeStr']
        cat = str(cust['categoryTree']).split('.')
        record_data['conversation_category'] = cat[0]
        record_data['conversation_subcategory'] = cat[1] if len(cat) > 1 else None
    except KeyError: pass

    # A missing 'firstDone' also skips 'lastDone'.
    try:
        record_data['first_marked_done'] = ptime(attr['firstDone']['createdAt'])
        record_data['last_marked_done'] = ptime(attr['lastDone']['createdAt'])
    except KeyError: pass

    try:
        sla = attr['sla']
        record_data['sla_breach'] = 0 if sla['breached'] is False else 1
        record_data['sla_status'] = sla['status']
        if record_data['sla_breach'] == 1:
            try:
                record_data['breached_sla'] = sla['breach']['metric']
                # NOTE(review): stored raw, unlike the ptime() fallback below;
                # confirm the format of sla['breach']['at'].
                record_data['breached_at'] = sla['breach']['at']
            except KeyError:
                # No explicit 'breach' entry: find the metric whose breach
                # time matches the summary's first breach.
                for m, v in sla['metrics'].items():
                    try:
                        v = v['breachAt']
                        if v == sla['summary']['firstBreachAt']:
                            record_data['breached_sla'] = m
                            record_data['breached_at'] = ptime(v)
                    except KeyError: pass
    except KeyError: pass

    print(record_data)
    self.db.insert_update(KustomerConversations(**record_data))
except KeyError: pass
While you might have a policy against it, in this case I recommend writing the remaining except KeyError: pass clauses on one line each: it helps the visual bracketing of the tentative code.
Related
I'm trying to scrape a site (discogs.com) for a few different fields (num_have, num_want, num_versions, num_for_sale, value) per release_id. Generally it works ok, but I want to set some conditions to exclude release ids where:
num_have is greater than 18,
num_versions is 2 or less,
num_for_sale is 5 or less,
So I want results to be any release id that meets all three conditions. I can do that for conditions 1 & 2, but the 3rd is giving me trouble. I don't know how to adjust for where num_for_sale is 0. According to the api documentation (https://www.discogs.com/developers/#page:marketplace,header:marketplace-release-statistics), the body should look like this:
{
"lowest_price": {
"currency": "USD",
"value": 2.09
},
"num_for_sale": 26,
"blocked_from_sale": false
}
and "Releases that have no items for sale in the marketplace will return a body with null data in the lowest_price and num_for_sale keys. Releases that are blocked for sale will also have null data for these keys." So I think my errors come from releases where num_for_sale is 0 (null): the script doesn't know what to do with value. When I wrap the code that accesses market_data in a try-except block and set value and currency to None if an exception occurs, I get an AttributeError: "'NoneType' object has no attribute 'get'".
What am I doing wrong? How should I rewrite this code:
import pandas as pd
import requests
import time
import tqdm
# For each release_id in the pickled DataFrame, query three Discogs endpoints
# (release, master versions, marketplace stats), collect the counts into
# `results`, and pickle the resulting DataFrame.
unique_northAmerica = pd.read_pickle("/Users/EJ/northAmerica_df.pkl")
# Only rows 1..68 are processed.
unique_northAmerica = unique_northAmerica.iloc[1:69]
headers = {'Authorization': 'Discogs key=MY-KEY'}
results = []
for index, row in tqdm.tqdm(unique_northAmerica.iterrows(), total=len(unique_northAmerica)):
release_id = row['release_id']
response = requests.get(f'https://api.discogs.com/releases/{release_id}', headers=headers)
data = response.json()
# Community have/want counts; None when the release has no 'community' block.
if 'community' in data:
num_have = data['community']['have']
num_want = data['community']['want']
else:
num_have = None
num_want = None
# Number of versions of the master release; falls back to 1 when there is no
# master_id or the versions response has no 'versions' key.
if "master_id" in data:
master_id = data['master_id']
response = requests.get(f"https://api.discogs.com/masters/{master_id}/versions", headers=headers)
versions_data = response.json()
if "versions" in versions_data:
num_versions = len(versions_data["versions"])
else:
num_versions = 1
else:
num_versions = 1
response = requests.get(f'https://api.discogs.com/marketplace/stats/{release_id}', headers=headers)
market_data = response.json()
num_for_sale = market_data.get('num_for_sale', None)
# Add the condition to only append to `results` if num_have <= 18 and num_versions <= 2
# The truthiness checks also reject None and 0 for num_have / num_versions.
if num_have and num_versions and num_have <= 18 and num_versions <= 2:
if num_for_sale and num_for_sale <= 5:
# NOTE(review): the key can be present with a null value (see the traceback
# in the question), in which case .get is called on None and raises
# AttributeError; a None check on market_data['lowest_price'] is needed.
if 'lowest_price' in market_data:
value = market_data['lowest_price'].get('value', None)
else:
value = None
else:
value = None
if num_for_sale == 0:
value = None
# NOTE(review): indentation was lost in this listing; if the append sits at the
# level of the outer `if`, rows are stored regardless of the num_for_sale
# check -- confirm the intended nesting.
results.append({
'release_id': release_id,
'num_have': num_have,
'num_want': num_want,
'num_versions': num_versions,
'num_for_sale': num_for_sale,
'value': value
})
# Pause between iterations (presumably to respect API rate limits -- confirm).
time.sleep(4)
df = pd.DataFrame(results)
df.to_pickle("/Users/EJ/example.pkl")
Thanks in advance!
I've tried wrapping the code that accesses market_data in a try-except block, and set the values for value and currency to None if an exception occurs, I get an AttributeError "NoneType' object has no attribute 'get'"
Edit:
Traceback (most recent call last)
Cell In [139], line 41
39 if num_for_sale <= 5:
40 if 'lowest_price' in market_data:
---> 41 value = market_data['lowest_price'].get('value', None)
42 else:
43 value = None
AttributeError: 'NoneType' object has no attribute 'get'
You just need to add a check to see if the data is None.
if 'lowest_price' in market_data and market_data['lowest_price'] is not None:
value = market_data['lowest_price'].get('value', None)
else:
value = None
In fact you can probably skip checking to see if lowest_price exists, because the api instructions tell you it will be there, it just might have null data.
So you could change it to.
if market_data['lowest_price']:
value = ...
else:
value = None
Per the discogs api docs:
Releases that have no items for sale in the marketplace will return a body with null data in the lowest_price and num_for_sale keys. Releases that are blocked for sale will also have null data for these keys.
Which means that in one of those situations the converted json would look like this:
{
"lowest_price": None,
"num_for_sale": None,
"blocked_from_sale": false
}
So when your code tries to call get on market_data['lowest_price'], what you're actually doing is calling None.get, which raises the error.
The reason it still includes releases where num_for_sale > 5 is that you are appending the results regardless of whether your check returns true or false. To fix this, all you need to do is adjust the indentation of your results.append statement so that it sits inside the num_for_sale check:
if num_have and num_versions and num_have <= 18 and num_versions <= 2:
    if num_for_sale and num_for_sale <= 5:
        # lowest_price is always present but may be null (None), so test its
        # truthiness before calling .get on it.
        if market_data['lowest_price']:
            value = market_data['lowest_price'].get('value', None)
        else:
            value = None
        # Appending inside the num_for_sale check is the actual fix: a row is
        # stored only when all three conditions hold.
        results.append({
            'release_id': release_id,
            'num_have': num_have,
            'num_want': num_want,
            'num_versions': num_versions,
            'num_for_sale': num_for_sale,
            'value': value
        })
I want to check if the fields 'id' and 'folder_id' of api_response equal certain values (here, respectively 25 and 17). If it does, return true, else return false. My problem is that my script returns false when it should return true because the fields id and folder_id are correct.
Here is my code:
# Read the API key and the expected folder/list ids from the [Config] section,
# fetch the list from SendInBlue, and print whether its id and folder_id match
# the configured values.
SendInBlueAPI = config.get('Config','SendInBlueAPI')
SendInBlueFolderID = config.get('Config','SendInBlueFolderID')
SendInBlueListID = config.get('Config','SendInBlueListID')
## Checking if list exists in SendInBlue
configuration = sib_api_v3_sdk.Configuration()
configuration.api_key['api-key'] = '%s' % (SendInBlueAPI)
api_instance = sib_api_v3_sdk.ListsApi(sib_api_v3_sdk.ApiClient(configuration))
list_id = SendInBlueListID
try:
api_response = api_instance.get_list(list_id)
# NOTE(review): the sample response shows id and folder_id as integers (25, 17).
# If `config` is a configparser object, config.get returns strings, so these
# comparisons are int == str and always False -- confirm, and compare against
# int(...) / config.getint(...) values if so.
if api_response.id == SendInBlueListID and api_response.folder_id == SendInBlueFolderID:
print(True)
else:
print(False)
except ApiException as e:
print("Exception when calling ListsApi->get_list: %s\n" % e)
This is the output of api_response:
{'campaign_stats': None,
'created_at': '2023-01-16T10:07:47.000+01:00',
'dynamic_list': False,
'folder_id': 17,
'id': 25,
'name': 'My list',
'total_blacklisted': 2,
'total_subscribers': 244,
'unique_subscribers': 246}
And here is my config file :
[Config]
SendInBlueAPI = MY_API_KEY
SendInBlueFolderID = 17
SendInBlueListID = 25
I have a problem with my code. When I insert three or more params in the request body, I get the error "POST Error: or_ expected 2 arguments, got 3."
With only one or two parameters in the body it works fine, but I don't understand where the mistake is. Can someone help me?
def read_uptime(logid, filteredData, dateStart, dateEnd, timeStart, timeEnd, elementsForPage, currentPage, filterUptime):
    """Query one page of uptime rows and return them as JSON.

    The time window is validated with ``_checkDataInput``, the filter
    criteria are built with ``_createFilter``, and the page is fetched with
    one row more than ``elementsForPage`` so that the presence of a further
    page can be detected. Any exception is logged and re-raised.

    Returns:
        Whatever ``_createJson`` produces for the fetched rows plus a paging
        dict with ``currentPage``, ``totalElements`` and ``nextPage``.
    """
    log.info(f"{logid} read_uptime: Started")
    try:
        # Validate the requested time window.
        startDateTime, endDateTime = _checkDataInput(timeStart, timeEnd, dateStart, dateEnd)

        # Translate the request into filter criteria.
        criteria = _createFilter(filteredData, startDateTime, endDateTime, filterUptime)

        # Select the labelled columns, then filter and paginate.
        selection = uptime_model_db.query.with_entities(
            uptime_model_db.projectId.label('projectId'),
            uptime_model_db.url.label('url'),
            uptime_model_db.timeStamp.label('timeStamp'),
            uptime_model_db.uptime.label('uptime'),
            uptime_model_db.latency.label('latency')
        )
        page = selection.filter(*criteria).paginate(
            per_page=int(elementsForPage + 1),
            page=int(currentPage),
            error_out=True,
        )

        # More rows than requested means another page exists.
        rows = page.items
        fetched = len(rows)
        nextPage = {
            "currentPage": currentPage,
            "totalElements": fetched,
            "nextPage": fetched > elementsForPage,
        }

        # Format and return JSON.
        return _createJson(rows, nextPage)
    except Exception as e:
        log.error(f"{logid} read_uptime: function read_uptime returned {e}")
        raise e
In this code I get the error at the line "arrayFilter.append(and_(uptime_model_db.projectId == projectId, or_(*arrayUrl)))":
def filterAppend(arrayFilter, urls, projectId, arrayUrl):
    """Append one project/URL criterion to ``arrayFilter``.

    Nothing is appended when ``urls`` is empty. A single URL yields
    ``projectId AND url``; several URLs are added to ``arrayUrl`` (which is
    mutated in place) and yield ``projectId AND (url1 OR url2 ...)``.
    """
    url_count = len(urls)
    if url_count == 0:
        return

    same_project = uptime_model_db.projectId == projectId
    if url_count == 1:
        arrayFilter.append(and_(same_project, uptime_model_db.url == urls[0]))
        return

    arrayUrl.extend(uptime_model_db.url == url for url in urls)
    arrayFilter.append(and_(same_project, or_(*arrayUrl)))
i get in this code the mistake:
"filters.append(or_(*arrayFilter))"
def _createFilter(filteredData, startDateTime, endDateTime, filterUptime):
filters = []
if filteredData is not None:
arrayFilter = []
for data in filteredData:
projectId = data["projectId"]
urls = data["uptimeUrls"]
arrayUrl = []
if (len(filteredData) == 1):
filterAppend(filters, urls, projectId, arrayUrl)
else:
filterAppend(arrayFilter, urls, projectId, arrayUrl)
if(len(filteredData) > 1 or len(arrayFilter) > 1):
filters.append(or_(*arrayFilter))
if startDateTime is not None:
filters.append(str(startDateTime) <= uptime_model_db.timeStamp)
if startDateTime is not None:
filters.append(str(endDateTime) >= uptime_model_db.timeStamp)
if filterUptime == "True":
filters.append(uptime_model_db.uptime < 100)
return filters
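Import or_ from sqlalchemy rather than from the standard-library operator module (operator.or_ accepts exactly two arguments, which is what produces the "expected 2 arguments, got 3" error):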
from sqlalchemy import or_
I want to create comments from a dataset that details the growth rate, market share, etc. for various markets and products. The dataset is in the form of a pd.DataFrame(). I would like the comment to include keywords like increase/decrease based on the calculations; for example, if Jan 2020 has sales of 1000 and Jan 2021 has sales of 1600, then that necessarily means an increase of 60%.
I defined the function below outside my main code, and I would like to know whether this method is too clumsy and, if so, how I should improve it.
GrowthIncDec = namedtuple('gr_tuple', ['annual_growth_rate', 'quarterly_growth_rate'])


def _describe_change(rate):
    """Return the phrase describing the sign of *rate*.

    Anything that is neither greater nor less than zero (0, and also NaN,
    which compares false both ways) is reported as 'stayed the same'.
    """
    if rate > 0:
        return 'increased'
    if rate < 0:
        return 'decreased'
    return 'stayed the same'


def increase_decrease(annual_gr, quarter_gr):
    """Describe the direction of the annual and quarterly growth rates.

    :param annual_gr: annual growth rate (only its sign is used).
    :param quarter_gr: quarterly growth rate (only its sign is used).
    :return: a ``GrowthIncDec`` named tuple whose ``annual_growth_rate`` and
        ``quarterly_growth_rate`` fields are each one of ``'increased'``,
        ``'decreased'`` or ``'stayed the same'``.
    """
    return GrowthIncDec(
        annual_growth_rate=_describe_change(annual_gr),
        quarterly_growth_rate=_describe_change(quarter_gr),
    )
myfunc = increase_decrease(5, -1)
myfunc.annual_growth_rate
output: 'increased'
A snippet of my main code is as follows to illustrate the use of the above function:
# Illustrative excerpt (not runnable as shown): builds a one-row DataFrame
# holding a sentence that describes the annual growth of a market.
def get_comments(grp, some_dict: Dict[str, List[str]]):
.......
try:
subdf = the dataframe
# First value of each growth column; raises IndexError when the column is
# empty and KeyError when it is missing.
annual_gr = subdf['Annual_Growth'].values[0]
quarter_gr = subdf['Quarterly_Growth'].values[0]
# Translate the signs of the two rates into 'increased' / 'decreased' /
# 'stayed the same'.
inc_dec_named_tup = increase_decrease(annual_gr, quarter_gr)
inc_dec_annual_gr = inc_dec_named_tup.annual_growth_rate
inc_dec_quarterly_gr = inc_dec_named_tup.quarterly_growth_rate
# Only the annual wording and rate are used in the sentence below.
comment = "The {} has {} by {:.1%} in {} {} compared to {} {}"\
.format(market, inc_dec_annual_gr, annual_gr, timeperiod, curr_date, timeperiod, prev_year)
comments_df = pd.DataFrame(columns=['Date','Comments'])
# comments_df['Date'] = [curr_date]
comments_df['Comments'] = [comment]
return comments_df
except (IndexError, KeyError) as e:
# this is for all those nan values which is empty
# NOTE(review): as shown, this branch only resets the two rates and then falls
# off the end of the excerpt, so no DataFrame is returned -- confirm against
# the full function.
annual_gr = 0
quarter_gr = 0
I'm hoping to use ruamel.yaml to perform some automatic edits against a large human-edited YAML file.
The input file contains merge keys, like so:
foo: &foo
color: red
bar:
name: qux
<<: *foo
If possible, I'd like to preserve the relative ordering of the explicit name key and the << merge key, but it looks like ruamel really wants the merge key to come first. Here's what I get when I round-trip this YAML through ruamel:
foo: &foo
color: red
bar:
<<: *foo
name: qux
Is there any way to tell ruamel to preserve the position of the merge key within this block?
With the tweaking of two lines in the representer for mappings this
can be fixed, the positional information for the merge key was there,
it was just not used. Unfortunately that is a rather large function that
requires a few imports:
import sys
import ruamel.yaml
if ruamel.yaml.version_info < (0, 15, 86):
from ruamel.yaml.nodes import MappingNode, ScalarNode
from ruamel.yaml.comments import comment_attrib, merge_attrib
# Replacement for the round-trip representer's represent_mapping: builds a
# MappingNode for `mapping`, carrying over comments, and inserts the merge key
# ('<<') at the position recorded on the mapping (merge_pos) instead of a
# fixed index. The two lines marked "<<<<" are the ones that differ.
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
# Use the mapping's own flow-style setting when it has one (the `fa`
# attribute); otherwise keep the flow_style argument.
try:
flow_style = mapping.fa.flow_style(flow_style)
except AttributeError:
flow_style = flow_style
# Carry over the anchor when the mapping provides one.
try:
anchor = mapping.yaml_anchor()
except AttributeError:
anchor = None
# `value` is filled in below; the node keeps a reference to the same list.
node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
# no sorting! !!
# Attach the mapping's comments to the node, calling reset() on each comment
# token; a mapping without the comment attribute has no item comments.
try:
comment = getattr(mapping, comment_attrib)
node.comment = comment.comment
if node.comment and node.comment[1]:
for ct in node.comment[1]:
ct.reset()
item_comments = comment.items
for v in item_comments.values():
if v and v[1]:
for ct in v[1]:
ct.reset()
try:
node.comment.append(comment.end)
except AttributeError:
pass
except AttributeError:
item_comments = {}
# Each merge entry is indexed as (position, merged mapping): [1] is collected
# here, [0] of the first entry is read as the insert position below.
merge_list = [m[1] for m in getattr(mapping, merge_attrib, [])]
# NOTE(review): the [[0]] default only covers a missing attribute; if the
# attribute exists but is an empty list this indexes [][0] -- confirm that
# cannot happen.
merge_pos = getattr(mapping, merge_attrib, [[0]])[0][0] # <<<<<<<< line added
item_count = 0
# With merges present, iterate only the keys set directly on this mapping.
if bool(merge_list):
items = mapping.non_merged_items()
else:
items = mapping.items()
for item_key, item_value in items:
item_count += 1
node_key = self.represent_key(item_key)
node_value = self.represent_data(item_value)
# Distribute the per-item comment: first two slots go on the key node, the
# remaining slots on the value node.
item_comment = item_comments.get(item_key)
if item_comment:
assert getattr(node_key, 'comment', None) is None
node_key.comment = item_comment[:2]
nvc = getattr(node_value, 'comment', None)
if nvc is not None: # end comment already there
nvc[0] = item_comment[2]
nvc[1] = item_comment[3]
else:
node_value.comment = item_comment[2:]
# best_style stays True only while every key and value is a scalar node
# without an explicit style.
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
# No explicit flow style: use the representer default for non-empty mappings
# (or ones with merges) when it is set, otherwise fall back to best_style.
if flow_style is None:
if ((item_count != 0) or bool(merge_list)) and self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
if bool(merge_list):
# because of the call to represent_data here, the anchors
# are marked as being used and thereby created
if len(merge_list) == 1:
arg = self.represent_data(merge_list[0])
else:
arg = self.represent_data(merge_list)
arg.flow_style = True
# Insert the '<<' entry at the recorded position among the already
# represented key/value pairs.
value.insert(merge_pos, (ScalarNode(u'tag:yaml.org,2002:merge', '<<'), arg)) # <<<<< line changed
return node
ruamel.yaml.representer.RoundTripRepresenter.represent_mapping = represent_mapping
yaml_str = """\
foo: &foo
color: red
bar:
name: qux
<<: *foo
"""
yaml = ruamel.yaml.YAML()
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
which gives:
foo: &foo
color: red
bar:
name: qux
<<: *foo
The above tries to keep the absolute position, without taking deletion
or inserts of key-value pairs into account.
The above will not patch anything when using the next release of
ruamel.yaml, which will include these changes.