How would you clean this dictionary in Python? - python

This is my first attempt building something non-web and involving logic coding.
Please take a look at this god-awful dictionary below:
Messy_Dict=
{
'name': "['\\r\\n NASDAQ: BKEP\\r\\n ']",
'underlying': "['1.12']",
'strike_prices_list': ["['2.50'", " '5.00'", " '7.50']"],
'call_bid': ["['\\r\\n0.05 '", " '\\r\\n0.00 '", " '\\r\\n0.00 ']"],
'put_ask': ["['\\r\\n2.10 '", " '\\r\\n4.50 '", " '\\r\\n7.00 ']"]
}
What I want to do is clean up the unnecessary sub-strings within each dictionary value to get something like this:
Clean_Dict=
{
'name': "BKEP",
'underlying': "1.12",
'strike_prices_list': ["2.50", "5.00", "7.50"],
'call_bid': ["0.05", "0.00", "0.00"],
'put_ask': ["2.10", "4.50", "7.00"]
}
I have managed to get from Messy_Dict to Clean_Dict but I used very barbaric means to do so. I will just say that it included a for loop and multiple strip(), replace('', '') methods. And it pains me to look at that block of code in my .py file.
So I guess, is there a more elegant method in performing the desired task of converting Messy_Dict to Clean_Dict? I feel as if I'm missing something here in my fundamentals.
Edit
def parse(self, response):
    """Write one pipe-delimited record for a page that has strike-price cells.

    Pages for which the '.highlight , .aright .strike-col' selector yields no
    text are skipped and nothing is written.

    NOTE(review): ``file`` is not defined in this snippet; presumably it is an
    already-open, writable file object from an enclosing scope -- confirm.
    """
    strike_prices_main = response.css('.highlight , .aright .strike-col').css('::text').extract()
    if not strike_prices_main:
        # Guard clause instead of the original ``if not ...: pass / else:``.
        return

    name = response.css('#instrumentticker::text').extract()
    strike_prices_list = response.css('.aright .strike-col').css('::text').extract()
    call_bid = response.css('.aright td:nth-child(5)').css('::text').extract()
    put_ask = response.css('.aright td:nth-child(14)').css('::text').extract()
    underlying = response.css('.pricewrap .bgLast').css('::text').extract()

    # Every value is formatted with %s as extracted, so whatever repr the
    # extracted object has (for a list: brackets, quotes, escaped \r\n) is
    # what ends up in the output line.
    file.write('%s|%s|%s|%s|%s\n' % (name, underlying, strike_prices_list, call_bid, put_ask))
Using spiders to crawl!

Maybe like this:
import re
# Clean every value of Messy_Dict (a plain string or a list of strings) by
# deleting scraping residue: literal "\r"/"\n" escape text, whitespace,
# brackets, quotes and the "NASDAQ:" prefix.
Messy_Dict = {
    'name': "['\\r\\n NASDAQ: BKEP\\r\\n ']",
    'underlying': "['1.12']",
    'strike_prices_list': ["['2.50'", " '5.00'", " '7.50']"],
    'call_bid': ["['\\r\\n0.05 '", " '\\r\\n0.00 '", " '\\r\\n0.00 ']"],
    'put_ask': ["['\\r\\n2.10 '", " '\\r\\n4.50 '", " '\\r\\n7.00 ']"],
}

# Raw string: the regex escapes reach `re` untouched and no invalid
# string-escape warnings (e.g. for "\s" or "\[") are raised.
#   \\[rn]      a literal backslash followed by r or n (the text "\r" / "\n")
#   [\s\[\]']   any whitespace, square bracket or single quote
#   NASDAQ:     the exchange prefix in front of the ticker
regexstr = r"\\[rn]|[\s\[\]']|NASDAQ:"

# Compile once instead of handing the pattern string to re.sub per element.
_pattern = re.compile(regexstr)


def _clean(text):
    """Return *text* with every match of the residue pattern removed."""
    return _pattern.sub("", text)


dict_clean = {
    key: [_clean(item) for item in value] if isinstance(value, list) else _clean(value)
    for key, value in Messy_Dict.items()
}

# A bare `dict_clean` expression only echoes in a REPL; print it so the
# result is also visible when this runs as a script.
print(dict_clean)

You can use regular expressions.
Example:
import re
# Strip every character that is not a digit or a dot from each value of
# messy_dict, replacing the values in place.
messy_dict = {
    'name': "['\\r\\n NASDAQ: BKEP\\r\\n ']",
    'underlying': "['1.12']",
    'strike_prices_list': ["['2.50'", " '5.00'", " '7.50']"],
    'call_bid': ["['\\r\\n0.05 '", " '\\r\\n0.00 '", " '\\r\\n0.00 ']"],
    'put_ask': ["['\\r\\n2.10 '", " '\\r\\n4.50 '", " '\\r\\n7.00 ']"],
}

# Raw string so "\d" reaches the regex engine as written; inside a character
# class the dot needs no escaping. Compiled once, outside the loop.
_NON_NUMERIC = re.compile(r'[^\d.]')


def stripfunc(x):
    """Return str(x) reduced to its digits and dots."""
    return _NON_NUMERIC.sub('', str(x))


# Only existing keys are reassigned, so the dict's size never changes while
# it is being iterated.
for key, value in messy_dict.items():
    if isinstance(value, list):
        messy_dict[key] = [stripfunc(x) for x in value]
    else:
        # A value with no digits at all (here: 'name') becomes ''.
        messy_dict[key] = stripfunc(value)
print(messy_dict)
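Explanation: [^ ] matches any character that is NOT in the set. \d matches a digit and \. matches a literal dot, so every character that is neither a digit nor a dot is removed. str(x) makes sure each value is a string before the substitution. Note that a purely textual value such as 'name' ends up empty, as the output below shows.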
Output: {'name': '', 'underlying': '1.12', 'strike_prices_list': ['2.50', '5.00', '7.50'], 'call_bid': ['0.05', '0.00', '0.00'], 'put_ask': ['2.10', '4.50', '7.00']}
Edit: just noticed that you also want to keep the dot. Updated the code.

Related

Send loop result within email body

I'm trying to present "print" result (example 1) in content of email (example 2).
Data I use:
{
"#odata.context": "XXX",
"value": [
{
"Customer_Name": "ABC inc",
"Customer_No": "002",
"Date_of_Export": "2020-01-10T05:36:55.3Z",
"Error_Message": "error message 1.",
"Type_of_Export": "E-Mail"
},
{
"Customer_Name": "CBA inc",
"Customer_No": "001",
"Date_of_Export": "2020-01-10T05:39:13.137Z",
"Error_Message": "Error message 2",
"Type_of_Export": "E-Mail"
}
]}
EXAMPLE 1 (PRINT):
When I use below code I get desired result printed:
r = requests.get("http://www.example.com/test.json")
r_dict = json.loads(r.text)
if len(r_dict["value"])==0:
print("No errors")
else:
print("List of errors:\n")
for item in r_dict["value"]:
print('"Customer":', '(' + item['Customer_No'] + ')' + item['Customer_Name'] + ', "Typ":', item['Type_of_Export'] + ', "Error": ', item['Error_Message'])
I get:
List of errors:
"Customer": (002)ABC inc, "Typ": E-Mail, "Error": error message 1.
"Customer": (001)CBA inc, "Typ": E-Mail, "Error": Error message 2
EXAMPLE 2 (MAIL):
But I can't get the same result into the email body: only the last line is included.
List of errors:
"Customer": (001)CBA inc, "Typ": E-Mail, "Error": Error message 2
Code I use:
import json
import requests
import smtplib
from requests_ntlm import HttpNtlmAuth
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
# Fetch the error list as JSON and mail it as an HTML message.
# Static wrapper markup placed around the generated message body.
html_top = """<html><body>"""
html_end = """</body></html> """
r = requests.get("http://www.example.com/test.json")
r_dict = json.loads(r.text)
if len(r_dict["value"])==0:
print("No errors")
else:
html_body_top = "<p>List of errors:<br></p>"
for item in r_dict["value"]:
# NOTE: "=" rebinds html_body on every pass, so once the loop has finished it
# holds only the text built from the last item.
html_body = '"Customer":' + '(' + item['Customer_No'] + ')' + item['Customer_Name'] + ', "Typ":' + item['Type_of_Export'] + ', "Error": ' + item['Error_Message']
# The subtype "html" means the concatenated text is sent as an HTML part.
mail_html = MIMEText(html_top + html_body_top + html_body + html_end, "html")
# NOTE(review): `message`, `sender_email`, `receiver_email` and the port `xx`
# are not defined in the lines shown -- presumably set up elsewhere; confirm.
message.attach(mail_html)
with smtplib.SMTP("000.000.000.000", xx) as server:
server.sendmail(sender_email, receiver_email, message.as_string())
I would like to send email containing all 2 lines, just like in example 1.
Your problem is that each time this loop iterates, you overwrite the previous value of html_body:
for item in r_dict["value"]:
html_body = '"Customer":' + '(' + item['Customer_No'] + ')' + item['Customer_Name'] + ', "Typ":' + item['Type_of_Export'] + ', "Error": ' + item['Error_Message']
A quick solution is to change to using the += operator and add a new line character on the end.
# Start empty and append one entry per item so that no entry is lost.
html_body = ''
for item in r_dict["value"]:
    # The body is sent with MIMEText(..., "html"); in HTML a bare "\n" is
    # treated like any other whitespace, so "<br>" is what separates the
    # entries in the rendered mail. The "\n" keeps the raw source readable.
    html_body += '"Customer":' + '(' + item['Customer_No'] + ')' + item['Customer_Name'] + ', "Typ":' + item['Type_of_Export'] + ', "Error": ' + item['Error_Message'] + '<br>\n'
Before your for loop, declare the html_body and add the values on it with +=:
# Declare the accumulator before the loop, then extend it with += per item.
html_body = ""
for item in r_dict["value"]:
    # "<br>" produces the visible line break in the HTML mail; a lone "\n"
    # would not be rendered as a line break by an HTML viewer.
    html_body += '"Customer":' + '(' + item['Customer_No'] + ')' + item['Customer_Name'] + ', "Typ":' + item['Type_of_Export'] + ', "Error": ' + item['Error_Message'] + '<br>\n'
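I added the \n at the end of each entry to produce a line break. Note that the message is sent as HTML, where a bare \n is not rendered as a line break; add <br> as well if the entries should appear on separate lines in the mail client.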

How to get rid of the quote marks from a dict key/value pair?

I have a script that parses a yaml file and extracts key/value pairs and prints them, but I keep getting single quotes in the output.
How do I get rid of the quote marks?
YML Snippet
AsNum:
description: Local AS for BGP global
format: string
type: string
Function from Script
# Recursively walk a nested mapping: print the key of each nested mapping
# (except "properties" and "items") and a key/value line for every other entry.
# NOTE(review): indentation was lost in this paste, so whether getVals(value)
# runs for every nested mapping or only for the printed ones cannot be
# confirmed from here.
# NOTE(review): the parameter name `dict` shadows the builtin inside this function.
def getVals(dict):
for key,value in dict.items():
#print(keys)
# isDict is defined elsewhere; presumably it tests whether value is a
# mapping -- confirm.
if isDict(value):
if key != "properties" and key != "items":
print(key)
getVals(value)
else:
# NOTE: under Python 2, print(a, b, c, d) is the print statement applied to a
# tuple, so the tuple's repr (parentheses and quotes) is printed -- which is
# what the example output shows. Printing one formatted string, e.g.
# "key: {} value: {}".format(key, value), avoids the quotes.
print("key: ", key, " value: ", value)
Example Output
AsNum
('key: ', 'type', ' value: ', 'string')
('key: ', 'description', ' value: ', 'Local AS for BGP global')
('key: ', 'format', ' value: ', 'string')

append key's value on same key

This is what I currently have
code
coll = con['X']['Y']
s = "meta http equiv"
m = {'i': s}
n = json.dumps(m)
o = json.loads(n)
coll.insert(o)
data
{
"_id" : ObjectId("58527fe656c7a95cfaf40a15"),
"i" : "meta http equiv"
}
Now in the next iteration, s will change(as per my computations) and I want to append the value of s to same key
let's say in next iteration s becomes sample test data and on same key i
So I want this
{
"_id" : ObjectId("58527fe656c7a95cfaf40a15"),
"i" : "meta http equiv sample test data and"
}
How to achieve this?
Change the way you have formed s:
s = "meta http equiv"
s = (coll.get('i', '') + ' ' + s) if coll.get('i', '') else s
If coll isn't a dict object use getattr instead:
s = "meta http equiv"
s = (getattr(coll, 'i', '') + ' ' + s) if getattr(coll, 'i', '') else s

How do I URL encode this

Need this:
POST&https%3A%2F%2Fsecure.trademe.co.nz%2FOauth%2FRequestToken&oauth_callback%3Dhttp%253A%252F%252Fwww.website-tm-access.co.nz%252Ftrademe-callback%26oauth_consumer_key%3DC74CD73FDBE37D29BDD21BAB54BC70E422%26oauth_nonce%3D7O3kEe%26oauth_signature_method%3DHMAC-SHA1%26oauth_timestamp%3D1285532322%26oauth_version%3D1.0%26scope%3DMyTradeMeRead%252CMyTradeMeWrite
My attempt:
New_base_string ="POST&https%3A%2F%2Fsecure.trademe.co.nz%2FOauth%2FRequestToken&oauth_callback%3Dhttp%253A%252F%252Fwww.website-tm-access.co.nz%252Ftrademe-callback%26oauth_consumer_key%" + str(consumer_key) +"3DC74CD73FDBE37D29BDD21BAB54BC70E422%26oauth_nonce%3" + str(nonce) + "%26oauth_signature_method%3DHMAC-SHA1%26oauth_timestamp%3" + str(time) + "%26oauth_version%3D1.0%26scope%3DMyTradeMeRead%252CMyTradeMeWrite"
I just tried to append it to the end, will this work or will i need to append to a list and then encode?
so like this:
headers = { my_variable + other_variable }
authorization = '5C82CC6BC7C6472154FBC9CAB24A29A2 ' + ', '.join([key + '="' + urllib.parse.quote_plus(str(value)) + '"' for key, value in headers.items()])
General
If you want to URL encode parameters to your POST request the best way is:
import urllib
f = { 'eventName' : 'myEvent',
'eventDescription' : 'cool event',
'url' : 'http://www.google.com'}
print 'POST&%s' % urllib.urlencode(f)
Output:
POST&eventName=myEvent&url=http%3A%2F%2Fwww.google.com&eventDescription=cool+event
A dictionary does not keep a guaranteed order here, so if the order of the parameters matters, use a list of tuples instead:
import urllib
f = [ ('eventName', 'myEvent'),
('eventDescription', 'cool event'),
('url', 'http://www.google.com')]
print 'POST&%s' % urllib.urlencode(f)
Output
POST&eventName=myEvent&eventDescription=cool+event&url=http%3A%2F%2Fwww.google.com
How to get the string you need (Python 3.5)
While the general example is tested in python 2.7, I wrote your example with python 3.5 code.
import urllib.parse
# Build the OAuth 1.0 signature base string:
#   METHOD & percent-encoded URL & percent-encoded parameter string
method = "POST"
url = "https://secure.trademe.co.nz/Oauth/RequestToken"
# Parameters are listed in ascending name order, which is the order the
# signature base string requires; keep them sorted when adding new ones.
params = [
    ('oauth_callback', 'http://www.website-tm-access.co.nz/trademe-callback'),
    ('oauth_consumer_key', 'C74CD73FDBE37D29BDD21BAB54BC70E422'),
    ('oauth_nonce', '7O3kEe'),
    ('oauth_signature_method', 'HMAC-SHA1'),
    ('oauth_timestamp', 1285532322),
    # A string, so the version token never depends on float formatting.
    ('oauth_version', '1.0'),
    ('scope', "MyTradeMeRead,MyTradeMeWrite"),
]


def _percent_encode(text):
    """Percent-encode *text*, leaving only unreserved characters as-is."""
    # safe='' also encodes "/"; quote() writes a space as %20, whereas
    # quote_plus() would write "+", which OAuth does not accept.
    return urllib.parse.quote(text, safe='')


# First pass: encode each name and value and join them as name=value&...
encoded_params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)

# Second pass: the URL and the whole parameter string are encoded again and
# joined to the HTTP method (previously hard-coded in the format string).
base_string = '&'.join([method, _percent_encode(url), _percent_encode(encoded_params)])
print(base_string)
Output
POST&https%3A%2F%2Fsecure.trademe.co.nz%2FOauth%2FRequestToken&oauth_callback%3Dhttp%253A%252F%252Fwww.website-tm-access.co.nz%252Ftrademe-callback%26oauth_consumer_key%3DC74CD73FDBE37D29BDD21BAB54BC70E422%26oauth_nonce%3D7O3kEe%26oauth_signature_method%3DHMAC-SHA1%26oauth_timestamp%3D1285532322%26oauth_version%3D1.0%26scope%3DMyTradeMeRead%252CMyTradeMeWrite

How can I extract specific elements out of an unstructured list and put them into a dataframe using Python

I have a long list of strings and I want to extract only the rows that have "Town":"Some City" and "State":"Some State", and then put those values into a dataframe with town and state as column headers. I've copied an extract of the strings below (it excludes the beginning [ and ending ] because the list is really long). Any ideas?
' "IsPayAtLocation": null,',
' "IsMembershipRequired": null,',
' "IsAccessKeyRequired": null,',
' "ID": 1,',
' "Title": "Public"',
' },',
' "UsageCost": "Free",',
' "AddressInfo": {',
' "ID": 57105,',
' "Title": "Somerset North",',
' "AddressLine1": "2800 W. Big Beaver Rd",',
' "AddressLine2": null,',
' "Town": "Troy",',
' "StateOrProvince": "MI",',
' "Postcode": "48084",',
' "CountryID": 2,',
' "Country": {',
' "ISOCode": "US",'
^[^,]*\b(?:Town|State).*$
You can use this pattern with re.findall. See the demo:
https://regex101.com/r/hE4jH0/34
import re
# With MULTILINE, ^ and $ anchor at every line. [^,]* keeps the keyword
# search inside the text before the first comma of a line, so "Town" or
# "State" only counts when it appears in that leading part.
p = re.compile(r'^[^,]*\b(?:Town|State).*$', flags=re.M)

# Sample input, one physical line per literal (joined by implicit
# concatenation into the same single string).
test_str = (
    "\"UsageCost\"', ' \"Free\",']\n"
    "[' \"AddressInfo\"', ' {']\n"
    "[' \"ID\"', ' 57105,']\n"
    "[' \"Title\"', ' \"Somerset North\",']\n"
    "[' \"AddressLine1\"', ' \"2800 W. Big Beaver Rd\",']\n"
    "[' \"AddressLine2\"', ' null,']\n"
    "[' \"Town\"', ' \"Troy\",']\n"
    "[' \"StateOrProvince\"', ' \"MI\",']\n"
    "[' \"Postcode\"', ' \"48084\",']\n"
    "[' \"CountryID\"', ' 2,']\n"
    "[' \"Country\"', ' {']\n"
    "[' \"ISOCode\"', ' \"US\",']\n"
    "[' \"ContinentCode\"', ' \"NA\",']\n"
    "[' \"ID\"', ' 2,']\n"
    "[' \"Title\"', ' \"United States\"']"
)

# Evaluates to the list of whole lines that matched; the value is not stored.
p.findall(test_str)
It's easier to extract the towns and states separately.
You didn't specify into what kind of table you want to put the town and state values, so I give an example with a database table.
# Sample of the raw lines; only the "Town" and "StateOrProvince" rows matter.
strings = [
' ...',
' "AddressLine2": null,',
' "Town": "Troy",',
' "StateOrProvince": "MI",',
' ...',
' "Town": "Troy",',
' "StateOrProvince": "MO",',
]

# One pass over the lines. Splitting on double quotes puts the quoted value
# at index 3 (index 1 is the key name).
cities = []
states = []
for line in strings:
    if '"Town":' in line:
        cities.append(line.split('"')[3])
    if '"StateO' in line:
        states.append(line.split('"')[3])

import sqlite3

# In-memory database holding one (town, state) row per positional pair.
data = sqlite3.connect(':memory:')
data.execute('CREATE TABLE towns (town TEXT, state CHAR(2))')
town_state_pairs = zip(cities, states)
data.executemany('INSERT INTO towns VALUES (?, ?)', town_state_pairs)

# Print the table as tab-separated text under a header line.
print('town\tstate')
for town, state in data.execute('SELECT * FROM towns'):
    print(town + '\t' + state)

Categories

Resources