Parse server payload with few keys absent - python

I have a rather basic bit of code. Basically what it does is sends an API request to a locally hosted Server and returns a JSON string. I'm taking that string and cracking it apart. Then I take what I need from it, make a Dictionary, and export it as an XML file with an nfo extension.
The issue is sometimes there are missing bits to the source data. Season is missing fairly frequently for example. It breaks the Data Mapping. I need a way to handle that. For somethings I may want to exclude the data and for others I need a sane default value.
#!/bin/env python
import os
import requests
import re
import json
import dicttoxml
import xml.dom.minidom
from xml.dom.minidom import parseString
# Grab Shoko Auth Key
apiheaders = {
'Content-Type': 'application/json',
'Accept': 'application/json',
}
apidata = '{"user": "Default", "pass": "", "device": "CLI"}'
r = requests.post('http://192.168.254.100:8111/api/auth',
headers=apiheaders, data=apidata)
key = json.loads(r.text)['apikey']
# Grabbing Episode Data
EpisodeHeaders = {
'accept': 'text/plain',
'apikey': key
}
EpisodeParams = (
('filename',
"FILE HERE"),
('pic', '1'),
)
fileinfo = requests.get(
'http://192.168.254.100:8111/api/ep/getbyfilename', headers=EpisodeHeaders, params=EpisodeParams)
# Mapping Data from Shoko to Jellyfin NFO
string = json.loads(fileinfo.text)
print(string)
eplot = json.loads(fileinfo.text)['summary']
etitle = json.loads(fileinfo.text)['name']
eyear = json.loads(fileinfo.text)['year']
episode = json.loads(fileinfo.text)['epnumber']
season = json.loads(fileinfo.text)['season']
aid = json.loads(fileinfo.text)['aid']
seasonnum = season.split('x')
# Create Dictionary From Mapped Data
show = {
"plot": eplot,
"title": etitle,
"year": eyear,
"episode": episode,
"season": seasonnum[0],
}
Here is some example output when the code crashes
{'type': 'ep', 'eptype': 'Credits', 'epnumber': 1, 'aid': 10713, 'eid': 167848,
'id': 95272, 'name': 'Opening', 'summary': 'Episode Overview not Available',
'year': '2014', 'air': '2014-11-23', 'rating': '10.00', 'votes': '1',
'art': {'fanart': [{'url': '/api/v2/image/support/plex_404.png'}],
'thumb': [{'url': '/api/v2/image/support/plex_404.png'}]}}
Traceback (most recent call last):
File "/home/fletcher/Documents/Shoko-Jellyfin-NFO/Xml3.py", line 48, in <module>
season = json.loads(fileinfo.text)['season']
KeyError: 'season'
The solution based on what Mahori suggested. Worked perfectly.
eplot = json.loads(fileinfo.text).get('summary', None)
etitle = json.loads(fileinfo.text).get('name', None)
eyear = json.loads(fileinfo.text).get('year', None)
episode = json.loads(fileinfo.text).get('epnumber', None)
season = json.loads(fileinfo.text).get('season', '1x1')
aid = json.loads(fileinfo.text).get('aid', None)

This is fairly common scenario with web development, where you cannot always assume other party will send all keys.
The standard way to get around this is by using get instead of named fetch.
season = json.loads(fileinfo.text).get('season', None)
#you can change None to any default value here

Related

Flashbots "X-Flashbots-Signature" header not working correctly with web3.py

Recently I've been having some trouble with the X-Flashbots-Signature header when sending a request to the flashbots goerli endpoint.
My python code looks like this:
import requests
import json
import secrets
from eth_account import Account, messages
from web3 import Web3
from math import ceil
rpcUrl = GOERLI_RPC_NODE_PROVIDER
web3 = Web3(Web3.HTTPProvider(rpcUrl))
publicKey = ETH_PUBLIC_KEY
privateKey = ETH_PRIVATE_KEY
contractAddress = GOERLI_TEST_CONTRACT # Goerli test contract
data = CONTRACT_DATA # Contract data to execute
signed = []
for _ in range(2):
nonce = web3.eth.getTransactionCount(publicKey, 'pending')
checksumAddress = Web3.toChecksumAddress(contractAddress)
checksumPublic = Web3.toChecksumAddress(publicKey)
tx = {
'nonce': nonce,
'to': checksumAddress,
'from': checksumPublic,
'value': 0,
'gasPrice': web3.toWei(200, 'gwei'),
'data': data
}
gas = web3.eth.estimateGas(tx)
tx['gas'] = ceil(gas + gas * .1)
signed_tx = web3.eth.account.signTransaction(tx, privateKey)
signed.append(Web3.toHex(signed_tx.rawTransaction))
dt = {
'jsonrpc': '2.0',
'method': 'eth_sendBundle',
'params': [
{
'txs': [
signed[0], signed[1] # Signed txs with web3.eth.account.signTransaction
],
'blockNumber': web3.eth.block_number + 1,
'minTimestamp': '0x0',
'maxTimestamp': '0x0',
'revertingTxHashes': []
}
],
'id': 1337
}
pvk = secrets.token_hex(32)
pbk = Account.from_key(pvk).address
body = json.dumps(dt)
message = messages.encode_defunct(text=Web3.keccak(text=body).hex())
signature = pbk + ':' + Account.sign_message(message, pvk).signature.hex()
hd = {
'Content-Type': 'application/json',
'X-Flashbots-Signature': signature,
}
res = requests.post('https://relay-goerli.flashbots.net/', headers=hd, data=body)
print(res.text)
This code is a modified version of code taken straight from the flashbots docs: https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint/#authentication
Upon running this code I get an internal server error error response. At first, I thought the problem might be fixed by replacing text=Web3.keccak(text=body).hex() to hexstr=Web3.keccak(text=body).hex() or primative=Web3.keccak(text=body), as per the definition of messages.encode_defunct: https://eth-account.readthedocs.io/en/stable/eth_account.html#eth_account.messages.encode_defunct. But after making this replacement, I got the error signer address does not equal expected. This is very confusing, especially because I have resolved the message
with the signature myself and the public key does match. But whenever I send it to the flashbots endpoint, I am left with this error.
Any ideas would be greatly appreciated.

EBAY Finding API Date Filtering

I am trying to return a list of completed items in a given category using the ebay API. My code seems to be working however the results seem to be very limited (about 100). I was assuming there would be some limitation on how far back the api would go but even just a few days should return thousands of results for this category. Am I missing something in the code or is this just a limitation of the ebay API? I did make sure I was using production and not the sandbox.
So I have realized now that there are multiple pages to my query up to the 100 item / 100 page max. I am now running into issues with the date filtering. I see the filter reference material on site but I am still not getting the result I expect. In the updated query I am trying to pull only items completed yesterday but when running I am getting stuff from today. Is there a better way to input the date filters?
from ebaysdk.finding import Connection as finding
from bs4 import BeautifulSoup
import os
import csv
api = finding(appid=<my appid>,config_file=None)
response = api.execute(
'findCompletedItems', {
'categoryId': '214',
'keywords' : 'prizm',
'endTimeFrom' : '2020-02-03T00:00:00.000Z',
'endTimeTo' : '2020-02-04T00:00:00.000Z' ,
'paginationInput': {
'entriesPerPage': '100',
'pageNumber': '1'
},
'sortOrder': 'EndTimeSoonest'
}
)
soup = BeautifulSoup(response.content , 'lxml')
totalitems = int(soup.find('totalentries').text)
items = soup.find_all('item')
for item in response.reply.searchResult.item:
print(item.itemId)
print(item.listingInfo.endTime)
I finally figured this out. I needed to add additional code for the item filters. The working code is below.
from ebaysdk.finding import Connection as finding
from bs4 import BeautifulSoup
import os
import csv
api = finding(appid=<my appid>,config_file=None)
response = api.execute(
'findCompletedItems', {
'categoryId': '214',
'keywords' : 'prizm',
'itemFilter': [
{'name': 'EndTimeFrom', 'value': '2020-02-03T00:00:00.000Z'},
{'name': 'EndTimeTo', 'value': '2020-02-04T00:00:00.000Z'}
#{'name': 'MinPrice', 'value': '200', 'paramName': 'Currency', 'paramValue': 'GBP'},
#{'name': 'MaxPrice', 'value': '400', 'paramName': 'Currency', 'paramValue': 'GBP'}
],
'paginationInput': {
'entriesPerPage': '100',
'pageNumber': '100'
},
'sortOrder': 'EndTimeSoonest'
}
)
soup = BeautifulSoup(response.content , 'lxml')
totalitems = int(soup.find('totalentries').text)
items = soup.find_all('item')
for item in response.reply.searchResult.item:
print(item.itemId)
print(item.listingInfo.endTime)

Python code breaks when attemting to download larger zipped csv file, works fine on smaller file

while working with small zipfiles(about 8MB) containg 25MB of CSV files the below code works exactly as it should. As soon as I attempt to download larger files (45MB zip file containing a 180MB csv) the code breaks and I get the following error message:
(venv) ufulu#ufulu awr % python get_awr_ranking_data.py
https://api.awrcloud.com/v2/get.php?action=get_topsites&token=REDACTED&project=REDACTED Client+%5Bw%5D&fileName=2017-01-04-2019-10-09
Traceback (most recent call last):
File "get_awr_ranking_data.py", line 101, in <module>
getRankingData(project['name'])
File "get_awr_ranking_data.py", line 67, in getRankingData
processRankingdata(rankDateData['details'])
File "get_awr_ranking_data.py", line 79, in processRankingdata
domain.append(row.split("//")[-1].split("/")[0].split('?')[0])
AttributeError: 'float' object has no attribute 'split'
My goal is to download data for 170 projects and save the data to sqlite DB.
Please bear with me me as I am a novice in the field of programming and python. I would greatly appreciate any help to fixing the code below as well as any other sugestions and improvements to making the code more robust and pythonic.
Thanks in advance
from dotenv import dotenv_values
import requests
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
from sqlalchemy import create_engine
# SQL Alchemy setup
engine = create_engine('sqlite:///rankingdata.sqlite', echo=False)
# Excerpt from the initial API Call
data = {'projects': [{'name': 'Client1',
'id': '168',
'frequency': 'daily',
'depth': '5',
'kwcount': '80',
'last_updated': '2019-10-01',
'keywordstamp': 1569941983},
{
"depth": "5",
"frequency": "ondemand",
"id": "194",
"kwcount": "10",
"last_updated": "2019-09-30",
"name": "Client2",
"timestamp": 1570610327
},
{
"depth": "5",
"frequency": "ondemand",
"id": "196",
"kwcount": "100",
"last_updated": "2019-09-30",
"name": "Client3",
"timestamp": 1570610331
}
]}
#setup
api_url = 'https://api.awrcloud.com/v2/get.php?action='
urls = [] # processed URLs
urlbacklog = [] # URLs that didn't return a downloadable File
# API Call to recieve URL containing downloadable zip and csv
def getRankingData(project):
action = 'get_dates'
response = requests.get(''.join([api_url, action]),
params=dict(token=dotenv_values()['AWR_API'],
project=project))
response = response.json()
action2 = 'topsites_export'
rankDateData = requests.get(''.join([api_url, action2]),
params=dict(token=dotenv_values()['AWR_API'],
project=project, startDate=response['details']['dates'][0]['date'], stopDate=response['details']['dates'][-1]['date'] ))
rankDateData = rankDateData.json()
print(rankDateData['details'])
urls.append(rankDateData['details'])
processRankingdata(rankDateData['details'])
# API Call to download and unzip csv data and process it in pandas
def processRankingdata(url):
content = requests.get(url)
# {"response_code":25,"message":"Export in progress. Please come back later"}
if "response_code" not in content:
f = ZipFile(BytesIO(content.content))
#print(f.namelist()) to get all filenames in Zip
with f.open(f.namelist()[0], 'r') as g: rankingdatadf = pd.read_csv(g)
rankingdatadf = rankingdatadf[rankingdatadf['Search Engine'].str.contains("Google")]
domain = []
for row in rankingdatadf['URL']:
domain.append(row.split("//")[-1].split("/")[0].split('?')[0])
rankingdatadf['Domain'] = domain
rankingdatadf['Domain'] = rankingdatadf['Domain'].str.replace('www.', '')
rankingdatadf = rankingdatadf.drop(columns=['Title', 'Meta description', 'Snippet', 'Page'])
print(rankingdatadf['Search Engine'][0])
writeData(rankingdatadf)
else:
urlbacklog.append(url)
pass
# Finally write the data to database
def writeData(rankingdatadf):
table_name_from_file = project['name']
check = engine.has_table(table_name_from_file)
print(check) # boolean
if check == False:
rankingdatadf.to_sql(table_name_from_file, con=engine)
print(project['name'] + ' ...Done')
else:
print(project['name'] + ' ... already in DB')
for project in data['projects']:
getRankingData(project['name'])
The problem seems to be the split call on a float and not necessarily the download. Try changing line 79
from
domain.append(row.split("//")[-1].split("/")[0].split('?')[0])
to
domain.append(str(str(str(row).split("//")[-1]).split("/")[0]).split('?')[0])
It looks like you're trying to parse the network location portion of the URL here, you can also use urllib.parse to make this easier instead of chaining all the splits:
from urllib.parse import urlparse
...
for row in rankingdatadf['URL']:
domain.append(urlparse(row).netloc)
I think a malformed URL is causing you issues, try (to diagnose issue):
try :
for row in rankingdatadf['URL']:
try:
domain.append(urlparse(row).netloc)
catch Exception:
exit(row)
Looks like you figured it out above, you have a database entry with a NULL value for the URL field. Not sure what your fidelity requirements for this data set are but might want to enforce database rules for URL field, or use pandas to drop rows where URL is NaN.
rankingdatadf = rankingdatadf.dropna(subset=['URL'])

writing json-ish list to csv, line by line, in python for bitcoin addresses

I'm querying the onename api in an effort to get the bitcoin addresses of all the users.
At the moment I'm getting all the user information as a json-esque list, and then piping the output to a file, it looks like this:
[{'0': {'owner_address': '1Q2Tv6f9vXbdoxRmGwNrHbjrrK4Hv6jCsz', 'zone_file': '{"avatar": {"url": "https://s3.amazonaws.com/kd4/111"}, "bitcoin": {"address": "1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh"}, "cover": {"url": "https://s3.amazonaws.com/dx3/111"}, "facebook": {"proof": {"url": "https://facebook.com/jasondrake1978/posts/10152769170542776"}, "username": "jasondrake1978"}, "graph": {"url": "https://s3.amazonaws.com/grph/111"}, "location": {"formatted": "Mechanicsville, Va"}, "name": {"formatted": "Jason Drake"}, "twitter": {"username": "000001"}, "v": "0.2", "website": "http://1642.com"}', 'verifications': [{'proof_url': 'https://facebook.com/jasondrake1978/posts/10152769170542776', 'service': 'facebook', 'valid': False, 'identifier': 'jasondrake1978'}], 'profile': {'website': 'http://1642.com', 'cover': {'url': 'https://s3.amazonaws.com/dx3/111'}, 'facebook': {'proof': {'url': 'https://facebook.com/jasondrake1978/posts/10152769170542776'}, 'username': 'jasondrake1978'}, 'twitter': {'username': '000001'}, 'bitcoin': {'address': '1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh'}, 'name': {'formatted': 'Jason Drake'}, 'graph': {'url': 'https://s3.amazonaws.com/grph/111'}, 'location': {'formatted': 'Mechanicsville, Va'}, 'avatar': {'url': 'https://s3.amazonaws.com/kd4/111'}, 'v': '0.2'}}}]
what I'm really interested in is the field {"address": "1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh"}, the rest of the stuff I don't need, I just want the addresses of every user.
Is there a way that I can just write only the addresses to a file using python?
I'm trying to write it as something like:
1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh,
1GA9RVZHuEE8zm4ooMTiqLicfnvymhzRVm,
1BJdMS9E5TUXxJcAvBriwvDoXmVeJfKiFV,
1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh,
...
and so on.
I've tried a number of different ways using dump, dumps, etc. but I haven't yet been able to pin it down.
My code looks like this:
import os
import json
import requests
#import py2neo
import csv
# set up authentication parameters
#py2neo.authenticate("46.101.180.63:7474", "neo4j", "uni-bonn")
# Connect to graph and add constraints.
neo4jUrl = os.environ.get('NEO4J_URL',"http://46.101.180.63:7474/db/data/")
#graph = py2neo.Graph(neo4jUrl)
# Add uniqueness constraints.
#graph.run("CREATE CONSTRAINT ON (q:Person) ASSERT q.id IS UNIQUE;")
# Build URL.
apiUrl = "https://api.onename.com/v1/users"
# apiUrl = "https://raw.githubusercontent.com/s-matthew-english/26.04/master/test.json"
# Send GET request.
Allusersjson = requests.get(apiUrl, headers = {"accept":"application/json"}).json()
#print(json)])
UsersDetails=[]
for username in Allusersjson['usernames']:
usernamex= username[:-3]
apiUrl2="https://api.onename.com/v1/users/"+usernamex+"?app-id=demo-app-id&app-secret=demo-app-secret"
userinfo=requests.get(apiUrl2, headers = {"accept":"application/json"}).json()
# try:
# if('bitcoin' not in userinfo[usernamex]['profile']):
# continue
# else:
# UsersDetails.append(userinfo)
# except:
# continue
try:
address = userinfo[usernamex]["profile"]["bitcoin"]["address"]
UsersDetails.append(address)
except KeyError:
pass # no address
out = "\n".join(UsersDetails)
print(out)
open("out.csv", "w").write(out)
# f = csv.writer(open("test.csv", "wb+"))
# Build query.
query = """
RETURN {json}
"""
# Send Cypher query.
# py2neo.CypherQuery(graph, query).run(json=json)
# graph.run(query).run(json=json)
#graph.run(query,json=json)
anyway, in such a situation, what's the best way to write out those addresses as csv :/
UPDATE
I ran it, and at first it worked, but then I got the following error:
Instead of adding all the information to the UsersDetails list
UsersDetails.append(userinfo)
you can add just the relevant part (address)
try:
address = userinfo[usernamex]["profile"]["bitcoin"]["address"]
UsersDetails.append(address)
except KeyError:
pass # no address
except TypeError:
pass # illformed data
To print the values to the screen:
out = "\n".join(UsersDetails)
print(out)
(replace "\n" with "," for comma separated output, instead of one per line)
To save to a file:
open("out.csv", "w").write(out)
You need to reformat the list, either through map() or a list comprehension, to get it down to just the information you want. For example, if the top-level key used in the response from the api.onename.com API is always 0, you can do something like this
UsersAddresses = [user['0']['profile']['bitcoin']['address'] for user in UsersDetails]

Sending list of dicts as value of dict with requests.post going wrong

I have clien-server app.
I localized trouble and there logic of this:
Client:
# -*- coding: utf-8 -*-
import requests
def fixing:
response = requests.post('http://url_for_auth/', data={'client_id': 'client_id',
'client_secret':'its_secret', 'grant_type': 'password',
'username': 'user', 'password': 'password'})
f = response.json()
data = {'coordinate_x': 12.3, 'coordinate_y': 8.4, 'address': u'\u041c, 12',
'products': [{'count': 1, 'id': 's123'},{'count': 2, 'id': 's124'}]}
data.update(f)
response = requests.post('http://url_for_working/, data=data)
response.text #There I have an Error about which I will say later
oAuth2 working well. But in server-side I have no products in request.data
<QueryDict: {u'token_type': [u'type_is_ok'], u'access_token': [u'token_is_ok'],
u'expires_in': [u'36000'], u'coordinate_y': [u'8.4'],
u'coordinate_x': [u'12.3'], u'products': [u'count', u'id', u'count',
u'id'], u'address': [u'\u041c, 12'], u'scope': [u'read write'],
u'refresh_token': [u'token_is_ok']}>
This part of QueryDict make me sad...
'products': [u'count', u'id', u'count', u'id']
And when I tried to make python dict:
request.data.dict()
... u'products': u'id', ...
And for sure other fields working well with Django serializer's validation. But not that, because there I have wrong values.
Looks like request (because it have x-www-encoded-form default) cant include list of dicts as value for key in dict so... I should use json in this case.
Finally I maked this func:
import requests
import json
def fixing:
response = requests.post('http://url_for_auth/', data={'client_id': 'client_id',
'client_secret':'its_secret', 'grant_type': 'password',
'username': 'user', 'password': 'password'})
f = response.json()
headers = {'authorization': f['token_type'].encode('utf-8')+' '+f['access_token'].encode('utf-8'),
'Content-Type': 'application/json'}
data = {'coordinate_x': 12.3, 'coordinate_y': 8.4, 'address': u'\u041c, 12',
'products': [{'count': 1, 'id': 's123'},{'count': 2, 'id': 's124'}]}
response = requests.post('http://url_for_working/', data=json.dumps(data),
headers=headers)
response.text
There I got right response.
Solved!
Hello i would like to refresh this topic, cause i have similar problem to this and above solution doesn`t work for me.
import requests
import urllib.request
import pprint
import json
from requests import auth
from requests.models import HTTPBasicAuth
payload = {
'description': 'zxcy',
'tags':[{
'id': 22,
'label': 'Card'}]
}
files = {'file': open('JAM5.pdf','rb')}
client_id = 32590
response = requests.post('https://system...+str(client_id)' , files=files ,data=payload, auth=HTTPBasicAuth(...)
Above code succesfully add file to CRM system and description to added file, but i have to add label to this too, and its seems doesnt work at all
When i try it with data=json.dumps(payload) i got this:
raise ValueError("Data must not be a string.")
ValueError: Data must not be a string.

Categories

Resources