How to pass byte data in the xml payload - python

I am trying to pass an XML payload as input to a function. In the payload, the objectZippedData field doesn't take a string as input; instead it needs the input to be in byte format.
import base64
import urllib.request
import certifi
from xml.etree.ElementTree import fromstring

def getSoapResponse(Envelope, url, action):
    try:
        envelope = Envelope
        # Create and register opener. Requires proxy when behind a firewall
        opener = urllib.request.build_opener(urllib.request.HTTPHandler(),
                                             urllib.request.HTTPSHandler(),
                                             urllib.request.ProxyHandler())
        urllib.request.install_opener(opener)
        # Create request for the service call
        request = urllib.request.Request(url)
        # Configure the request content type to be xml
        request.add_header("Content-Type", 'application/soap+xml;charset=utf-8')
        # Set the SOAP action to be invoked; while the call works without this,
        # the value is expected to be set based on standards
        request.add_header("SOAPAction", action)
        # Write the xml payload to the request
        request.data = envelope.encode()
        handle = urllib.request.urlopen(request, cafile=certifi.where())
        # Get the response and process it
        xmlString = handle.read(2000).decode('utf-8')
        if action != "uploadObjectInSession":
            # Convert the response into XML tree structure
            sessionxml = fromstring(xmlString)
            # Get the session value from the XML tree
            ReturnVal = sessionxml[0][0][0].text if action == 'login' else 'Success'
            return ReturnVal
        else:
            return "Success"
    except Exception as err:
        print("getSoapResponse - Error occurred; Message ", str(err))
        print("===============================================================")
        return "Error"

def uploadSession(catFile, Target):
    try:
        with open(catFile, 'rb') as f:
            bytes_encoded = base64.b64encode(f.read())
            string_encoded = bytes_encoded.decode()
    except IOError:
        return "Error"
    new_Envelope = """<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:v2="http://xmlns.oracle.com/oxp/service/v2">
    <soapenv:Header/>
    <soapenv:Body>
        <v2:uploadObjectInSession>
            <v2:reportObjectAbsolutePathURL>""" + Target + """</v2:reportObjectAbsolutePathURL>
            <v2:objectType>xdoz</v2:objectType>
            <v2:objectZippedData>""" + string_encoded + """</v2:objectZippedData>
            <v2:bipSessionToken>""" + target_sessionid + """</v2:bipSessionToken>
        </v2:uploadObjectInSession>
    </soapenv:Body>
</soapenv:Envelope>"""
    # url and target_sessionid are globals defined elsewhere in the script
    Final = getSoapResponse(Envelope=new_Envelope, url=url, action='uploadObjectInSession')
    if Final == "Error":
        print("FAILURE")
        return "Error"
    else:
        print(Final)
        return "Success"
When the function above is called with a string in objectZippedData, the upload fails, whereas if I pass byte data in objectZippedData, I get the error below:
new_Envelope = """<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:v2="http://xmlns.oracle.com/oxp/service/v2">
TypeError: can only concatenate str (not "bytes") to str

Use .encode() to convert a string to bytes
>>> mystr="test"
>>> type(mystr)
<class 'str'>
>>> type(mystr.encode())
<class 'bytes'>
Your error suggests that when building new_Envelope by concatenating your inputs with +, one of them is actually of type bytes instead of str.
Find which one and use .decode() to turn it into a string.
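If it isn't obvious which value is bytes, print the types of the pieces that go into the envelope and decode the offender. A minimal sketch using the names from your uploadSession (the isinstance guard is illustrative, not part of your code):
# Whichever of these reports <class 'bytes'> is breaking the concatenation
for name, part in [("Target", Target),
                   ("string_encoded", string_encoded),
                   ("target_sessionid", target_sessionid)]:
    print(name, type(part))

# Turn a bytes value back into str before concatenating
if isinstance(string_encoded, bytes):
    string_encoded = string_encoded.decode("utf-8")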

Related

Python (requests) - incorrect encoding when fetching headers

I am using the requests library (Python 3.9) to get a filename from a URL.[1] For some reason the file name comes back incorrectly encoded:
I should get "Ogłoszenie_0320.pdf" instead of "OgÅ\x82oszenie_0320.pdf".
My code looks something like this:
import os
import re
import requests
from urllib.parse import urlparse

def getFilenameFromRequest(url: str, headers):
    # Parses from header information
    contentDisposition = headers.get('content-disposition')
    if contentDisposition:
        filename = re.findall('filename=(.+)', contentDisposition)
        print("oooooooooo: " + contentDisposition + " : " + str(filename))
        if len(filename) != 0:
            return filename[0]
    # Parses from url
    parsedUrl = urlparse(url)
    return os.path.basename(parsedUrl.path)

def getFilenameFromUrl(url: str):
    request = requests.head(url)
    headers = request.headers
    return getFilenameFromRequest(url, headers)

getFilenameFromUrl('https://przedszkolekw.bip.gov.pl' +
                   '/fobjects/download/880287/ogloszenie-uzp-nr-613234-pdf.html')
Any idea how to fix it?
I know that for a standard request I can set the encoding directly:
request.encoding = 'utf-8'
But what am I supposed to do with this case?
[1]
https://przedszkolekw.bip.gov.pl/fobjects/download/880287/ogloszenie-uzp-nr-613234-pdf.html
Only characters from the ASCII-based Latin-1 encoding should be used as header values [rfc]. Here the file name has been escaped to fit that restriction.
>>> s = "Ogłoszenie_0320.pdf"
>>> s.encode("utf8").decode("unicode-escape")
'OgÅ\x82oszenie_0320.pdf'
To reverse the process you can do
>>> sx = 'OgÅ\x82oszenie_0320.pdf'
>>> sx.encode("latin-1").decode("utf8")
'Ogłoszenie_0320.pdf'
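Applied to the function from the question, the fix goes where the filename is pulled from the header. A sketch, assuming the server always sends UTF-8 bytes that ended up decoded as Latin-1 (imports as in the question):
def getFilenameFromRequest(url: str, headers):
    contentDisposition = headers.get('content-disposition')
    if contentDisposition:
        filename = re.findall('filename=(.+)', contentDisposition)
        if filename:
            # Reverse the mangling: back to raw bytes, then decode as UTF-8
            return filename[0].encode("latin-1").decode("utf8")
    # Fall back to the last path component of the URL
    return os.path.basename(urlparse(url).path)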
(updated after conversation in comments)

Python: Unicode-objects must be encoded before hashing

I am working with a cryptocurrency exchange that requires the secret API key to be encoded to gain access to private API calls. I have copied and pasted their Python code to begin executing my calls with it, but I receive this error every time I make a request.
TypeError: Unicode-objects must be encoded before hashing
I know what this means; I am a programmer. But I cannot find the root of the problem in the code I received from the exchange, as I have not worked with hmac, hashlib, or base64. I have replaced all instances of the name of the exchange with the word "exchange" in the following code. No API keys are shown.
import base64
import hashlib
import hmac
import json
import time
from urllib.request import ProxyHandler, Request, build_opener, install_opener, urlopen

base = 'https://exchange.com/'

def post_request(key, secret, path, data):
    hmac_obj = hmac.new(secret, path + chr(0) + data, hashlib.sha512)
    hmac_sign = base64.b64encode(hmac_obj.digest())
    header = {
        'Content-Type': 'application/json',
        'User-Agent': 'exchangev2 based client',
        'Rest-Key': key,
        'Rest-Sign': hmac_sign,
    }
    proxy = ProxyHandler({'http': '127.0.0.1:8888'})
    opener = build_opener(proxy)
    install_opener(opener)
    request = Request(base + path, data, header)
    response = urlopen(request, data)
    return json.load(response)

def gen_tonce():
    return str(int(time.time() * 1e6))

class Exchange:
    def __init__(self, key, secret):
        self.key = key
        self.secret = base64.b64decode(secret)

    def request(self, path, params={}):
        params = dict(params)
        params['tonce'] = gen_tonce()
        # data = urllib.urlencode(params)
        data = json.dumps(params)
        result = post_request(self.key, self.secret, path, data)
        if result['result'] == 'success':
            return result['data']
        else:
            raise Exception(result['result'])

exchangeconfig = Exchange('key', 'secret')
exchangeconfig.request("api/3/account")
Please help me figure this out.
By the way: It seems to have a problem with this line in particular:
hmac_obj = hmac.new(secret, path + chr(0) + data, hashlib.sha512)
Thanks.
UPDATE: Fixed that error. Now onto this one:
TypeError: POST data should be bytes, an iterable of bytes, or a file object. It cannot be of type str.
These hash libraries deal with bytes objects, so you should encode your strings to bytes first (assuming the decoding end uses UTF-8):
hmac_obj = hmac.new(secret, path.encode('utf-8') + b'\0' + data.encode('utf-8'), hashlib.sha512)
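The follow-up TypeError comes from urlopen: in Python 3 the POST body must also be bytes, not str. A sketch of the relevant lines in post_request, assuming UTF-8 on the wire (the body name is mine):
body = data.encode('utf-8')  # JSON text -> bytes for the POST body
request = Request(base + path, body, header)
response = urlopen(request, body)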

Why are my ZenDesk macros being updated, but no change actually going through?

I was trying to bulk edit the signature of my personal macros on ZenDesk, and the only way to do that is via the API. So I wrote this quick Python script to try to do it:
import sys
import time
import logging
import requests
import re

start_time = time.time()

# Set up logging
logger = logging.getLogger()
log_handler = logging.StreamHandler(sys.stdout)
log_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s - %(funcName)s - line %(lineno)d"))
log_handler.setLevel(logging.DEBUG)
logger.addHandler(log_handler)
logger.setLevel(logging.DEBUG)

def doTheGet(url, user, pwd):
    response = requests.get(url, auth=(user + "/token", pwd))
    if response.status_code != 200:
        logger.error("Status: %s (%s) Problem with the request. Exiting. %f seconds elapsed" % (response.status_code, response.reason, time.time() - start_time))
        exit()
    data = response.json()
    return data

def doThePut(url, updated_data, user, pwd):
    response = requests.put(url, json="{'macro': {'actions': %r}}" % updated_data, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
    if response.status_code != 200:
        logger.error("Status: %s (%s) Problem with the request. Exiting. %f seconds elapsed" % (response.status_code, response.reason, time.time() - start_time))
        exit()
    data = response.json()
    return data

def getMacros():
    macros = {}
    data = doTheGet("https://mydomain.zendesk.com/api/v2/macros.json", "me@mydomain.com", "111tokenZZZ")

    def getMacros(macro_list, page, page_count):
        if not page:
            for macro in macro_list:
                if macro["restriction"] and macro["active"]:
                    if macro["restriction"]["type"] == "User":
                        macros[macro["id"]] = macro["actions"]
        else:
            for macro in macro_list:
                if macro["restriction"] and macro["active"]:
                    if macro["restriction"]["type"] == "User":
                        macros[macro["id"]] = macro["actions"]
            page_count += 1
            new_data = doTheGet(page, "me@mydomain.com", "111tokenZZZ")
            new_macs = new_data["macros"]
            new_next_page = new_data["next_page"]
            getMacros(new_macs, new_next_page, page_count)

    macs = data["macros"]
    current_page = 1
    next_page = data["next_page"]
    getMacros(macs, next_page, current_page)
    return macros

def updateMacros():
    macros = getMacros()
    regular = "RegEx to match signature to be replaced$"  # since some macros already have the updated signature
    for macro in macros:
        for action in macros[macro]:
            if action["field"] == "comment_value":
                if re.search(regular, action["value"][1]):
                    ind = action["value"][1].rfind("\n")
                    action["value"][1] = action["value"][1][:ind] + "\nNew signature"
    return macros

macs = updateMacros()
for mac in macs:
    doThePut("https://mydomain.zendesk.com/api/v2/macros/%d.json" % (mac), macs[mac], "me@mydomain.com", "111tokenZZZ")
Now, everything's running as expected, and I get no errors. When I go to my macros on ZenDesk and sort them by last updated, I do see that the script did something, since they show as being last updated today. However, nothing changes on them. I made sure the data I'm sending over is edited (updateMacros is doing its job). I made sure the requests send back an OK response. So I'm sending updated data, getting back a 200 response, but the response sent back shows me the macros as they were before, with zero changes.
The only thing that occurs to me as maybe being wrong in some way is the format of the data I'm sending over, or something of the sort. But even then, I'd expect the response to not be a 200, then...
What am I missing here?
Looks like you're double-encoding the JSON data in your PUT request:
response = requests.put(url, json="{'macro': {'actions': %r}}" % updated_data, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
The json parameter expects an object, which requests then dutifully encodes as JSON and sends as the body of the request. This is merely a convenience; the implementation is simply:
if not data and json is not None:
    # urllib3 requires a bytes-like body. Python 2's json.dumps
    # provides this natively, but Python 3 gives a Unicode string.
    content_type = 'application/json'
    body = complexjson.dumps(json)
    if not isinstance(body, bytes):
        body = body.encode('utf-8')
(source: https://github.com/kennethreitz/requests/blob/master/requests/models.py#L424)
Since the value is always passed through json.dumps(), if you pass a string representing already-encoded JSON it will itself be encoded:
"{\'macro\': {\'actions\': [{\'field\': \'comment_value\', \'value\': [\'channel:all\', \'Spiffy New Sig that will Never Be Saved\']}]}}"
ZenDesk, upon being given JSON it doesn't expect, updates the updated_at field and... does nothing else. You can verify this by passing an empty string - same result.
Note that you're also relying on Python's repr formatting to fill in your JSON; that's probably a bad idea too. Instead, let's just reconstruct our macro object and let requests encode it:
response = requests.put(url, json={'macro': {'actions': updated_data}}, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
This should do what you expect.
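Folding that into the helper from the question, doThePut becomes (a sketch using the same names as the original script; requests sets the Content-Type header itself when json= is used):
def doThePut(url, updated_data, user, pwd):
    # Pass a real dict; requests serializes it to JSON exactly once
    response = requests.put(url, json={'macro': {'actions': updated_data}},
                            auth=(user + "/token", pwd))
    if response.status_code != 200:
        logger.error("Status: %s (%s) Problem with the request. Exiting."
                     % (response.status_code, response.reason))
        exit()
    return response.json()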

what's wrong with this python code

This is code for a web crawler.
I'm a beginner learning Python, so I don't know how to solve this.
Something seems wrong with search().
# -*- coding:utf-8 -*-
import urllib, urllib2, re

class BDTB:
    def __init__(self, baseUrl, seeLZ):
        self.baseUrl = baseUrl
        self.seeLZ = '?see_lz' + str(seeLZ)

    def getPage(self, pageNum):
        try:
            url = self.baseUrl + self.seeLZ + '&pn=' + str(pageNum)
            request = urllib2.Request(url)
            response = urllib2.urlopen(request)
            #print response.read().decode('utf-8')
            return response
        except urllib2.URLError, e:
            if hasattr(e, 'reason'):
                print u'Failed to connect to Baidu Tieba. Reason:', e.reason
                return None

    def getTitle(self):
        page = self.getPage(1)
        pattern = re.compile('<h3 class.*?px">(.*?)</h3>', re.S)
        result = re.search(pattern, page)
        if result:
            print result.group(1)
            return result.group(1).strip()
        else:
            return None

baseURL = 'http://tieba.baidu.com/p/4095047339'
bdtb = BDTB(baseURL, 1)
bdtb.getTitle()
This will raise a TypeError: expected string or buffer because you are passing the object returned from urllib2.urlopen(request) to re.search(), which requires a str.
If you change the return value from:
return response  # returns the response object
to one that returns the text contained in the response:
return response.read()  # returns the text contained in the response
your script works, and after executing it returns:
广告兼职及二手物品交易集中贴 (a consolidated thread for ads, part-time jobs, and second-hand trading)
Additionally, since you're working with Python 2.x you might want to change your class from class BDTB: to class BDTB(object): in order to use new-style classes.
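Putting both changes together, a sketch of the corrected class (Python 2, as in the question):
# -*- coding:utf-8 -*-
import re
import urllib2

class BDTB(object):  # new-style class
    def __init__(self, baseUrl, seeLZ):
        self.baseUrl = baseUrl
        self.seeLZ = '?see_lz' + str(seeLZ)

    def getPage(self, pageNum):
        try:
            url = self.baseUrl + self.seeLZ + '&pn=' + str(pageNum)
            # Return the page text so re.search() receives a str
            return urllib2.urlopen(urllib2.Request(url)).read()
        except urllib2.URLError, e:
            if hasattr(e, 'reason'):
                print u'Failed to connect to Baidu Tieba. Reason:', e.reason
            return None

    def getTitle(self):
        page = self.getPage(1)
        result = re.search('<h3 class.*?px">(.*?)</h3>', page, re.S)
        return result.group(1).strip() if result else None

bdtb = BDTB('http://tieba.baidu.com/p/4095047339', 1)
print bdtb.getTitle()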

Python - seek in http response stream

Using urllib (or urllib2), getting what I want seems hopeless.
Any solution?
I'm not sure how the C# implementation works, but, as internet streams are generally not seekable, my guess would be it downloads all the data to a local file or in-memory object and seeks within it from there. The Python equivalent of this would be to do as Abafei suggested and write the data to a file or StringIO and seek from there.
However, if, as your comment on Abafei's answer suggests, you want to retrieve only a particular part of the file (rather than seeking backwards and forwards through the returned data), there is another possibility. urllib2 can be used to retrieve a certain section (or 'range' in HTTP parlance) of a webpage, provided that the server supports this behaviour.
The range header
When you send a request to a server, the parameters of the request are given in various headers. One of these is the Range header, defined in section 14.35 of RFC2616 (the specification defining HTTP/1.1). This header allows you to do things such as retrieve all data starting from the 10,000th byte, or the data between bytes 1,000 and 1,500.
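For example, those two requests would carry these header values (both endpoints are inclusive):
Range: bytes=10000-       # everything from byte 10,000 onward
Range: bytes=1000-1500    # bytes 1,000 through 1,500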
Server support
There is no requirement for a server to support range retrieval. Some servers will return the Accept-Ranges header (section 14.5 of RFC2616) along with a response to report if they support ranges or not. This could be checked using a HEAD request. However, there is no particular need to do this; if a server does not support ranges, it will return the entire page and we can then extract the desired portion of data in Python as before.
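If you do want to probe first, a sketch of that HEAD check with urllib2 (the get_method override is needed because urllib2 has no built-in HEAD support):
import urllib2

request = urllib2.Request("http://www.python.org/")
request.get_method = lambda: "HEAD"  # fetch headers only, no body
response = urllib2.urlopen(request)
# 'bytes' means the server advertises range support
supports_ranges = response.headers.get("Accept-Ranges", "none").lower() == "bytes"
print supports_ranges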
Checking if a range is returned
If a server returns a range, it must send the Content-Range header (section 14.16 of RFC2616) along with the response. If this is present in the headers of the response, we know a range was returned; if it is not present, the entire page was returned.
Implementation with urllib2
urllib2 allows us to add headers to a request, thus allowing us to ask the server for a range rather than the entire page. The following script takes a URL, a start position, and (optionally) a length on the command line, and tries to retrieve the given section of the page.
import sys
import urllib2

# Check command line arguments.
if len(sys.argv) < 3:
    sys.stderr.write("Usage: %s url start [length]\n" % sys.argv[0])
    sys.exit(1)

# Create a request for the given URL.
request = urllib2.Request(sys.argv[1])

# Add the header to specify the range to download.
if len(sys.argv) > 3:
    start, length = map(int, sys.argv[2:])
    request.add_header("range", "bytes=%d-%d" % (start, start + length - 1))
else:
    request.add_header("range", "bytes=%s-" % sys.argv[2])

# Try to get the response. This will raise a urllib2.URLError if there is a
# problem (e.g., invalid URL).
response = urllib2.urlopen(request)

# If a content-range header is present, partial retrieval worked.
if "content-range" in response.headers:
    print "Partial retrieval successful."

    # The header contains the string 'bytes', followed by a space, then the
    # range in the format 'start-end', followed by a slash and then the total
    # size of the page (or an asterisk if the total size is unknown). Let's get
    # the range and total size from this.
    range, total = response.headers['content-range'].split(' ')[-1].split('/')

    # Print a message giving the range information.
    if total == '*':
        print "Bytes %s of an unknown total were retrieved." % range
    else:
        print "Bytes %s of a total of %s were retrieved." % (range, total)

# No header, so partial retrieval was unsuccessful.
else:
    print "Unable to use partial retrieval."

# And for good measure, let's check how much data we downloaded.
data = response.read()
print "Retrieved data size: %d bytes" % len(data)
Using this, I can retrieve the final 2,000 bytes of the Python homepage:
blair@blair-eeepc:~$ python retrieverange.py http://www.python.org/ 17387
Partial retrieval successful.
Bytes 17387-19386 of a total of 19387 were retrieved.
Retrieved data size: 2000 bytes
Or 400 bytes from the middle of the homepage:
blair@blair-eeepc:~$ python retrieverange.py http://www.python.org/ 6000 400
Partial retrieval successful.
Bytes 6000-6399 of a total of 19387 were retrieved.
Retrieved data size: 400 bytes
However, the Google homepage does not support ranges:
blair@blair-eeepc:~$ python retrieverange.py http://www.google.com/ 1000 500
Unable to use partial retrieval.
Retrieved data size: 9621 bytes
In this case, it would be necessary to extract the data of interest in Python prior to any further processing.
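In that fallback case the desired portion can simply be sliced out of the full response (start and length as in the script above):
data = response.read()
section = data[start:start + length]  # cut the desired range out locally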
It may work best just to write the data to a file (or even to a string, using StringIO), and to seek in that file (or string).
I did not find any existing implementations of a file-like interface with seek() to HTTP URLs, so I rolled my own simple version: https://github.com/valgur/pyhttpio. It depends on urllib.request but could probably easily be modified to use requests, if necessary.
The full code:
import cgi
import time
import urllib.error
import urllib.request
from io import IOBase
from sys import stderr


class SeekableHTTPFile(IOBase):
    def __init__(self, url, name=None, repeat_time=-1, debug=False):
        """Allow a file accessible via HTTP to be used like a local file by utilities
        that use `seek()` to read arbitrary parts of the file, such as `ZipFile`.
        Seeking is done via the 'range: bytes=xx-yy' HTTP header.

        Parameters
        ----------
        url : str
            A HTTP or HTTPS URL
        name : str, optional
            The filename of the file.
            Will be filled from the Content-Disposition header if not provided.
        repeat_time : int, optional
            In case of HTTP errors wait `repeat_time` seconds before trying again.
            Negative value or `None` disables retrying and simply passes on the exception (the default).
        """
        super().__init__()
        self.url = url
        self.name = name
        self.repeat_time = repeat_time
        self.debug = debug
        self._pos = 0
        self._seekable = True
        with self._urlopen() as f:
            if self.debug:
                print(f.getheaders())
            self.content_length = int(f.getheader("Content-Length", -1))
            if self.content_length < 0:
                self._seekable = False
            if f.getheader("Accept-Ranges", "none").lower() != "bytes":
                self._seekable = False
            if name is None:
                header = f.getheader("Content-Disposition")
                if header:
                    value, params = cgi.parse_header(header)
                    self.name = params["filename"]

    def seek(self, offset, whence=0):
        if not self.seekable():
            raise OSError
        if whence == 0:
            self._pos = 0
        elif whence == 1:
            pass
        elif whence == 2:
            self._pos = self.content_length
        self._pos += offset
        return self._pos

    def seekable(self, *args, **kwargs):
        return self._seekable

    def readable(self, *args, **kwargs):
        return not self.closed

    def writable(self, *args, **kwargs):
        return False

    def read(self, amt=-1):
        if self._pos >= self.content_length:
            return b""
        if amt < 0:
            end = self.content_length - 1
        else:
            end = min(self._pos + amt - 1, self.content_length - 1)
        byte_range = (self._pos, end)
        self._pos = end + 1
        with self._urlopen(byte_range) as f:
            return f.read()

    def readall(self):
        return self.read(-1)

    def tell(self):
        return self._pos

    def __getattribute__(self, item):
        attr = object.__getattribute__(self, item)
        if not object.__getattribute__(self, "debug"):
            return attr
        if hasattr(attr, '__call__'):
            def trace(*args, **kwargs):
                a = ", ".join(map(str, args))
                if kwargs:
                    a += ", ".join(["{}={}".format(k, v) for k, v in kwargs.items()])
                print("Calling: {}({})".format(item, a))
                return attr(*args, **kwargs)
            return trace
        else:
            return attr

    def _urlopen(self, byte_range=None):
        header = {}
        if byte_range:
            header = {"range": "bytes={}-{}".format(*byte_range)}
        while True:
            try:
                r = urllib.request.Request(self.url, headers=header)
                return urllib.request.urlopen(r)
            except urllib.error.HTTPError as e:
                if self.repeat_time is None or self.repeat_time < 0:
                    raise
                print("Server responded with " + str(e), file=stderr)
                print("Sleeping for {} seconds before trying again".format(self.repeat_time), file=stderr)
                time.sleep(self.repeat_time)
A potential usage example:
from zipfile import ZipFile

url = "https://www.python.org/ftp/python/3.5.0/python-3.5.0-embed-amd64.zip"
f = SeekableHTTPFile(url, debug=True)
zf = ZipFile(f)
zf.printdir()
zf.extract("python.exe")
Edit: There is actually a mostly identical, if slightly more minimal, implementation in this answer: https://stackoverflow.com/a/7852229/2997179
