How to fix encoding in Python Mechanize? - python

here is the sample code:
from mechanize import Browser
br = Browser()
page = br.open('http://hunters.tclans.ru/news.php?readmore=2')
br.form = br.forms().next()
print br.form
The problem is that server return incorrect encoding (windows-cp1251). How can I manually set the encoding of the current page in mechanize?
Error:
Traceback (most recent call last):
File "/tmp/stackoverflow.py", line 5, in <module>
br.form = br.forms().next()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_mechanize.py", line 426, in forms
return self._factory.forms()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 559, in forms
self._forms_factory.forms())
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 225, in forms
_urlunparse=_rfc3986.urlunsplit,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 967, in ParseResponseEx
_urlunparse=_urlunparse,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 1104, in _ParseFileEx
fp.feed(data)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 870, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.6/sgmllib.py", line 104, in feed
self.goahead(0)
File "/usr/lib/python2.6/sgmllib.py", line 193, in goahead
self.handle_entityref(name)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 751, in handle_entityref
'&%s;' % name, self._entitydefs, self._encoding))
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 238, in unescape
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
File "/usr/lib/python2.6/re.py", line 151, in sub
return _compile(pattern, 0).sub(repl, string, count)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 230, in replace_entities
repl = repl.encode(encoding)
LookupError: unknown encoding: windows-cp1251

I don't know about Mechanize, but you could hack codecs to accept wrong encoding names that have both ‘windows’ and ‘cp’:
>>> def fixcp(name):
... if name.lower().startswith('windows-cp'):
... try:
... return codecs.lookup(name[:8]+name[10:])
... except LookupError:
... pass
... return None
...
>>> codecs.register(fixcp)
>>> '\xcd\xe0\xef\xee\xec\xe8\xed\xe0\xe5\xec'.decode('windows-cp1251')
u'Напоминаем'

Fixed by setting
br._factory.encoding = enc
br._factory._forms_factory.encoding = enc
br._factory._links_factory._encoding = enc
(note the underscores) after br.open()

Related

Sending raw transaction from web3py: TypeError: <lambda>() missing 4 required positional arguments: 'hash', 'r', 's', and 'v'

I am trying to send raw transaction by web3py using this code:
t = w3.eth.account.sign_transaction(test_contract.functions.edit("test").buildTransaction(
{
"nonce": w3.eth.get_transaction_count(w3.eth.default_account)
}
), pkey)
w3.eth.send_raw_transaction(t)
But, where python comes to the last line, I have this error in console:
Traceback (most recent call last):
File "***/main.py", line 64, in <module>
w3.eth.send_raw_transaction(t)
File "***/venv/lib/python3.9/site-packages/web3/module.py", line 53, in caller
(method_str, params), response_formatters = method.process_params(module, *args, **kwargs) # noqa: E501
File "***/venv/lib/python3.9/site-packages/web3/method.py", line 194, in process_params
_apply_request_formatters(params, self.request_formatters(method)))
File "***/venv/lib/python3.9/site-packages/eth_utils/functional.py", line 45, in inner
return callback(fn(*args, **kwargs))
File "***/venv/lib/python3.9/site-packages/web3/method.py", line 50, in _apply_request_formatters
formatted_params = pipe(params, request_formatters)
File "cytoolz/functoolz.pyx", line 667, in cytoolz.functoolz.pipe
File "cytoolz/functoolz.pyx", line 642, in cytoolz.functoolz.c_pipe
File "cytoolz/functoolz.pyx", line 254, in cytoolz.functoolz.curry.__call__
File "cytoolz/functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "***/venv/lib/python3.9/site-packages/web3/_utils/abi.py", line 799, in map_abi_data
return pipe(data, *pipeline)
File "cytoolz/functoolz.pyx", line 667, in cytoolz.functoolz.pipe
File "cytoolz/functoolz.pyx", line 642, in cytoolz.functoolz.c_pipe
File "cytoolz/functoolz.pyx", line 254, in cytoolz.functoolz.curry.__call__
File "cytoolz/functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "***/venv/lib/python3.9/site-packages/web3/_utils/abi.py", line 833, in data_tree_map
return recursive_map(map_to_typed_data, data_tree)
File "***/venv/lib/python3.9/site-packages/web3/_utils/decorators.py", line 30, in wrapped
wrapped_val = to_wrap(*args)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 89, in recursive_map
items_mapped = map_collection(recurse, data)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 76, in map_collection
return datatype(map(func, collection))
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 88, in recurse
return recursive_map(func, item)
File "***/venv/lib/python3.9/site-packages/web3/_utils/decorators.py", line 30, in wrapped
wrapped_val = to_wrap(*args)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 89, in recursive_map
items_mapped = map_collection(recurse, data)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 76, in map_collection
return datatype(map(func, collection))
File "***/venv/lib/python3.9/site-packages/web3/_utils/abi.py", line 855, in __new__
return super().__new__(cls, *iterable)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 88, in recurse
return recursive_map(func, item)
File "***/venv/lib/python3.9/site-packages/web3/_utils/decorators.py", line 30, in wrapped
wrapped_val = to_wrap(*args)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 89, in recursive_map
items_mapped = map_collection(recurse, data)
File "***/venv/lib/python3.9/site-packages/web3/_utils/formatters.py", line 76, in map_collection
return datatype(map(func, collection))
TypeError: <lambda>() missing 4 required positional arguments: 'hash', 'r', 's', and 'v'
I am using infura custom node, that's why I cant send transaction by contract.functions.method.transact(). Don't know to do with this error, spent a lot of time reading docs and got nothing.
How can I fix that?
sign_transaction returns SignedTransaction object while send_raw_transaction accepts raw transaction bytes. So change your last line to:
w3.eth.send_raw_transaction(t.rawTransaction)
You also probably want to save the result to a variable to track the transaction later.
You need to sign the transaction before sending with your account that has ETH balance.
You need to use Signing middleware.
>>> from web3 import Web3, EthereumTesterProvider
>>> w3 = Web3(EthereumTesterProvider)
>>> from web3.middleware import construct_sign_and_send_raw_middleware
>>> from eth_account import Account
>>> acct = Account.create('KEYSMASH FJAFJKLDSKF7JKFDJ 1530')
>>> w3.middleware_onion.add(construct_sign_and_send_raw_middleware(acct))
>>> w3.eth.default_account = acct.address
# Now you can send a tx from acct.address without having to build and sign each raw transaction

Can't use API with username and password in Scrapy

This Curl works.
https://<user>:<pass>#xecdapi.xe.com/v1/convert_from.json/?from=1000000&to=SGD&amount=AED,AUD,BDT&inverse=True
But this Scrapy request doesn't work.
yield scrapy.Request("https://<user>:<pass>#xecdapi.xe.com/v1/convert_from.json/?from=1000000&to=SGD&amount=AED,AUD,BDT&inverse=True")
It returns this error:
Traceback (most recent call last):
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\internet\defer.py", line 1297, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\python\failure.py", line 389, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\core\downloader\middleware.py", line 43, in process_request
defer.returnValue((yield download_func(request=request,spider=spider)))
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\utils\defer.py", line 45, in mustbe_deferred
result = f(*args, **kw)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\core\downloader\handlers\__init__.py", line 65, in download_request
return handler.download_request(request, spider)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\core\downloader\handlers\http11.py", line 61, in download_request
return agent.download_request(request)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\core\downloader\handlers\http11.py", line 286, in download_request
method, to_bytes(url, encoding='ascii'), headers, bodyproducer)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\web\client.py", line 1596, in request
endpoint = self._getEndpoint(parsedURI)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\web\client.py", line 1580, in _getEndpoint
return self._endpointFactory.endpointForURI(uri)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\web\client.py", line 1456, in endpointForURI
uri.port)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\scrapy\core\downloader\contextfactory.py", line 59, in creatorForNetloc
return ScrapyClientTLSOptions(hostname.decode("ascii"), self.getContext())
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\internet\_sslverify.py", line 1201, in __init__
self._hostnameBytes = _idnaBytes(hostname)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\twisted\internet\_sslverify.py", line 87, in _idnaBytes
return idna.encode(text)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\idna\core.py", line 355, in encode
result.append(alabel(label))
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\idna\core.py", line 276, in alabel
check_label(label)
File "d:\kerja\hit\python~1\<project_name>\<project_name>\lib\site-packages\idna\core.py", line 253, in check_label
raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
InvalidCodepoint: Codepoint U+003A at position 28 of u'xxxxxxxxxxxxxxxxxxxxxxxxxxxx:xxxxxxxxxxxxxxxxxxxxxxxxxxx#xecdapi' not allowed
Scrapy does not support HTTP Authentication via URL. We have to use HTTPAuthMiddleware instead.
in settings.py:
DOWNLOADER_MIDDLEWARES = {
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 811,
}
in the spider:
from scrapy.spiders import CrawlSpider
class SomeIntranetSiteSpider(CrawlSpider):
http_user = 'someuser'
http_pass = 'somepass'
name = 'intranet.example.com'
# .. rest of the spider code omitted ...

Python HTMLParser Not Reading Whole File

from HTMLParser import HTMLParser
class HTMLParserDos(HTMLParser):
full_text = ""
def handle_data(self, data):
self.full_text += data
return self.full_text
h = HTMLParserDos()
file = open('emails.txt', 'r')
h.feed(file.read())
file.close()
print h.container
This code is getting an error:
Traceback (most recent call last): File "/Users/laurenstrom/Google
Drive/PYTHON/RANDO_CALRISSIAN/html_parse", line 15, in
h.feed(file.read()) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 108, in feed
self.goahead(0) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 148, in goahead
k = self.parse_starttag(i) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 229, in parse_starttag
endpos = self.check_for_whole_start_tag(i) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 304, in check_for_whole_start_tag
self.error("malformed start tag") File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py",
line 115, in error
raise HTMLParseError(message, self.getpos()) HTMLParseError: malformed start tag, at line 7, column 18
I'm not sure what I'm missing about .feed() but I can't seem to find anything about why it won't just read the whole file.
Your are asking the HTML parser to parse a file most of which isn't HTML. It is tripping over line 7 of your file. Which is :
Return-Path: <Tom#sjnetworkconsulting.com>
I would imagine it is seeing the < and assuming that is HTML which of course it is not.

Python Pandas print error in Eclipse's PyDev: unknown encoding: MS874

I am trying to use Pandas library to read csv files, using Eclipse's PyDev.
foo.csv file:
"head1", "head2",
"A", "123"
test.py:
import pandas as pd
data = pd.read_csv('foo.csv');
print data
I ran this and got an error:
Traceback (most recent call last):
File "C:\Users\qqq\studyspace\macd\test3.py", line 4, in <module>
print data
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 666, in __str__
return self.__bytes__()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 676, in __bytes__
return self.__unicode__().encode(encoding, 'replace')
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 691, in __unicode__
fits_horizontal = self._repr_fits_horizontal_()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 651, in _repr_fits_horizontal_
d.to_string(buf=buf)
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 1488, in to_string
formatter.to_string()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 314, in to_string
strcols = self._to_str_columns()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 258, in _to_str_columns
str_index = self._get_formatted_index()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 472, in _get_formatted_index
fmt_index = [index.format(name=show_index_names, formatter=fmt)]
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 450, in format
return self._format_with_header(header, **kwargs)
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 472, in _format_with_header
result = _trim_front(format_array(values, None, justify='left'))
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1321, in format_array
return fmt_obj.get_result()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1448, in get_result
return _make_fixed_width(fmt_values, self.justify)
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1495, in _make_fixed_width
max_len = np.max([_strlen(x) for x in strings])
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 184, in _strlen
return len(x.decode(encoding))
LookupError: unknown encoding: MS874
I have tried to run this in IPython, and it does not give the error, so I think the problem is with my Eclipse setting. I use Eclipse Juno and I installed Pandas via Python(x,y).
I have tried to solve it blindly like this
import pandas as pd
data = pd.read_csv('foo.csv');
b = True;
while(b):
try:
print data
b = False
except:
print 'foooo'
And it just printed 'foooo' forever.
I have found the solution.
Right click on the project => Properties => Resource => Text file encoding. Choose other => UTF-8.

Trouble using gdata and Unicode Cyrillic in Python

I have this code
# -*- coding: utf8 -*-
__author__ = 'user'
import gdata.youtube.service
yt_service = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()
query.vq = u"не"
feed = yt_service.YouTubeQuery(query)
for yt_item in feed.entry:
print yt_item.GetSwfUrl()
And I am getting this error:
Traceback (most recent call last):
File "cyr_search.py", line 7, in
feed = yt_service.YouTubeQuery(query)
File "/Users/user/Documents/GrabaHeroku/graba_h_ve/lib/python2.7/site-packages/gdata/youtube/service.py", line 1346, in YouTubeQuery
result = self.Query(query.ToUri())
File "/Users/user/Documents/GrabaHeroku/graba_h_ve/lib/python2.7/site-packages/gdata/service.py", line 1715, in ToUri
return atom.service.BuildUri(q_feed, self)
File "/Users/user/Documents/GrabaHeroku/graba_h_ve/lib/python2.7/site-packages/atom/service.py", line 584, in BuildUri
parameter_list = DictionaryToParamList(url_params, escape_params)
File "/Users/user/Documents/GrabaHeroku/graba_h_ve/lib/python2.7/site-packages/atom/service.py", line 551, in DictionaryToParamList
for param, value in (url_parameters or {}).items()]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 1275, in quote_plus
return quote(s, safe)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 1268, in quote
return ''.join(map(quoter, s))
KeyError: u'\u043d'
How do I search for non-ASCII. Do I need to url encode the query? I thought the library will do that on its own.
Change to:
query.vq = u"не".encode('utf8')
The string needs to be encoded before being sent.

Categories

Resources