Related
import dask.bag as db
class Converter():
    """Convert an Avro file to CSV by reading it through dask.

    The constructor stores the two paths; ``large_file_processor`` reads
    ``self.input`` and writes the result to ``self.output``.
    """

    def __init__(self,input,output):
        """Store the input (Avro) and output (CSV) file paths."""
        self.input = input
        self.output = output

    @staticmethod
    def large_file_reader(file_path: str):
        """Read the Avro file at ``file_path`` and return it as a dataframe.

        The file is opened with ``db.read_avro`` and the resulting bag is
        converted with ``to_dataframe()``.  The first six rows are printed
        as a quick check that the file can be read.
        """
        bag = db.read_avro(file_path)
        data = bag.to_dataframe()
        # Debug aid: show the first rows to confirm the file is readable.
        print(data.head(6))
        return data

    @staticmethod
    def large_file_writer(data, file_path: str) -> bool:
        """Write ``data`` to ``file_path`` as CSV without the index column.

        Returns ``True`` once the file has been written; any error raised
        while computing or writing propagates to the caller.
        """
        # NOTE(review): compute() presumably materialises the whole frame in
        # memory before writing, which may be a problem for multi-GB inputs.
        data.compute().to_csv(file_path, index=False)
        return True

    def large_file_processor(self):
        """Read ``self.input`` and write its contents to ``self.output``."""
        input_file_path = self.input
        output_file_path = self.output
        data = Converter.large_file_reader(input_file_path)
        Converter.large_file_writer(data=data, file_path=output_file_path)
if __name__ == "__main__":
    # Script entry point: convert the sample Avro file to CSV.
    converter = Converter(
        "/Users/csv_to_avro_new.avro",
        "/Users/test_avro_new.csv",
    )
    converter.large_file_processor()
Traceback (most recent call last):
File "/Users/PycharmProjects/ms--py/new.py", line 41, in <module>
c.large_file_processor()
File "/Users/PycharmProjects/ms--py/new.py", line 36, in large_file_processor
Converter.large_file_writer(data=data, file_path=output_file_path)
File "/Users/PycharmProjects/ms--py/new.py", line 28, in large_file_writer
data.compute().to_csv(file_path, index=False)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/base.py", line 315, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/base.py", line 600, in compute
results = schedule(dsk, keys, **kwargs)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/threaded.py", line 89, in get
results = get_async(
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 511, in get_async
raise_exception(exc, tb)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 319, in reraise
raise exc
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 224, in execute_task
result = _execute_task(task, data)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in <genexpr>
return func(*(_execute_task(a, cache) for a in args))
File "/Users/adavsandeep/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/bag/avro.py", line 150, in read_chunk
chunk = read_block(f, off, l, head["sync"])
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/utils.py", line 244, in read_block
found_start_delim = seek_delimiter(f, delimiter, 2**16)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/utils.py", line 187, in seek_delimiter
current = file.read(blocksize)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/implementations/local.py", line 337, in read
return self.f.read(*args, **kwargs)
ValueError: read of closed file
Process finished with exit code 1
I have added the error traceback above; please take a look. Thanks.
Error:
ValueError: read of closed file
I tried converting between other formats in the same way (for example, CSV to JSON), and that worked.
However, I am not able to convert Avro to CSV, Avro to JSON, or Avro to Parquet.
My file is larger than 2 GB, which is why I am using Dask.
Thanks in advance.
I want to save the dataset as a Parquet file called power.parquet, so I use df.to_parquet(<filename>). But it gives me this error: "ValueError: Error converting column "Global_reactive_power" to bytes using encoding UTF8. Original error: bad argument type for built-in operation". I have installed the fastparquet package.
from fastparquet import write, ParquetFile
# Write the dataset `dat` (defined outside this snippet) to power.parquet.
# NOTE(review): the traceback below shows this call failing while fastparquet
# encodes the column "Global_reactive_power" as UTF-8; presumably that column
# holds values that are not all strings - TODO confirm its dtype.
dat.to_parquet("power.parquet")
# Read the file back with fastparquet and convert it to a pandas DataFrame.
df_parquet = ParquetFile("power.parquet").to_pandas()
df_parquet.head() # Test your final value
`*Traceback (most recent call last):
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 259, in convert
out = array_encode_utf8(data)
File "fastparquet/speedups.pyx", line 50, in fastparquet.speedups.array_encode_utf8
TypeError: bad argument type for built-in operation
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/folders/4f/bm2th1p56tz4rq_zffc8g3940000gn/T/ipykernel_85477/3080656655.py", line 1, in <module>
dat.to_parquet("power.parquet", compression="GZIP")
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/core.py", line 4560, in to_parquet
return to_parquet(self, path, *args, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 732, in to_parquet
return compute_as_if_collection(
File "/opt/anaconda3/lib/python3.9/site-packages/dask/base.py", line 315, in compute_as_if_collection
return schedule(dsk2, keys, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/threaded.py", line 79, in get
results = get_async(
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 507, in get_async
raise_exception(exc, tb)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 315, in reraise
raise exc
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 220, in execute_task
result = _execute_task(task, data)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/opt/anaconda3/lib/python3.9/site-packages/dask/utils.py", line 35, in apply
return func(*args, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/io/parquet/fastparquet.py", line 1167, in write_partition
rg = make_part_file(
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 716, in make_part_file
rg = make_row_group(f, data, schema, compression=compression,
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 701, in make_row_group
chunk = write_column(f, coldata, column,
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 554, in write_column
repetition_data, definition_data, encode[encoding](data, selement), 8 * b'\x00'
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 354, in encode_plain
out = convert(data, se)
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 284, in convert
raise ValueError('Error converting column "%s" to bytes using '
ValueError: Error converting column "Global_reactive_power" to bytes using encoding UTF8. Original error: bad argument type for built-in operation
*
I tried adding object_coding = "bytes". How can I solve this problem?
I am trying to generate a qr code from text, and then insert into a reportlab pdf.
My code:
def qr_code_as_image(text):
    """Return an ``Image`` flowable containing the QR code for ``text``.

    ``generate_qr_code(text)`` supplies the image data.  When it returns raw
    bytes, they are wrapped in a ``BytesIO`` object before being passed to
    ``Image``.  Any other return value is passed through unchanged.
    """
    from io import BytesIO
    print("In show_qr")
    img = generate_qr_code(text)
    print(img, type(img))
    # Image treats a bytes argument as a file name and fails with
    # "Cannot open resource"; a file-like object is read directly instead.
    if isinstance(img, (bytes, bytearray)):
        img = BytesIO(img)
    i = Image(img)
    print(i, type(i))
    return i
def add_patient_header_with_qr(self):
    """Append a header row with the patient details table and a QR code.

    The first cell holds a three-line table (name/age, MRD number/date,
    number/doctor).  The second cell holds the QR code image for the URL
    returned by ``reverse('clinicemr', ...)`` for this check-in.  The
    combined one-row table is appended to ``self.elements``.
    """
    rows = [
        ("Name", self.linkedcustomer.name,
         "Age", self.linkedcustomer.age()),
        ("MRD No.", self.linkedcustomer.cstid,
         "Date", self.prescription_time),
        ("No.", "#", "Doctor", self.doc.name),
    ]
    details_table = Table(rows)
    details_table.setStyle(self.patientdetails_style)

    # Build the QR code flowable from the check-in URL.
    qr_flowable = qr_code_as_image(
        reverse('clinicemr', args=[self.checkin.checkinno])
    )
    qr_flowable.hAlign = 'LEFT'
    qr_table = Table([[qr_flowable]])

    # Place both tables side by side in a single outer row.
    header_row = Table([[details_table, qr_table]], colWidths=None)
    header_row.setStyle(self.table_left_top_align)
    self.elements.append(header_row)
def final_generate(self, footer_content, action=None):
    """Build ``self.elements`` into a PDF written to a temporary file.

    ``footer_content`` is passed to ``footer`` as ``content`` for every page
    callback.  ``action`` is accepted but not used in the code shown, and
    nothing is returned.
    """
    with NamedTemporaryFile(mode='w+b') as pdf_file:
        from django.http import FileResponse, Http404
        from functools import partial

        pdf_path = pdf_file.name
        # use the temp file
        print(os.system("cat " + str(pdf_path)))
        print(footer_content, type(footer_content))

        document = SimpleDocTemplate(
            pdf_path,
            pagesize=A4,
            rightMargin=20,
            leftMargin=20,
            topMargin=20,
            bottomMargin=80,
            allowSplitting=1,
            title="Prescription",
            author="System.com",
        )

        # One frame covering the printable area inside the margins.
        body_frame = Frame(
            document.leftMargin,
            document.bottomMargin,
            document.width,
            document.height,
            id='normal',
        )
        page_template = PageTemplate(
            id='test',
            frames=body_frame,
            onPage=partial(footer, content=footer_content),
        )
        document.addPageTemplates([page_template])

        # The footer callback is supplied for the first and all later pages.
        document.build(
            self.elements,
            onFirstPage=partial(footer, content=footer_content),
            onLaterPages=partial(footer, content=footer_content),
        )
        print(f'Generated {pdf_path}')
I get the following output:
2020-11-29 13:06:33,915 django.request ERROR Internal Server Error: /clinic/presc/k-0NGpApcg
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 655, in open_for_read
return open_for_read_by_name(name, mode)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 599, in open_for_read_by_name
return open(name, mode)
ValueError: embedded null byte
During handling of the above exception, another exception occurred:
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 658, in open_for_read
return getBytesIO(datareader(name) if name[:5].lower() == 'data:' else urlopen(name).read())
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 517, in open
req.timeout = timeout
AttributeError: 'bytes' object has no attribute 'timeout'
During handling of the above exception, another exception occurred:
Traceback(most recent call last):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/exception.py", line 34, in inner
response = get_response(request)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/joel/myappointments/clinic/views.py", line 6879, in GoGetPrescription
clinicobj = clinicobj,
File "/home/joel/myappointments/clinic/views.py", line 16222, in PDFPrescriptions
return prescription.generate_pdf(action=action, rating=True)
File "/home/joel/myappointments/clinic/views.py", line 15415, in generate_pdf
return self.final_generate(footer_content, action=action)
File "/home/joel/myappointments/clinic/views.py", line 15447, in final_generate
onLaterPages = partial(footer, content=footer_content)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 1291, in build
BaseDocTemplate.build(self, flowables, canvasmaker=canvasmaker)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 1056, in build
self.handle_flowable(flowables)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/doctemplate.py", line 912, in handle_flowable
if frame.add(f, canv, trySplit=self.allowSplitting):
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/frames.py", line 174, in _add
w, h = flowable.wrap(aW, h)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 1206, in wrap
self._calc(availWidth, availHeight)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 641, in _calc
W = self._calcPreliminaryWidths(availWidth) # widths
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 754, in _calcPreliminaryWidths
new = elementWidth(value, style) or 0
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 518, in _elementWidth
w = v.minWidth() # should be all flowables
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 873, in minWidth
style.leftPadding+style.rightPadding)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/tables.py", line 512, in _elementWidth
if hasattr(v, 'drawWidth') and isinstance(v.drawWidth, (int, float)): return v.drawWidth
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 494, in __getattr__
self._setup_inner()
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 455, in _setup_inner
img=self._img
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/platypus/flowables.py", line 488, in __getattr__
self._img=ImageReader(self._file)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 813, in __init__
annotateException('\nfileName=%r identity=%s' %
(fileName, self.identity()))
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 1394, in annotateException
rl_reraise(t, v, b)
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 147, in rl_reraise
raise v
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 777, in __init__
self.fp=open_for_read(fileName, 'b')
File "/home/joel/myappointments/venv/lib/python3.6/site-packages/reportlab/lib/utils.py", line 660, in open_for_read
raise IOError('Cannot open resource "%s"' % name)
OSError: Cannot open resource "b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0 #\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82'"
fileName=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0#\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82' identity=[ImageReader#0x7f1e0987ecf8 filename=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xd2\x00\x00\x00\xd2\x01\x00\x00\x00\x00\x17\xe2\xa3\xef\x00\x00\x01$IDATx\x9c\xed\x98An\xc4 \x10\x04k\x16\xdf\xf1\x8f\xe0gyS~\x80\x9f\x92\x1f\xe0;\xab\xde\x03\xc6\xeb\x1c"\xe5d\xd0\xda\x1c\xd0#\xcbRk4\x9e\xee\x01\xb6\xe5$\xa9 \xe5v\xc3\x83\xbf\xd7\xe7cA\x92\x94\xc1"N\x16k\x86\xa4\xd1x\x9e\x8bA\xc8#\xc8NJ\xbe 
e\'\xc0]</\x07\xccb\xdb\xfas\xe9\x8eM\xefP\xac\x13b\xed\xc6e0\xccKJ\x80\xd9\xecd\x11\x90T\xfap\x19\x06\xdb\x97\x13!;\xd5v\xd3\x87\xcbH\xd8\xa4=\x14\xeb\xd3\xc0\xb7\x9be$\x9e\x9d\xea\xa5V\x89/u\xab\xca\x94F\xe2yz^\x94\xbc\x04^\xda\x8ePe{,\x9e}\xeaE\xe9\xed\xe6\xe0\xae\x17\xa0\xa6#\xf9\xb2\x9b;\xe9\x1f\xdf}:\xc6A\x80v=\xbau\xba\xd5\xcb\xef_hk7#\xf1\xec\xee_\xf0\x92\x94\x9d.\xde_\xda<\xdd\xde\x19R\xb5\xbfW\xcf\xcb\x03V3\x8b\xb4\x911\xfc\x98\xd9\xd7\xe5\xfb\xcb\x11[f\'\x96\x19\x80\xa7\x8d\xcb\xf3\x0c\xec0O\x13\xbe\xa7b\xf8\x0c\x8b\xdd\xben\xef/\xc0\xf68\xb5E#\xf1\xec\xaaGU\xac\x9d\x08\xba\xba\xdf}\x01<\xf7\xbf\x8cN\xed-\x8a\x00\x00\x00\x00IEND\xaeB`\x82']
From the error, it appears that it is erroring out on getting the name of the image file. But there is no file. The image is being generated from BytesIO.
Your generate_qr_code function, which you did not show us, is NOT returning a BytesIO object. It's returning the raw bytes of the PNG image. When you print(img, type(img)), it told you it was of type "bytes", right? That's a string of bytes, not a BytesIO object. If you wrap those bytes into a BytesIO object, then the reportlab Image constructor will be able to handle it.
I am trying to use Pandas library to read csv files, using Eclipse's PyDev.
foo.csv file:
"head1", "head2",
"A", "123"
test.py:
import pandas as pd
# Load foo.csv into a DataFrame (the trailing semicolon is legal but unnecessary).
data = pd.read_csv('foo.csv');
# Python 2 print statement; formatting the frame for output is where the
# LookupError in the traceback below is raised.
print data
I ran this and got an error:
Traceback (most recent call last):
File "C:\Users\qqq\studyspace\macd\test3.py", line 4, in <module>
print data
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 666, in __str__
return self.__bytes__()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 676, in __bytes__
return self.__unicode__().encode(encoding, 'replace')
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 691, in __unicode__
fits_horizontal = self._repr_fits_horizontal_()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 651, in _repr_fits_horizontal_
d.to_string(buf=buf)
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 1488, in to_string
formatter.to_string()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 314, in to_string
strcols = self._to_str_columns()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 258, in _to_str_columns
str_index = self._get_formatted_index()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 472, in _get_formatted_index
fmt_index = [index.format(name=show_index_names, formatter=fmt)]
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 450, in format
return self._format_with_header(header, **kwargs)
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 472, in _format_with_header
result = _trim_front(format_array(values, None, justify='left'))
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1321, in format_array
return fmt_obj.get_result()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1448, in get_result
return _make_fixed_width(fmt_values, self.justify)
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1495, in _make_fixed_width
max_len = np.max([_strlen(x) for x in strings])
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 184, in _strlen
return len(x.decode(encoding))
LookupError: unknown encoding: MS874
I have tried to run this in IPython, and it does not give the error, so I think the problem is with my Eclipse setting. I use Eclipse Juno and I installed Pandas via Python(x,y).
I have tried to solve it blindly like this
import pandas as pd
# Load the CSV once, then keep trying to print it until printing succeeds.
data = pd.read_csv('foo.csv');
b = True;
while(b):
try:
print data
b = False
# NOTE: the bare `except` hides whatever error `print data` raises, and `b`
# stays True on failure, so an error that repeats keeps the loop running forever.
except:
print 'foooo'
And it just printed 'foooo' forever.
I have found the solution.
Right click on the project => Properties => Resource => Text file encoding. Choose other => UTF-8.
here is the sample code:
from mechanize import Browser
# Create a mechanize browser and fetch the page.
br = Browser()
page = br.open('http://hunters.tclans.ru/news.php?readmore=2')
# Select the first form on the page (.next() is the Python 2 iterator call).
# Parsing the forms is where the LookupError in the traceback below is raised.
br.form = br.forms().next()
print br.form
The problem is that the server returns an incorrect encoding name (windows-cp1251). How can I manually set the encoding of the current page in mechanize?
Error:
Traceback (most recent call last):
File "/tmp/stackoverflow.py", line 5, in <module>
br.form = br.forms().next()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_mechanize.py", line 426, in forms
return self._factory.forms()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 559, in forms
self._forms_factory.forms())
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 225, in forms
_urlunparse=_rfc3986.urlunsplit,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 967, in ParseResponseEx
_urlunparse=_urlunparse,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 1104, in _ParseFileEx
fp.feed(data)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 870, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.6/sgmllib.py", line 104, in feed
self.goahead(0)
File "/usr/lib/python2.6/sgmllib.py", line 193, in goahead
self.handle_entityref(name)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 751, in handle_entityref
'&%s;' % name, self._entitydefs, self._encoding))
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 238, in unescape
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
File "/usr/lib/python2.6/re.py", line 151, in sub
return _compile(pattern, 0).sub(repl, string, count)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 230, in replace_entities
repl = repl.encode(encoding)
LookupError: unknown encoding: windows-cp1251
I don't know about Mechanize, but you could hack codecs to accept wrong encoding names that have both ‘windows’ and ‘cp’:
import codecs


def fixcp(name):
    """Codec search function that tolerates names such as 'windows-cp1251'.

    Names that carry both 'windows' and 'cp' (for example 'windows-cp1251')
    are retried with the 'cp' removed ('windows-1251').  Returns the
    matching ``CodecInfo``, or ``None`` when the name does not have that
    form or the corrected name is unknown as well.
    """
    # Search functions may receive the name with '-' or with '_' as the
    # separator depending on the Python version, so accept both.
    normalized = name.lower().replace('-', '_')
    prefix = 'windows_cp'
    if not normalized.startswith(prefix):
        return None
    try:
        return codecs.lookup('windows_' + normalized[len(prefix):])
    except LookupError:
        return None


codecs.register(fixcp)
>>> '\xcd\xe0\xef\xee\xec\xe8\xed\xe0\xe5\xec'.decode('windows-cp1251')
u'Напоминаем'
Fixed by setting
# Override the encoding stored on mechanize's factory objects.
# `br` and `enc` come from the surrounding code; `enc` is presumably the
# corrected encoding name (e.g. "windows-1251") - TODO confirm.
# Note the differing attribute names: `encoding` on the first two objects,
# `_encoding` on the links factory.
br._factory.encoding = enc
br._factory._forms_factory.encoding = enc
br._factory._links_factory._encoding = enc
(note the underscores) after br.open()