Python Bigtable Dataflow - Can't pickle <class 'Mutation'> - python

I'm writing a Dataflow pipeline using Apache beam to add large batches of rows of data to Bigtable.
apache-beam==2.24.0
google-cloud-bigtable==2.4.0
I have the following method used in my pipeline to create the Bigtable row(s) prior to writing to Bigtable:
class CreateBigtableRow(beam.DoFn):
    """Beam DoFn that converts one usage record into a Bigtable row.

    The column family is read once from ``settings`` at construction time
    and reused for every element processed.
    """

    def __init__(self, settings):
        """Remember the column family named by ``settings["bigtable_column_family"]``."""
        self.column_family = settings["bigtable_column_family"]
        super().__init__()

    def process(self, usage_data, *args, **kwargs):
        """Build the row for ``usage_data`` and emit it as a one-element list.

        The row key comes from ``BigTable.generate_row_key`` and the row
        itself from ``BigTable.create_row_and_assign_values``; ``key_order``
        is taken from the enclosing scope.
        """
        generated_key = BigTable.generate_row_key(usage_data, key_order)
        bigtable_row = BigTable.create_row_and_assign_values(
            generated_key, usage_data, self.column_family
        )
        return [bigtable_row]
where `create_row_and_assign_values` is defined as:
def create_row_and_assign_values(
    cls, key: str, row: dict, column_family: str
) -> DirectRow:
    """Create a ``DirectRow`` keyed by ``key`` with one cell per entry of ``row``.

    Args:
        cls: Unused here; presumably this is a classmethod (decorator not shown).
        key: Row key; encoded to bytes before being passed to ``DirectRow``.
        row: Mapping of column name to value.
        column_family: Column family every cell is written under.

    Returns:
        The populated ``DirectRow``.
    """
    table_row = DirectRow(key.encode())
    for column_name, cell_value in row.items():
        # Floats are packed as 8-byte big-endian doubles; other values are
        # handed to set_cell unchanged.
        if isinstance(cell_value, float):
            cell_value = struct.pack(">d", cell_value)
        # NOTE(review): the original indentation was lost; this assumes
        # set_cell runs for every column, not only for floats - confirm.
        table_row.set_cell(column_family, column_name.encode(), cell_value)
    return table_row
My pipeline is as follows:
with beam.Pipeline(options=pipeline_options) as pipe:
    # Stage 1: match the sample files and parse each one into CSV dict rows.
    # NOTE(review): the handle returned by open() is never explicitly closed.
    csv_records = (
        pipe
        | beam.Create(["/sample_files/*combined*"])  # reads sample csv file
        | fileio.MatchAll()
        | fileio.ReadMatches()
        | beam.FlatMap(
            lambda file: csv.DictReader(open(file.metadata.path))
        )
    )

    # Stage 2: turn each CSV record into a Bigtable row object.
    bigtable_rows = (
        csv_records
        | "Transform to Usage dict" >> beam.ParDo(TransformToBigtableData())
        | "Create Bigtable Row" >> beam.ParDo(CreateBigtableRow(bigtable_settings))
    )

    # Stage 3: write the rows to the table named in bigtable_settings.
    _ = bigtable_rows | WriteToBigTable(
        project_id=bigtable_settings["bigtable_project"],
        instance_id=bigtable_settings["bigtable_instance"],
        table_id=bigtable_settings["bigtable_table"],
    )
The problem I'm having is I get the error
_pickle.PicklingError: Can't pickle <class 'Mutation'>: attribute lookup Mutation on __main__ failed [while running 'Create Bigtable Row']
when running the pipeline. I've added steps to manually batch process the records by using the google-cloud-bigtable library's Bigtable Client, but would prefer to use the built-in WriteToBigTable method as it handles everything for me.
Full stack trace:
Traceback (most recent call last):
File "/app/src/ingest/main.py", line 226, in <module>
run(
File "/app/src/ingest/main.py", line 149, in run
(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/pipeline.py", line 596, in __exit__
self.result = self.run()
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/pipeline.py", line 546, in run
return Pipeline.from_runner_api(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/pipeline.py", line 573, in run
return self.runner.run_pipeline(self, self._options)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/direct/direct_runner.py", line 131, in run_pipeline
return runner.run_pipeline(pipeline, options)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 195, in run_pipeline
self._latest_run_result = self.run_via_runner_api(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 206, in run_via_runner_api
return self.run_stages(stage_context, stages)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 384, in run_stages
stage_results = self._run_stage(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 646, in _run_stage
self._run_bundle(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 769, in _run_bundle
result, splits = bundle_manager.process_bundle(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 1080, in process_bundle
result_future = self._worker_handler.control_conn.push(process_bundle_req)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/portability/fn_api_runner/worker_handlers.py", line 378, in push
response = self.worker.do_instruction(request)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py", line 597, in do_instruction
return getattr(self, request_type)(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/sdk_worker.py", line 635, in process_bundle
bundle_processor.process_bundle(instruction_id))
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py", line 995, in process_bundle
input_op_by_transform_id[element.transform_id].process_encoded(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/bundle_processor.py", line 221, in process_encoded
self.output(decoded_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 354, in output
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 216, in receive
self.consumer.process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 714, in process
delayed_applications = self.dofn_runner.process(o)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1235, in process
self._reraise_augmented(exn)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1316, in _reraise_augmented
raise new_exn.with_traceback(tb)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1233, in process
return self.do_fn_invoker.invoke_process(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 571, in invoke_process
self.output_processor.process_outputs(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/common.py", line 1396, in process_outputs
self.main_receivers.receive(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 215, in receive
self.update_counters_start(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/operations.py", line 179, in update_counters_start
self.opcounter.update_from(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/opcounters.py", line 211, in update_from
self.do_sample(windowed_value)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/runners/worker/opcounters.py", line 250, in do_sample
self.coder_impl.get_estimated_size_and_observables(windowed_value))
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/coders/coder_impl.py", line 1371, in get_estimated_size_and_observables
self._value_coder.get_estimated_size_and_observables(
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/coders/coder_impl.py", line 358, in get_estimated_size_and_observables
self.encode_to_stream(value, out, nested)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/coders/coder_impl.py", line 422, in encode_to_stream
self.fallback_coder_impl.encode_to_stream(value, stream, nested)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/coders/coder_impl.py", line 262, in encode_to_stream
return stream.write(self._encoder(value), nested)
File "/opt/pysetup/.venv/lib/python3.9/site-packages/apache_beam/coders/coders.py", line 800, in <lambda>
lambda x: dumps(x, protocol), pickle.loads)
_pickle.PicklingError: Can't pickle <class 'Mutation'>: attribute lookup Mutation on __main__ failed [while running 'Create Bigtable Row']

Your google-cloud-bigtable version is too high.
There is some movement in updating apache-beam dependencies here
They have the same issue. Can you roll back your bigtable version to something before 2? If you run this:
pip install apache-beam[gcp]
It'll install the recommended version.

Related

I got the error code trying to install django

Traceback (most recent call last):
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\urllib3\response.py", line 438, in _error_catcher
yield
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\urllib3\response.py", line 519, in read
data = self._fp.read(amt) if not fp_closed else b""
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\cachecontrol\filewrapper.py", line 62, in read
data = self.__fp.read(amt)
File "C:\Users\HP\AppData\Local\Programs\Python\Python310\lib\http\client.py", line 465, in read
s = self.fp.read(amt)
File "C:\Users\HP\AppData\Local\Programs\Python\Python310\lib\socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "C:\Users\HP\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1273, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\HP\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1129, in read
return self._sslobj.read(len, buffer)
TimeoutError: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\cli\base_command.py", line 173, in _main
status = self.run(options, args)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\cli\req_command.py", line 203, in wrapper
return func(self, options, args)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\commands\install.py", line 315, in run
requirement_set = resolver.resolve(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\resolver.py", line 94, in resolve
result = self._result = resolver.resolve(
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\resolvelib\resolvers.py", line 472, in resolve
state = resolution.resolve(requirements, max_rounds=max_rounds)
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\resolvelib\resolvers.py", line 341, in resolve
self._add_to_criteria(self.state.criteria, r, parent=None)
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\resolvelib\resolvers.py", line 172, in _add_to_criteria
if not criterion.candidates:
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\resolvelib\structs.py", line 151, in bool
return bool(self._sequence)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\found_candidates.py", line 140, in bool
return any(self)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\found_candidates.py", line 128, in
return (c for c in iterator if id(c) not in self._incompatible_ids)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\found_candidates.py", line 32, in _iter_built
candidate = func()
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\factory.py", line 204, in _make_candidate_from_link
self._link_candidate_cache[link] = LinkCandidate(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\candidates.py", line 295, in init
super().init(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\candidates.py", line 156, in init
self.dist = self._prepare()
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\candidates.py", line 227, in _prepare
dist = self._prepare_distribution()
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\resolution\resolvelib\candidates.py", line 305, in _prepare_distribution
return self._factory.preparer.prepare_linked_requirement(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\operations\prepare.py", line 508, in prepare_linked_requirement
return self._prepare_linked_requirement(req, parallel_builds)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\operations\prepare.py", line 550, in _prepare_linked_requirement
local_file = unpack_url(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\operations\prepare.py", line 239, in unpack_url
file = get_http_url(
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\operations\prepare.py", line 102, in get_http_url
from_path, content_type = download(link, temp_dir.path)
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\network\download.py", line 145, in call
for chunk in chunks:
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\cli\progress_bars.py", line 144, in iter
for x in it:
File "C:\Users\HP\myproject\lib\site-packages\pip_internal\network\utils.py", line 63, in response_chunks
for chunk in response.raw.stream(
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\urllib3\response.py", line 576, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\urllib3\response.py", line 512, in read
with self._error_catcher():
File "C:\Users\HP\AppData\Local\Programs\Python\Python310\lib\contextlib.py", line 153, in exit
self.gen.throw(typ, value, traceback)
File "C:\Users\HP\myproject\lib\site-packages\pip_vendor\urllib3\response.py", line 443, in _error_catcher
raise ReadTimeoutError(self._pool, None, "Read timed out.")
pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443): Read timed out.
Try installing Django inside a virtual environment.
In cmd:
> python -m venv env
> env\Scripts\activate
> pip install django

Beam: AfterProcessingTime cause 'NoneType' object has no attribute 'time'

With the following code under Beam 2.14.0
| "FixedWindow" >> beam.WindowInto(beam.window.FixedWindows(4 * 60),
trigger=beam.trigger.Repeatedly(
beam.trigger.AfterProcessingTime(delay=1 * 60)
),
accumulation_mode=beam.trigger.AccumulationMode.DISCARDING)
The following error comes up
Traceback (most recent call last):
File "beam_home.py", line 287, in <module>
run()
File "beam_home.py", line 282, in run
p.run().wait_until_finish()
File "/usr/local/lib/python2.7/site-packages/apache_beam/pipeline.py", line 406, in run
self._options).run(False)
File "/usr/local/lib/python2.7/site-packages/apache_beam/pipeline.py", line 419, in run
return self.runner.run_pipeline(self, self._options)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/direct/direct_runner.py", line 128, in run_pipeline
return runner.run_pipeline(pipeline, options)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 294, in run_pipeline
default_environment=self._default_environment))
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 301, in run_via_runner_api
return self.run_stages(stage_context, stages)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 383, in run_stages
stage_context.safe_coders)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 655, in _run_stage
result, splits = bundle_manager.process_bundle(data_input, data_output)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1460, in process_bundle
process_bundle_id, transform_id, elements)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1356, in _send_input_to_worker
for byte_stream in byte_streams:
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 186, in __iter__
for wkvs in windowed_key_values(key, windowed_values):
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 966, in process_entire_key
state, windowed_values, output_watermark):
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 1130, in process_elements
self.trigger_fn.on_element(value, window, context)
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 515, in on_element
self.underlying.on_element(element, window, context)
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 373, in on_element
self.early.on_element(element, window, NestedContext(context, 'early'))
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 515, in on_element
self.underlying.on_element(element, window, context)
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 307, in on_element
'', TimeDomain.REAL_TIME, context.get_current_time() + self.delay)
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 759, in get_current_time
return self._outer.get_current_time()
File "/usr/local/lib/python2.7/site-packages/apache_beam/transforms/trigger.py", line 733, in get_current_time
return self._clock.time()
AttributeError: 'NoneType' object has no attribute 'time'
Am I missing anything?
It doesn't look like you are missing anything.
It's apparently a known issue. Please see BEAM-5132.
I think the best workaround is to avoid using AfterProcessingTime, which is the root cause. It's annoying, but you can imitate its effect in a ParDo class.

AttributeError after running pipenv commands

An error occurred while running a pipenv command:
Traceback (most recent call last):
File "c:\python37\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\python37\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Python37\Scripts\pipenv.exe\__main__.py", line 9, in <module>
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 764, in __call__
return self.main(*args, **kwargs)
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 717, in main
rv = self.invoke(ctx)
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 1137, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 956, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 555, in invoke
return callback(*args, **kwargs)
File "c:\python37\lib\site-packages\pipenv\vendor\click\decorators.py", line 64, in new_func
return ctx.invoke(f, obj, *args, **kwargs)
File "c:\python37\lib\site-packages\pipenv\vendor\click\core.py", line 555, in invoke
return callback(*args, **kwargs)
File "c:\python37\lib\site-packages\pipenv\cli\command.py", line 390, in shell
pypi_mirror=state.pypi_mirror,
File "c:\python37\lib\site-packages\pipenv\core.py", line 2156, in do_shell
three=three, python=python, validate=False, pypi_mirror=pypi_mirror,
File "c:\python37\lib\site-packages\pipenv\core.py", line 574, in ensure_project
pypi_mirror=pypi_mirror,
File "c:\python37\lib\site-packages\pipenv\core.py", line 494, in ensure_virtualenv
python = ensure_python(three=three, python=python)
File "c:\python37\lib\site-packages\pipenv\core.py", line 397, in ensure_python
path_to_python = find_a_system_python(python)
File "c:\python37\lib\site-packages\pipenv\core.py", line 360, in find_a_system_python
python_entry = finder.find_python_version(line)
File "c:\python37\lib\site-packages\pipenv\vendor\pythonfinder\pythonfinder.py", line 108, in find_python_version
match = self.windows_finder.find_python_version(
File "c:\python37\lib\site-packages\pipenv\vendor\pythonfinder\pythonfinder.py", line 63, in windows_finder
self._windows_finder = WindowsFinder()
File "<attrs generated init 4868f44fa19b631f0a86b928e7558f26948d224e>", line 13, in __init__
File "c:\python37\lib\site-packages\pipenv\vendor\pythonfinder\models\windows.py", line 92, in get_versions
py_version = PythonVersion.from_windows_launcher(version_object)
File "c:\python37\lib\site-packages\pipenv\vendor\pythonfinder\models\python.py", line 417, in from_windows_launcher
creation_dict = cls.parse(launcher_entry.info.version)
File "c:\python37\lib\site-packages\pipenv\vendor\pythonfinder\_vendor\pep514tools\_registry.py", line 75, in __getattr__
raise AttributeError(attr)
AttributeError: version
Pipenv version: 2018.11.26
Python version: 3.7.2
What should I do to fix it?

How to make stream pipeline pubsub to datastore with python?

I am trying to post a JSON file to Pub/Sub and write it to Datastore with Cloud Dataflow in a streaming pipeline.
from __future__ import absolute_import
import apache_beam as beam
import json
import logging
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import GoogleCloudOptions
from google.cloud.proto.datastore.v1 import entity_pb2
from apache_beam import window
from apache_beam.io.gcp.pubsub import ReadFromPubSub
from apache_beam.io.gcp.datastore.v1.datastoreio import WriteToDatastore
from googledatastore import helper as datastore_helper
class EntityWrapper(object):
    """Create Datastore entities that share a namespace, kind and ancestor."""

    def __init__(self, namespace, kind, ancestor):
        """Store the namespace, kind and ancestor used for every entity."""
        self._namespace = namespace
        self._kind = kind
        self._ancestor = ancestor

    def make_entity(self, content):
        """Return a new entity whose ``content`` property holds ``content``.

        The key path is ``(kind, ancestor)`` followed by ``(kind, <uuid4>)``.
        The namespace is set on the key only when it is not ``None``.
        """
        # Import here rather than relying on module-level imports: when this
        # method is shipped to a Dataflow worker, the main module's globals
        # are not available there unless the main session is saved, which
        # surfaces as "NameError: global name 'entity_pb2' is not defined".
        # `uuid` was also never imported at module level.
        import uuid

        from google.cloud.proto.datastore.v1 import entity_pb2
        from googledatastore import helper as datastore_helper

        entity = entity_pb2.Entity()
        if self._namespace is not None:
            entity.key.partition_id.namespace_id = self._namespace
        datastore_helper.add_key_path(
            entity.key, self._kind, self._ancestor, self._kind, str(uuid.uuid4())
        )
        datastore_helper.add_properties(entity, {"content": unicode(content)})
        return entity
# Settings for the Dataflow job. PROJECT, STAGING_LOCATION, JOB_NAME and
# TEMP_LOCATION are not defined in this snippet and must come from elsewhere.
pipeline_options = {
'project': PROJECT,
'staging_location': STAGING_LOCATION,
'runner': 'DataflowRunner',
'job_name': JOB_NAME,
'temp_location': TEMP_LOCATION,
# Run as a streaming job.
'streaming': True}
# Build the PipelineOptions object that run() passes to beam.Pipeline.
options = PipelineOptions.from_dictionary(pipeline_options)
def run():
    """Build the Pub/Sub-to-Datastore pipeline, run it and wait for it to finish."""
    pipeline = beam.Pipeline(options=options)

    def parse_pubsub(line):
        """Decode one Pub/Sub message payload from JSON."""
        return json.loads(line)

    # Entities are created with the module-level NAMESPACE and KIND and no ancestor.
    entity_factory = EntityWrapper(namespace=NAMESPACE, kind=KIND, ancestor=None)

    (
        pipeline
        | "Read from PubSub" >> ReadFromPubSub(topic=TOPIC)
        | "PubSub message to Python object" >> beam.Map(parse_pubsub)
        | "Windowing" >> beam.WindowInto(window.FixedWindows(10))
        | "create entity" >> beam.Map(entity_factory.make_entity)
        | "write to DataStore" >> WriteToDatastore(PROJECT)
    )

    job = pipeline.run()
    job.wait_until_finish()
if __name__ == '__main__':
    # Raise the root logger to INFO before starting the pipeline.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    run()
When I run this code in Google Cloud Shell, it runs and creates a pipeline like this.
But when I post JSON to Pub/Sub, it doesn't work.
The error message is below.
JOB_MESSAGE_ERROR: java.util.concurrent.ExecutionException: java.lang.RuntimeException: Error received from SDK harness for instruction -30: Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 134, in _execute
response = task()
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 169, in <lambda>
self._execute(lambda: worker.do_instruction(work), work)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 215, in do_instruction
request.instruction_id)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 237, in process_bundle
processor.process_bundle(instruction_id)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 299, in process_bundle
input_op.process_encoded(data.data)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 120, in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py", line 166, in apache_beam.runners.worker.operations.Operation.output
def output(self, windowed_value, output_index=0):
File "apache_beam/runners/worker/operations.py", line 167, in apache_beam.runners.worker.operations.Operation.output
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 363, in apache_beam.runners.common.SimpleInvoker.invoke_process
output_processor.process_outputs(
File "apache_beam/runners/common.py", line 698, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 472, in apache_beam.runners.common.PerWindowInvoker.invoke_process
self._invoke_per_window(
File "apache_beam/runners/common.py", line 522, in apache_beam.runners.common.PerWindowInvoker._invoke_per_window
output_processor.process_outputs(
File "apache_beam/runners/common.py", line 659, in apache_beam.runners.common._OutputProcessor.process_outputs
def process_outputs(self, windowed_input_element, results):
File "apache_beam/runners/common.py", line 698, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 387, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 388, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 589, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 595, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 612, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 593, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 364, in apache_beam.runners.common.SimpleInvoker.invoke_process
windowed_value, self.process_method(windowed_value.value))
File "/home/shinya_yaginuma/.local/lib/python2.7/site-packages/apache_beam/transforms/core.py", line 1035, in <lambda>
File "pubsub_to_datastore.py", line 21, in make_entity
NameError: global name 'entity_pb2' is not defined
java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895)
org.apache.beam.sdk.util.MoreFutures.get(MoreFutures.java:57)
com.google.cloud.dataflow.worker.fn.control.RegisterAndProcessBundleOperation.finish(RegisterAndProcessBundleOperation.java:274)
com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:83)
com.google.cloud.dataflow.worker.fn.control.BeamFnMapTaskExecutor.execute(BeamFnMapTaskExecutor.java:101)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1227)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136)
com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
I have checked that all the libraries are installed.
So I can't understand why the error occurs.
Regards,
So your imports are working fine, which is why the error occurs when you make the pubsub and not when the Dataflow job is created. However, entity_pb2 has disappeared by the time your make_entity actually gets called!
According to the docs, the import needs to be available on the worker where the function actually runs, or you can make your imports persistent. Try saving your main session:
pipeline_options = {
'project': PROJECT,
'staging_location': STAGING_LOCATION,
'runner': 'DataflowRunner',
'job_name': JOB_NAME,
'temp_location': TEMP_LOCATION,
'streaming': True,
'save_main_session': True} #

Unexpected pika.exceptions.ConnectionClosed exception

I randomly get the following errors (i.e., most of the time the scripts work fine):
File ".../backend/case_status_consumer.py", line 28, in <module>
channel.start_consuming()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 955, in start_consuming
self.connection.process_data_events()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 240, in process_data_events
if self._handle_read():
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 348, in _handle_read
super(BlockingConnection, self)._handle_read()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 351, in _handle_read
self._on_data_available(data)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1285, in _on_data_available
self._process_frame(frame_value)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1365, in _process_frame
self._deliver_frame_to_channel(frame_value)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 976, in _deliver_frame_to_channel
return self._channels[value.channel_number]._handle_content_frame(value)
File "/usr/local/lib/python2.7/dist-packages/pika/channel.py", line 792, in _handle_content_frame
self._on_deliver(*response)
File "/usr/local/lib/python2.7/dist-packages/pika/channel.py", line 886, in _on_deliver
body)
File ".../backend/case_status_consumer.py", line 14, in consume_case_status
case_num['case_year'])
File ".../backend/src/fetcher/case_update.py", line 132, in case_update
properties=pika.BasicProperties(delivery_mode=2))
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 572, in basic_publish
(properties, body), False)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 1159, in _send_method
self.connection.send_method(self.channel_number, method_frame, content)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 274, in send_method
self._send_method(channel_number, method_frame, content)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1503, in _send_method
self._send_frame(frame.Method(channel_number, method_frame))
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 417, in _send_frame
super(BlockingConnection, self)._send_frame(frame_value)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1490, in _send_frame
self._flush_outbound()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 377, in _flush_outbound
if self._handle_write():
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 365, in _handle_write
return self._handle_error(error)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 302, in _handle_error
self._handle_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 248, in _handle_disconnect
self._adapter_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 318, in _adapter_disconnect
self._check_state_on_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 371, in _check_state_on_disconnect
raise exceptions.ConnectionClosed()
pika.exceptions.ConnectionClosed
I also get a similar error while producing:
File ".../backend/check_for_orders.py", line 115, in <module>
dated=order_dated, ors_fetch=False)
File ".../backend/src/fetcher/case_update.py", line 132, in case_update
properties=pika.BasicProperties(delivery_mode=2))
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 572, in basic_publish
(properties, body), False)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 1159, in _send_method
self.connection.send_method(self.channel_number, method_frame, content)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 274, in send_method
self._send_method(channel_number, method_frame, content)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1503, in _send_method
self._send_frame(frame.Method(channel_number, method_frame))
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 417, in _send_frame
super(BlockingConnection, self)._send_frame(frame_value)
File "/usr/local/lib/python2.7/dist-packages/pika/connection.py", line 1490, in _send_frame
self._flush_outbound()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 377, in _flush_outbound
if self._handle_write():
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 365, in _handle_write
return self._handle_error(error)
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 302, in _handle_error
self._handle_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/base_connection.py", line 248, in _handle_disconnect
self._adapter_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 318, in _adapter_disconnect
self._check_state_on_disconnect()
File "/usr/local/lib/python2.7/dist-packages/pika/adapters/blocking_connection.py", line 371, in _check_state_on_disconnect
raise exceptions.ConnectionClosed()
pika.exceptions.ConnectionClosed
My connection is set to:
connection = pika.BlockingConnection(pika.ConnectionParameters('localhost', heartbeat_interval=0))
I have even tried setting the heartbeat interval to a longer time (like 60 and 90), but I still get these errors.
I cannot post the full code, but my scripts use 'requests' to fetch a page (with a timeout of 50 seconds and a maximum of 4 retries).
This issue is fixed in pika 0.10.0.

Categories

Resources