Pymongo ignoring allowDiskUse = True - python

I've looked at the other answers to this question, and yet it is still not working. I am trying to delete duplicate cases, here is the function:
def deleteDups(datab):
col = db[datab]
pipeline = [
{'$group': {
'_id': {
'CASE NUMBER': '$CASE NUMBER',
'JURISDICTION': '$JURISDICTION'},#needs to be case insensitive
'count': {'$sum': 1},
'ids': {'$push': '$_id'}
}
},
{'$match': {'count': {'$gt': 1}}},
]
results = col.aggregate(pipeline, allowDiskUse = True)
count = 0
for result in results:
doc_count = 0
print(result)
it = iter(result['ids'])
next(it)
for id in it:
deleted = col.delete_one({'_id': id})
count += 1
doc_count += 1
#print("API call recieved:", deleted.acknowledged) debug, is the database recieving requests
print("Total documents deleted:", count)
And yet, every time, I get this traceback:
File "C:\Users\*****\Documents\GitHub\*****\controller.py", line 202, in deleteDups
results = col.aggregate(pipeline, allowDiskUse = True)
File "C:\Python38\lib\site-packages\pymongo\collection.py", line 2375, in aggregate
return self._aggregate(_CollectionAggregationCommand,
File "C:\Python38\lib\site-packages\pymongo\collection.py", line 2297, in _aggregate
return self.__database.client._retryable_read(
File "C:\Python38\lib\site-packages\pymongo\mongo_client.py", line 1464, in _retryable_read
return func(session, server, sock_info, slave_ok)
File "C:\Python38\lib\site-packages\pymongo\aggregation.py", line 136, in get_cursor
result = sock_info.command(
File "C:\Python38\lib\site-packages\pymongo\pool.py", line 603, in command
return command(self.sock, dbname, spec, slave_ok,
File "C:\Python38\lib\site-packages\pymongo\network.py", line 165, in command
helpers._check_command_response(
File "C:\Python38\lib\site-packages\pymongo\helpers.py", line 159, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Exceeded memory limit for $group, but didn't allow external sort. Pass allowDiskUse:true to opt in.
I asterisked out bits of path to protect privacy. But it is driving me absolutely nuts that this line: results = col.aggregate(pipeline, allowDiskUse = True) very explicitly passes allowDiskUse = True, and Mongo is just ignoring it. If I misspelled something, I'm blind. True has to be capitalized to pass a bool in python.
I feel like I'm going crazy here.

According to the documentation:
Atlas Free Tier and shared clusters do not support the allowDiskUse option for the aggregation command or its helper method.
(Thanks to Shane Harvey for this info)

Related

How to stub S3.Object.wait_until_exists?

I have been tasked with writing tests for an s3 uploading function which uses S3.Object.wait_until_exists to wait for upload to complete and get the content length of the upload to return it.
But so far I am failing to stub head_object for the waiter.
I have explored and found the waiter has two acceptors:
if HTTP code == 200, accept
if HTTP code == 404, retry
I don't know how to explain in text more so instead here is an MRE.
from datetime import datetime
from io import BytesIO
import boto3
import botocore
import botocore.stub
testing_bucket = "bucket"
testing_key = "key/of/object"
testing_data = b"data"
s3 = boto3.resource("s3")
def put():
try:
o = s3.Object(testing_bucket, testing_key)
o.load() # head_object * 1
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
etag = ""
else:
raise e
else:
etag = o.e_tag
try:
o.upload_fileobj(BytesIO(testing_data)) # put_object * 1
except botocore.exceptions.ClientError as e:
raise e
else:
o.wait_until_exists(IfNoneMatch=etag) # head_object * n until accepted
return o.content_length # not sure if calling head_object again
with botocore.stub.Stubber(s3.meta.client) as s3_stub:
s3_stub.add_response(
method="head_object",
service_response={
"ETag": "fffffffe",
"ContentLength": 0,
},
expected_params={
"Bucket": testing_bucket,
"Key": testing_key,
},
)
s3_stub.add_response(
method="put_object",
service_response={},
expected_params={
"Bucket": testing_bucket,
"Key": testing_key,
"Body": botocore.stub.ANY,
},
)
s3_stub.add_response( # cause time to increase by 5 seconds per response
method="head_object",
service_response={
"ETag": "ffffffff",
"AcceptRanges": "bytes",
"ContentLength": len(testing_data),
"LastModified": datetime.now(),
"Metadata": {},
"VersionId": "null",
},
expected_params={
"Bucket": testing_bucket,
"Key": testing_key,
"IfNoneMatch": "fffffffe",
},
)
print(put()) # should print 4
And running the above gives:
time python mre.py
Traceback (most recent call last):
File "/tmp/mre.py", line 72, in <module>
put()
File "/tmp/mre.py", line 30, in put
o.wait_until_exists(IfNoneMatch=etag) # head_object * 1
File "/tmp/.tox/py310/lib/python3.10/site-packages/boto3/resources/factory.py", line 413, in do_waiter
waiter(self, *args, **kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/boto3/resources/action.py", line 215, in __call__
response = waiter.wait(**params)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/waiter.py", line 55, in wait
Waiter.wait(self, **kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/waiter.py", line 343, in wait
response = self._operation_method(**kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/waiter.py", line 93, in __call__
return self._client_method(**kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/client.py", line 508, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/client.py", line 878, in _make_api_call
request_dict = self._convert_to_request_dict(
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/client.py", line 936, in _convert_to_request_dict
api_params = self._emit_api_params(
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/client.py", line 969, in _emit_api_params
self.meta.events.emit(
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/hooks.py", line 412, in emit
return self._emitter.emit(aliased_event_name, **kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/hooks.py", line 256, in emit
return self._emit(event_name, kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/hooks.py", line 239, in _emit
response = handler(**kwargs)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/stub.py", line 376, in _assert_expected_params
self._assert_expected_call_order(model, params)
File "/tmp/.tox/py310/lib/python3.10/site-packages/botocore/stub.py", line 352, in _assert_expected_call_order
raise UnStubbedResponseError(
botocore.exceptions.UnStubbedResponseError: Error getting response stub for operation HeadObject: Unexpected API Call: A call was made but no additional calls expected. Either the API Call was not stubbed or it was called multiple times.
python mre.py 0.39s user 0.19s system 9% cpu 5.859 total
Or with 2 answer, same thing with python mre.py 0.40s user 0.20s system 5% cpu 10.742 total.
I found a solution for this, as highlighted the waiter waits for a 200 status code, adding it to the response like the following works:
s3_stub.add_response(
method="head_object",
service_response={
"ETag": "ffffffff",
"AcceptRanges": "bytes",
"ContentLength": len(testing_data),
"LastModified": datetime.now(),
"Metadata": {},
"VersionId": "null",
"ResponseMetadata": {"HTTPStatusCode": 200},
},
expected_params={
"Bucket": testing_bucket,
"Key": testing_key,
"IfNoneMatch": "fffffffe",
},
)

Python Web3 Ganache - Traceback error on BuildTransaction

I am going through the freeCodeCamp course on YouTube and have caught an error regarding when I run. As I am trying to build a transaction into Ganache. I see in Ganache logs that there is activity as well. First time posting, so let me know what other information should I provide!
The course: Solidity, Blockchain, and Smart Contract Course – Beginner to Expert Python Tutorial
transaction = SimpleStorage.constructor().buildTransaction(
{"chainId": chain_id, "from": my_address, "nonce": nonce}
)
The error that the terminal returns:
Traceback (most recent call last):
File "C:\Users\Justin\demos\web3_py_simple_storage\deploy.py",
line 60, in <module>
transaction = SimpleStorage.constructor().buildTransaction(
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\decorators.py", line 18, in _wrapper
return self.method(obj, *args, **kwargs)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\contract.py", line 684, in buildTransaction
return fill_transaction_defaults(self.web3, built_transaction)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\_utils\transactions.py", line 121, in fill_transaction_defaults
default_val = default_getter(web3, transaction)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\_utils\transactions.py", line 67, in <lambda>
'gas': lambda web3, tx: web3.eth.estimate_gas(tx),
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\eth.py", line 759, in estimate_gas
return self._estimate_gas(transaction, block_identifier)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\module.py", line 57, in caller
result = w3.manager.request_blocking(method_str,
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\manager.py", line 197, in request_blocking
response = self._make_request(method, params)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\manager.py", line 150, in _make_request
return request_func(method, params)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\formatting.py", line 76, in apply_formatters
response = make_request(method, params)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\gas_price_strategy.py", line 90,
in middleware
return make_request(method, params)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\formatting.py", line 74, in apply_formatters
response = make_request(method, formatted_params)
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\attrdict.py", line 33, in middleware
response = make_request(method, params)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\formatting.py", line 74, in apply_formatters
response = make_request(method, formatted_params)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\formatting.py", line 73, in apply_formatters
formatted_params = formatter(params)
File "cytoolz\functoolz.pyx", line 503, in cytoolz.functoolz.Compose.__call__
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\decorators.py", line 91, in wrapper
return ReturnType(result) # type: ignore
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\applicators.py", line 22, in apply_formatter_at_index
yield formatter(item)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\functional.py", line 45, in inner
return callback(fn(*args, **kwargs))
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\applicators.py", line 84, in apply_formatters_to_dict
yield key, formatters[key](item)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\eth_utils\applicators.py", line 72, in apply_formatter_if
return formatter(value)
File "cytoolz\functoolz.pyx", line 250, in cytoolz.functoolz.curry.__call__
File "C:\Users\Justin\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\web3\middleware\validation.py", line 57, in validate_chain_id
raise ValidationError(
web3.exceptions.ValidationError: The transaction declared chain ID 5777, but the connected node is on 1337
The Full Code that I used is below:
from solcx import compile_standard, install_solc
import json
from web3 import Web3
with open("./SimpleStorage.sol", "r") as file:
simple_storage_file = file.read()
# Compile Our Solidity
print("Installing...")
install_solc("0.6.0")
compiled_sol = compile_standard(
{
"language": "Solidity",
"sources": {"SimpleStorage.sol": {"content": simple_storage_file}},
"settings": {
"outputSelection": {
"*": {
"*": [
"abi",
"metadata",
"evm.bytecode",
"evm.bytecode.sourceMap",
]
}
}
},
},
solc_version="0.6.0",
)
with open("compiled_code.json", "w") as file:
json.dump(compiled_sol, file)
# get bytecode
bytecode = compiled_sol["contracts"]["SimpleStorage.sol"]["SimpleStorage"]["evm"][
"bytecode"
]["object"]
# get abi
abi = compiled_sol["contracts"]["SimpleStorage.sol"]["SimpleStorage"]["abi"]
# for connecting to ganache
w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:7545"))
chain_id = 5777
my_address = "0xf2580f5ddfFb89e69A12a7CbCa8CA175Df4cBe08"
private_key = "0x937073896304af4297e6bbb3a3c623689d48388aa8595058752fafb522b31a13"
# Create the contract in python
SimpleStorage = w3.eth.contract(abi=abi, bytecode=bytecode)
print(SimpleStorage)
# Get the latest transaction
nonce = w3.eth.getTransactionCount(my_address)
print(nonce)
# 1. Build a transaction
# 2. Sign a transaction
# 3. Send a transaction
transaction = SimpleStorage.constructor().buildTransaction(
{"chainId": chain_id, "from": my_address, "nonce": nonce}
)
print(transaction)
As of 5.25.0 of web3.py, we now need to add gasPrice to our transactions with a local Ganache chain. Adding "gasPrice": w3.eth.gas_price should fix your issue in the transactions.
Full Example:
transaction = SimpleStorage.constructor().buildTransaction(
{
"chainId": chain_id,
"gasPrice": w3.eth.gas_price,
"from": my_address,
"nonce": nonce,
}
)
Use the following line in .buildTransaction:
transaction = SimpleStorage.constructor().buildTransaction(
{'from': address, 'nonce':nonce, 'chainId':chain_id,'gas': 1728712,
'gasPrice': w3.toWei('21', 'gwei')})
And change the chain_id = 1337.
The transaction declared chain ID 5777, but the connected node is on 1337.
You have to change your chain_id:
chain_id = 5777❌
But use the one below
chain_id = 1337✔
I was having this exact same problem. Including "gasPrice": w3.eth.gas_price, solved it originally, but it came back.
After some tweaking, I found that casting chain_id (because I was getting that from .env as well) as an int solved my problems.
So the full code looked like this:
transaction = SimpleStorage.constructor().buildTransaction(
{
"chainId": int(chain_id),
"gasPrice": w3.eth.gas_price,
"from": my_address,
"nonce": nonce,
}
)

/model/train http API giving 500 error when providing “nlu” data in json

I am trying to train model using httpapi and json data blow is the code.
import requests
import json
data = {
"config": "language: en\npipeline:\n- name: WhitespaceTokenizer\n- name: RegexFeaturizer\n- name: LexicalSyntacticFeaturizer\n- name: CountVectorsFeaturizer\n- name: CountVectorsFeaturizer\nanalyzer: \"char_wb\"\nmin_ngram: 1\nmax_ngram: 4\n- name: DIETClassifier\nepochs: 100\n- name: EntitySynonymMapper\n- name: ResponseSelector\nepochs: 100",
"nlu": json.dumps({
"rasa_nlu_data": {
"regex_features": [],
"entity_synonyms": [],
"common_examples": [
{
"text": "i m looking for a place to eat",
"intent": "restaurant_search",
"entities": []
},
{
"text": "I want to grab lunch",
"intent": "restaurant_search",
"entities": []
},
{
"text": "I am searching for a dinner spot",
"intent": "restaurant_search",
"entities": []
},
]
}
}),
"force": False,
"save_to_default_model_directory": True
}
r = requests.post('http://localhost:5005/model/train', json=data)
It gives me 500 error. Below is the log for error:
2020-09-30 07:40:37,511 [DEBUG] Traceback (most recent call last):
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/server.py", line 810, in train
None, functools.partial(train_model, **info)
File "/usr/lib/python3.6/concurrent/futures/thread.py", line 56, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/train.py", line 50, in train
additional_arguments=additional_arguments,
File "uvloop/loop.pyx", line 1456, in uvloop.loop.Loop.run_until_complete
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/train.py", line 83, in train_async
config, domain, training_files
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/importers/importer.py", line 79, in load_from_config
config = io_utils.read_config_file(config_path)
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/utils/io.py", line 188, in read_config_file
content = read_yaml(read_file(filename))
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/utils/io.py", line 124, in read_yaml
return yaml_parser.load(content) or {}
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/ruamel/yaml/main.py", line 343, in load
return constructor.get_single_data()
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/ruamel/yaml/constructor.py", line 111, in get_single_data
node = self.composer.get_single_node()
File "_ruamel_yaml.pyx", line 706, in _ruamel_yaml.CParser.get_single_node
File "_ruamel_yaml.pyx", line 724, in _ruamel_yaml.CParser._compose_document
File "_ruamel_yaml.pyx", line 775, in _ruamel_yaml.CParser._compose_node
File "_ruamel_yaml.pyx", line 891, in _ruamel_yaml.CParser._compose_mapping_node
File "_ruamel_yaml.pyx", line 904, in _ruamel_yaml.CParser._parse_next_event
ruamel.yaml.parser.ParserError: while parsing a block mapping
in "<unicode string>", line 1, column 1
did not find expected key
in "<unicode string>", line 11, column 1
When I train model using terminal commands and json file, it is trained successfully. I think I am missing some formatting required for /model/train api. Can someone tell me where am I going wrong?
I am using rasa version 1.10.14.
Thankyou in advance.
Turns out that the string in config was not proper. It was giving error when training model due to double quotes used with escape characters. I made some tweaks in the config and it trained the model successfully

http 400 error Elasticsearch --query to retrieve documents containing any of the keywords in an array

I have a huge number of keywords written in a file. I put them in an array and tried to run a query for each of the item in the array and retrieve the documents that have any of the keywords. It shows me the number of returned documents only for 50 of them and at the end I get a couple of errors.
Here is my code:
A subset of keywords:
C0001396 SYNDROME STOKES-ADAMS
C0001396 Syndrome, Adams-Stokes
C0001396 Syndrome, Stokes-Adams
C0002962 3-12 ANGINAL SYNDROMES
C0002962 ANGINA
The CODE:
from elasticsearch import Elasticsearch
import json
count=0
keyword_array = []
es = Elasticsearch(['http://IP:9200/'])
with open('localDrive\\C0577785C.txt') as my_keywordfile:
for keyword in my_keywordfile.readlines():
keyword_ex=keyword[9:]
print(keyword_ex)
keyword_array.append(keyword_ex.strip().strip("'"))
with open('localDrive\\out.txt', 'wb') as f:
for x in keyword_array:
doc = {
"from" : 0, "size" : 1000000,
"query":{
"query_string":{
"fields" : ["description", "title"],
"query" : x
}
}
}
res = es.search(index='INDEXED_REPO', body=doc)
print("Got %d Hits:" % res['hits']['total'])
count += 1
print(count)
f.write(json.dumps(res).encode("utf-8"))
f.flush()
f.close()
Errors:
GET http://INDEX_REPO/indexed/_search [status:400 request:0.012s]
Traceback (most recent call last):
File
"localDrive/PycharmProjects/extract_keywords/elastic_serach5.py", line
32, in <module>
res = es.search(index='INDEXED_REPO', body=doc)
File "......\Local\Programs\Python\Python36-32\lib\site-
packages\elasticsearch\client\utils.py", line 73, in _wrapped
return func(*args, params=params, **kwargs)
File "....\AppData\Local\Programs\Python\Python36-32\lib\site-
packages\elasticsearch\client\__init__.py", line 623, in search
doc_type, '_search'), params=params, body=body)
File "......\AppData\Local\Programs\Python\Python36-32\lib\site-
packages\elasticsearch\transport.py", line 312, in perform_request
status, headers, data = connection.perform_request(method, url, params,
body, ignore=ignore, timeout=timeout)
File "......\AppData\Local\Programs\Python\Python36-32\lib\site-
packages\elasticsearch\connection\http_urllib3.py", line 128, in
perform_request
self._raise_error(response.status, raw_data)
File "......\AppData\Local\Programs\Python\Python36-32\lib\site-
packages\elasticsearch\connection\base.py", line 125, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code,
error_message, additional_info)
elasticsearch.exceptions.RequestError: <exception str() failed>
Any idea why it is happening?
Thanks,
The elasticsearch query was not in the right format. I changed it to the following and it worked!:
doc = {
"query": {
"multi_match": {
"query": x,
"type": "phrase",
"fields": ["title", "description"],
"operator": "and"
}
}
}

bulk update failing when document has attachments?

I am performing the following operation:
Prepare some documents: docs = [ doc1, doc2, ... ]. The documents have maybe attachments
I POST to _bulk_docs the list of documents
I get an Exception > Problems updating list of documents (length = 1): (500, ('badarg', '58'))
My bulk_docs is (in this case just one):
[ { '_attachments': { 'image.png': { 'content_type': 'image/png',
'data': 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAM8AAADkCAIAAACwiOf9AAAAA3NCSVQICAjb4U/gAAAgAElEQVR4nO...'}},
'_id': '08b8fc66-cd90-47a1-9053-4f6fefabdfe3',
'_rev': '15-ff3d0e8baa56e5ad2fac4937264fb3f6',
'docmeta': { 'created': '2013-10-01 14:48:24.311257',
'updated': [ '2013-10-01 14:48:24.394157',
'2013-12-11 08:19:47.271812',
'2013-12-11 08:25:05.662546',
'2013-12-11 10:38:56.116145']},
'org_id': 45345,
'outputs_id': None,
'properties': { 'auto-t2s': False,
'content_type': 'image/png',
'lang': 'es',
'name': 'dfasdfasdf',
'text': 'erwerwerwrwerwr'},
'subtype': 'voicemail-st',
'tags': ['RRR-ccc-dtjkqx'],
'type': 'recording'}]
This is the detailed exception:
Traceback (most recent call last):
File "portal_support_ut.py", line 470, in test_UpdateDoc
self.ps.UpdateDoc(self.org_id, what, doc_id, new_data)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/ps/complex_ops.py", line 349, in UpdateDoc
success, doc = database.UpdateDoc(doc_id, new_data)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/cdb/core/updater.py", line 38, in UpdateDoc
res = self.SaveDoc(doc_id, doc)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/cdb/core/saver.py", line 88, in SaveDoc
else : self.bulk_append(doc, flush, update_revision)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/cdb/core/bulker.py", line 257, in bulk_append
if force_send or flush or not self.timer.use_timer : self.BulkSend(show_progress=True)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/cdb/core/bulker.py", line 144, in BulkSend
results = self.UpdateDocuments(self.bulk)
File "/home/gonvaled/projects/new-wavilon-portal/python_modules/wav/cdb/core/bulker.py", line 67, in UpdateDocuments
results = self.db.update(bulkdocs)
File "/home/gonvaled/.virtualenvs/python2.7.3-wavilon1/local/lib/python2.7/site-packages/couchdb/client.py", line 764, in update
_, _, data = self.resource.post_json('_bulk_docs', body=content)
File "/home/gonvaled/.virtualenvs/python2.7.3-wavilon1/local/lib/python2.7/site-packages/couchdb/http.py", line 527, in post_json
**params)
File "/home/gonvaled/.virtualenvs/python2.7.3-wavilon1/local/lib/python2.7/site-packages/couchdb/http.py", line 546, in _request_json
headers=headers, **params)
File "/home/gonvaled/.virtualenvs/python2.7.3-wavilon1/local/lib/python2.7/site-packages/couchdb/http.py", line 542, in _request
credentials=self.credentials)
File "/home/gonvaled/.virtualenvs/python2.7.3-wavilon1/local/lib/python2.7/site-packages/couchdb/http.py", line 398, in request
raise ServerError((status, error))
ServerError: (500, ('badarg', '58'))
What does that badarg mean? Is it possible to send attachments when doing _bulk_docs?
The solution is to remove the data:image/png;base64, prefix before sending the attachment to coudhdb.
For a python alternative, see here.
This was answered in our mailing list, repeating the answer here for completeness.
The data field was malformed in two ways;
'data': 'data:image/png;base64,iVBORw0KGgoAA....'
The 'data:image/png;base64,' prefix is wrong, and the base64 part was malformed (CouchDB obviously needs to decode it to store it).

Categories

Resources