django loading millions of rows

django loading millions of rows - python

So I need to iterate through millions of rows using Django. This does not work with the way Django normally grabs them from the DB (the whole result set is loaded into a list at once), so I looked into server-side cursors.
I tried the exact code that the project's GitHub page suggests:
from djorm_core.postgresql import server_side_cursors
with server_side_cursors():
for item in MyModel.objects.all():
#do something
and I got the following error:
(I cannot show the actual line where the error started, but it started on the equivalent of for item in myModel.objects.all() above)
/usr/local/lib/python2.7/dist-packages/django/db/models/query.pyc in __iter__(self)
94 - Responsible for turning the rows into model objects.
95 """
---> 96 self._fetch_all()
97 return iter(self._result_cache)
98
/usr/local/lib/python2.7/dist-packages/django/db/models/query.pyc in _fetch_all(self)
852 def _fetch_all(self):
853 if self._result_cache is None:
--> 854 self._result_cache = list(self.iterator())
855 if self._prefetch_related_lookups and not self._prefetch_done:
856 self._prefetch_related_objects()
/usr/local/lib/python2.7/dist-packages/django/db/models/query.pyc in iterator(self)
941 names = extra_names + field_names + aggregate_names
942
--> 943 for row in self.query.get_compiler(self.db).results_iter():
944 yield dict(zip(names, row))
945
/usr/local/lib/python2.7/dist-packages/django/db/models/sql/compiler.pyc in results_iter(self)
707 fields = None
708 has_aggregate_select = bool(self.query.aggregate_select)
--> 709 for rows in self.execute_sql(MULTI):
710 for row in rows:
711 if has_aggregate_select:
/usr/local/lib/python2.7/dist-packages/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
780
781 cursor = self.connection.cursor()
--> 782 cursor.execute(sql, params)
783
784 if not result_type:
/usr/local/lib/python2.7/dist-packages/django/db/backends/util.pyc in execute(self, sql, params)
67 start = time()
68 try:
---> 69 return super(CursorDebugWrapper, self).execute(sql, params)
70 finally:
71 stop = time()
/usr/local/lib/python2.7/dist-packages/django/db/backends/util.pyc in execute(self, sql, params)
51 return self.cursor.execute(sql)
52 else:
---> 53 return self.cursor.execute(sql, params)
54
55 def executemany(self, sql, param_list):
/usr/local/lib/python2.7/dist-packages/django/db/utils.pyc in __exit__(self, exc_type, exc_value, traceback)
97 if dj_exc_type not in (DataError, IntegrityError):
98 self.wrapper.errors_occurred = True
---> 99 six.reraise(dj_exc_type, dj_exc_value, traceback)
100
101 def __call__(self, func):
/usr/local/lib/python2.7/dist-packages/django/db/backends/util.pyc in execute(self, sql, params)
51 return self.cursor.execute(sql)
52 else:
---> 53 return self.cursor.execute(sql, params)
54
55 def executemany(self, sql, param_list):
ProgrammingError: can't use a named cursor outside of transactions
Is there another way of doing the equivalent? Is there a way to still use this module, or is it simply broken, given that this is exactly what the GitHub page suggests?

Related

Python Multiprocessing Parallel Insert Into Oracle SQL

I'm currently trying to create a table and insert values in an Oracle SQL database.
I managed to make it work using df.to_sql(name=table_name, con=conn, if_exists='append', index=False) but it took 1h30m to upload a DataFrame of only 10000 rows * 5 columns.
This made me look into multiprocessing, so I tried following the answer Siddhi Kiran Bajracharya gave in this thread, which turned out like this:
import pandas as pd
from sqlalchemy import create_engine
import config
LOCATION = r"C:\Oracle\instantclient_19_6"
os.environ["PATH"] = LOCATION + ";" + os.environ["PATH"]
conn = create_engine('oracle+cx_oracle://' + config.user + ':' + config.pw +
'@' + config.host + ':' + config.port + '/?service_name=' + config.db +'?charset=latin-1')
import math
from multiprocessing.dummy import Pool as ThreadPool
def insert_df(df, *args, **kwargs):
nworkers = 4 # number of workers that executes insert in parallel fashion
chunk = math.floor(df.shape[0] / nworkers) # number of chunks
chunks = [(chunk * i, (chunk * i) + chunk) for i in range(nworkers)]
chunks.append((chunk * nworkers, df.shape[0]))
pool = ThreadPool(nworkers)
def worker(chunk):
i, j = chunk
df.iloc[i:j, :].to_sql(*args, **kwargs)
pool.map(worker, chunks)
pool.close()
pool.join()
insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
The problem is that this last code runs for 20 minutes, inserts only 9 rows into the table, and then raises the following error: DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
Full Traceback:
---------------------------------------------------------------------------
DatabaseError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
DatabaseError: ORA-00955: name is already used by an existing object
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
<ipython-input-73-b50275447767> in <module>
20
21
---> 22 insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
<ipython-input-73-b50275447767> in insert_df(df, *args, **kwargs)
14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
---> 16 pool.map(worker, chunks)
17 pool.close()
18 pool.join()
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in mapstar(args)
42
43 def mapstar(args):
---> 44 return list(map(*args))
45
46 def starmapstar(args):
<ipython-input-73-b50275447767> in worker(chunk)
12 def worker(chunk):
13 i, j = chunk
---> 14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
16 pool.map(worker, chunks)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2710 chunksize=chunksize,
2711 dtype=dtype,
-> 2712 method=method,
2713 )
2714
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1317 dtype=dtype,
1318 )
-> 1319 table.create()
1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in create(self)
654 )
655 else:
--> 656 self._execute_create()
657
658 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_create(self)
636 # Inserting table into database, add to MetaData object
637 self.table = self.table.tometadata(self.pd_sql.meta)
--> 638 self.table.create()
639
640 def create(self):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2044 ):
2045 with self._optional_conn_ctx_manager(connection) as conn:
-> 2046 conn._run_visitor(visitorcallable, element, **kwargs)
2047
2048 class _trans_ctx(object):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1613
1614 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1615 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1616
1617
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints,
827 )
828 # fmt: on
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
986 raise exc.ObjectNotExecutableError(object_)
987 else:
--> 988 return meth(self, multiparams, params)
989
990 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1048 compiled,
1049 None,
-> 1050 compiled,
1051 )
1052 if self._has_events or self.engine._has_events:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1251 except BaseException as e:
1252 self._handle_dbapi_exception(
-> 1253 e, statement, parameters, cursor, context
1254 )
1255
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1471 util.raise_from_cause(newraise, exc_info)
1472 elif should_wrap:
-> 1473 util.raise_from_cause(sqlalchemy_exception, exc_info)
1474 else:
1475 util.reraise(*exc_info)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1247 if not evt_handled:
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
1251 except BaseException as e:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
578
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
582 def do_execute_no_params(self, cursor, statement, context=None):
DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
[SQL:
CREATE TABLE "TEST_TABLE_DELETE" (
"id" CLOB,
"name" CLOB,
"var1" CLOB,
"var2" CLOB,
"var3" CLOB,
"var4" CLOB,
"var5" CLOB,
"var6" CLOB,
"var7" CLOB,
"var8" CLOB,
"var9" CLOB,
"var10" CLOB,
"var11" CLOB,
"var12" FLOAT,
"var13" CLOB,
"var14" CLOB
)
]
(Background on this error at: http://sqlalche.me/e/4xp6)
Any pointers to help me solve this issue would be greatly appreciated.
Thanks!
Luti

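If you're using to_sql with string columns in your DataFrame, you had better pass explicit column types (otherwise they are created as CLOB, as the CREATE TABLE statement in the error above shows), like this: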
dtyp = {c:types.VARCHAR(data[c].str.len().max()) for c in data.columns[data.dtypes == 'object'].tolist()}
data.to_sql('table_name.....',con=...,if_exists='append'
, index=False
, dtype = dtyp)
For 10k rows, it should be very fast.

CompileError when trying to run Insert Statement on SQLAlchemy

The script I'm writing requests news article metadata from an API. In response, it receives a page of results containing several news articles. It is designed to process the records one at a time, extracting the data fields from the json dict and inserting them into postgres.
However, when I run the insert operation, the function returns:
CompileError: Unconsumed column names: urlToImage, publishedAt
How do I get this insert operation to work?
Any help would be greatly appreciated!
Here's my code:
from sqlalchemy import MetaData # for getting table metadata
from sqlalchemy import Table # for interacting with tables
from sqlalchemy import create_engine # for creating db engine
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert # for getting alterate query method to work
# Create DB engine
engine = create_engine('postgresql+psycopg2://{user}:{password}@{hostip}/{db}'.format(**dbkeys))
# Get metadata objects for tables in database
metadata = MetaData(engine, reflect=True)
nstream = metadata.tables['nstream']
for item in response_page['articles']:
# Convert datetime strings from api into Python datetime format
dtp = datetime.strptime(item['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
inserttw = nstream.insert().values(source_id = item['source']['id'],
source_name = item['source']['name'],
author = item['author'],
title = item['title'],
description = item['description'],
url = item['url'],
urlToImage = item['urlToImage'],
publishedAt = dtp,
uploaded2db = datetime.now(),
content = item['content'])
engine.execute(inserttw)
And the full traceback:
CompileError Traceback (most recent call last)
<ipython-input-10-c5f4a6bff45e> in <module>
63 # 2. If query has more than one page, get additional pages
64
---> 65 get_results(tfrom, engine = engine, max_retries = 5)
<ipython-input-10-c5f4a6bff45e> in get_results(tfrom, engine, max_retries)
39
40 # Append the results to the database using the helper
---> 41 process_page(results)
42
43 # If there is an exception, add to the retry counter and then sleep.
<ipython-input-10-c5f4a6bff45e> in process_page(response_page)
26 uploaded2db = datetime.now(),
27 content = item['content'])
---> 28 engine.execute(inserttw)
29
30 def get_results(tfrom, engine = engine, max_retries = 5):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
2073
2074 connection = self.contextual_connect(close_with_result=True)
-> 2075 return connection.execute(statement, *multiparams, **params)
2076
2077 def scalar(self, statement, *multiparams, **params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
1051 inline=len(distilled_params) > 1,
1052 schema_translate_map=self.schema_for_object
-> 1053 if not self.schema_for_object.is_default else None)
1054
1055 ret = self._execute_context(
<string> in <lambda>(self, bind, dialect, **kw)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in compile(self, default, bind, dialect, **kw)
440 else:
441 dialect = default.StrCompileDialect()
--> 442 return self._compiler(dialect, bind=bind, **kw)
443
444 def _compiler(self, dialect, **kw):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _compiler(self, dialect, **kw)
446 Dialect."""
447
--> 448 return dialect.statement_compiler(dialect, self, **kw)
449
450 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, column_keys, inline, **kwargs)
451 # dialect.label_length or dialect.max_identifier_length
452 self.truncated_names = {}
--> 453 Compiled.__init__(self, dialect, statement, **kwargs)
454
455 if (
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, bind, schema_translate_map, compile_kwargs)
217 if self.can_execute:
218 self.execution_options = statement._execution_options
--> 219 self.string = self.process(self.statement, **compile_kwargs)
220
221 #util.deprecated("0.7", ":class:`.Compiled` objects now compile "
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in process(self, obj, **kwargs)
243
244 def process(self, obj, **kwargs):
--> 245 return obj._compiler_dispatch(self, **kwargs)
246
247 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/visitors.py in _compiler_dispatch(self, visitor, **kw)
79 raise exc.UnsupportedCompilationError(visitor, cls)
80 else:
---> 81 return meth(self, **kw)
82 else:
83 # The optimization opportunity is lost for this case because the
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in visit_insert(self, insert_stmt, asfrom, **kw)
2057
2058 crud_params = crud._setup_crud_params(
-> 2059 self, insert_stmt, crud.ISINSERT, **kw)
2060
2061 if not crud_params and \
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _setup_crud_params(compiler, stmt, local_stmt_type, **kw)
55 try:
56 if local_stmt_type in (ISINSERT, ISUPDATE):
---> 57 return _get_crud_params(compiler, stmt, **kw)
58 finally:
59 if should_restore:
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _get_crud_params(compiler, stmt, **kw)
144 raise exc.CompileError(
145 "Unconsumed column names: %s" %
--> 146 (", ".join("%s" % c for c in check))
147 )
148
CompileError: Unconsumed column names: urlToImage, publishedAt

The problem, as it turned out, was that I was capitalizing the column names incorrectly.
I figured this out by using SQLAlchemy's inspector. The column names were there; they were just in lower case.
from sqlalchemy import create_engine
from sqlalchemy.engine import reflection
insp = reflection.Inspector.from_engine(engine)
print(insp.get_columns(nstream))
The source of the confusion is that when you create tables in Postgres, Postgres automatically lowercases your column names unless you put them in quotes when naming them.
This is the working version of the insert command:
inserttw = nstream.insert().values(source_id = item['source']['id'],
source_name = item['source']['name'],
author = item['author'],
title = item['title'],
description = item['description'],
url = item['url'],
urltoimage = item['urlToImage'],
publishedat = dtp,
uploaded2db = datetime.now(),
content = item['content'])

String concatenation by using the value of the model field while updating a queryset

I want to update the email id of some users to: "prefix" + "value of the user email"
Now I can do this for one user as follows:
User.objects.filter(pk=<id>).update(email=Concat(Value("prefix"), 'email'))
However, as soon as I filter on a pk list, I get a nasty error. The query is:
User.objects.filter(pk__in=<list_id>).update(email=Concat(Value("prefix"), 'email'))
The error is:
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/query.pyc in update(self, **kwargs)
561 query.add_update_values(kwargs)
562 with transaction.atomic(using=self.db, savepoint=False):
--> 563 rows = query.get_compiler(self.db).execute_sql(CURSOR)
564 self._result_cache = None
565 return rows
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
1060 related queries are not available.
1061 """
-> 1062 cursor = super(SQLUpdateCompiler, self).execute_sql(result_type)
1063 try:
1064 rows = cursor.rowcount if cursor else 0
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
838 cursor = self.connection.cursor()
839 try:
--> 840 cursor.execute(sql, params)
841 except Exception:
842 cursor.close()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
77 start = time()
78 try:
---> 79 return super(CursorDebugWrapper, self).execute(sql, params)
80 finally:
81 stop = time()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/utils.pyc in __exit__(self, exc_type, exc_value, traceback)
96 if dj_exc_type not in (DataError, IntegrityError):
97 self.wrapper.errors_occurred = True
---> 98 six.reraise(dj_exc_type, dj_exc_value, traceback)
99
100 def __call__(self, func):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/mysql/base.pyc in execute(self, query, args)
122 try:
123 # args is None means no string interpolation
--> 124 return self.cursor.execute(query, args)
125 except Database.OperationalError as e:
126 # Map some error codes to IntegrityError, since they seem to be
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/MySQLdb/cursors.pyc in execute(self, query, args)
224 except Exception:
225 exc, value = sys.exc_info()[:2]
--> 226 self.errorhandler(self, exc, value)
227 self._executed = query
228 if not self._defer_warnings: self._warning_check()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/MySQLdb/connections.pyc in defaulterrorhandler(***failed resolving arguments***)
34 del connection
35 if isinstance(errorvalue, BaseException):
---> 36 raise errorvalue
37 if errorclass is not None:
38 raise errorclass(errorvalue)
OperationalError: (1093, "You can't specify target table 'auth_user' for update in FROM clause")
I don't really understand what the error message is trying to say. Any help would be appreciated.
EDIT:
MySQL version: 5.7.12
Django: 1.8

What does your <list_id> look like? What's the result of User.objects.filter(pk__in=<list_id>)?
In Django 1.10, I have no problem running:
>>> User.objects.filter(pk=1).update(first_name='Test')
1L
>>> User.objects.filter(pk__in=[1, 2]).update(first_name='Test')
2L
As for workarounds, obviously it isn't as efficient, but why not just cache the QuerySet and then iterate over it to update each object? This is (hopefully) a one-time query, so I'm not sure how much of a concern SQL efficiency is.

This should work :)
User.objects.filter(pk__in=<list_id>).update(email=Concat(Value("prefix"), F('email')))

I had the same problem, and solved it by converting the queryset of ids into a list(), like this:
ids = Transactions.objects.values_list('id', flat=True)
# This may raise error:
User.objects.filter(pk__in=ids).update(first_name='Test')
# But if convert ids to a list, should work:
list_ids = list(ids)
User.objects.filter(pk__in=list_ids).update(first_name='Test')
Hope it helps :)

Create Case query via Django ORM

Using Django 1.8 and Postgres 9.3 - I have a Django model as follows which contains log lines. I would like to extract all domain_name values that were never allowed access.
class Logs(models.Model):
date = DateTimeField(db_index=True)
action = TextField(default='a', null=True, blank=True, db_index=True)
url = TextField(null=True, blank=True)
stats = JsonField(null=True, blank=True)
domain_name = TextField(null=True, blank=True, db_index=True)
<snip>
This SQL query works nicely, but I'm having trouble translating it to a Django ORM queryset.
select domain_name from reporter_log
GROUP BY domain_name
HAVING COUNT(CASE WHEN action = 'a' OR action = 'ae' then 1 END) = 0;
I would have expected the following queryset to work:
LogLine.objects.annotate(
allowed=Count(Case(
When(action='a', then=Value(1)),
When(action='ae', then=Value(1)),
default=Value(0),
output_field=IntegerField(),
)
))
But I get a traceback:
ProgrammingError Traceback (most recent call last)
<ipython-input-4-feb056328fe8> in <module>()
4 When(action='ae', then=Value(1)),
5 default=Value(0),
----> 6 output_field=IntegerField(),
7 )
8 ))
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in __call__(self, result)
245 self.start_displayhook()
246 self.write_output_prompt()
--> 247 format_dict, md_dict = self.compute_format_data(result)
248 self.write_format_data(format_dict, md_dict)
249 self.update_user_ns(result)
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in compute_format_data(self, result)
155
156 """
--> 157 return self.shell.display_formatter.format(result)
158
159 def write_format_data(self, format_dict, md_dict=None):
/usr/lib/python3/dist-packages/IPython/core/formatters.py in format(self, obj, include, exclude)
150 md = None
151 try:
--> 152 data = formatter(obj)
153 except:
154 # FIXME: log the exception
/usr/lib/python3/dist-packages/IPython/core/formatters.py in __call__(self, obj)
478 type_pprinters=self.type_printers,
479 deferred_pprinters=self.deferred_printers)
--> 480 printer.pretty(obj)
481 printer.flush()
482 return stream.getvalue()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
361 if isinstance(meth, collections.Callable):
362 return meth(obj, self, cycle)
--> 363 return _default_pprint(obj, self, cycle)
364 finally:
365 self.end_group()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
481 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
482 # A user-provided repr.
--> 483 p.text(repr(obj))
484 return
485 p.begin_group(1, '<')
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __repr__(self)
136
137 def __repr__(self):
--> 138 data = list(self[:REPR_OUTPUT_SIZE + 1])
139 if len(data) > REPR_OUTPUT_SIZE:
140 data[-1] = "...(remaining elements truncated)..."
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __iter__(self)
160 - Responsible for turning the rows into model objects.
161 """
--> 162 self._fetch_all()
163 return iter(self._result_cache)
164
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in _fetch_all(self)
963 def _fetch_all(self):
964 if self._result_cache is None:
--> 965 self._result_cache = list(self.iterator())
966 if self._prefetch_related_lookups and not self._prefetch_done:
967 self._prefetch_related_objects()
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in iterator(self)
236 # Execute the query. This will also fill compiler.select, klass_info,
237 # and annotations.
--> 238 results = compiler.execute_sql()
239 select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
240 compiler.annotation_col_map)
/usr/local/lib/python3.4/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
838 cursor = self.connection.cursor()
839 try:
--> 840 cursor.execute(sql, params)
841 except Exception:
842 cursor.close()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
77 start = time()
78 try:
---> 79 return super(CursorDebugWrapper, self).execute(sql, params)
80 finally:
81 stop = time()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/usr/local/lib/python3.4/dist-packages/django/db/utils.py in __exit__(self, exc_type, exc_value, traceback)
95 if dj_exc_type not in (DataError, IntegrityError):
96 self.wrapper.errors_occurred = True
---> 97 six.reraise(dj_exc_type, dj_exc_value, traceback)
98
99 def __call__(self, func):
/usr/local/lib/python3.4/dist-packages/django/utils/six.py in reraise(tp, value, tb)
656 value = tp()
657 if value.__traceback__ is not tb:
--> 658 raise value.with_traceback(tb)
659 raise value
660
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
ProgrammingError: could not identify an equality operator for type json
LINE 1: ...e"."domain_name", "reporter_logs"."rule_type", "reporter_...
^
Here is the output of the query that fails.
[{'sql': 'SELECT "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence", COUNT(CASE WHEN "reporter_logs"."action" = \'a\' THEN 1 WHEN "reporter_logs"."action" = \'ae\' THEN 1 ELSE 0 END) AS "allowed" FROM "reporter_logs" GROUP BY "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence" LIMIT 21', 'time': '0.002'}]
Any suggestions would be appreciated. Including suggestions about alternate ways to write the query.

Try not using the Q function; use separate When clauses instead:
Logs.objects.annotate(
allowed=Count(Case(
When(action='a', then=1),
When(action='ae', then=1),
default=0,
output_field=IntegerField()
)
)).values('date')

Well, thanks @knbk and @Gocht for trying. I finally found the solution to my problem. I had to specify that I only wanted the domain_name column, which took care of the traceback.
Also, I had to use Sum instead of Count to return 0 for all domains that had no matches. Count returned 1 for nonmatching values, because COUNT counts every non-NULL value, including the default 0.
Logs.objects.values('domain_name').annotate(
allowed=Sum(Case(
When(Q(action='a') |Q(action='ae'), then=Value(1)),
default=Value(0),
output_field=IntegerField()
)
))

Django Not implemented work around

So this question has been asked before, but there is no answer. I know that joining an annotated queryset with a distinct one is not implemented in Django, but the question is: what would an alternate way of doing this be?
code example:
qs1 = Example.objects.filter(...).annotate(...)
qs2 = Example.objects.filter(...).distinct(...)
from itertools import chain
answer = chain(qs1,qs2)
but this will return the following error because "it is not implemented" in Django:
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/query.pyc in __iter__(self)
94 - Responsible for turning the rows into model objects.
95 """
---> 96 self._fetch_all()
97 return iter(self._result_cache)
98
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/query.pyc in _fetch_all(self)
852 def _fetch_all(self):
853 if self._result_cache is None:
--> 854 self._result_cache = list(self.iterator())
855 if self._prefetch_related_lookups and not self._prefetch_done:
856 self._prefetch_related_objects()
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/query.pyc in iterator(self)
218 klass_info = get_klass_info(model, max_depth=max_depth,
219 requested=requested, only_load=only_load)
--> 220 for row in compiler.results_iter():
221 if fill_cache:
222 obj, _ = get_cached_row(row, index_start, db, klass_info,
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/sql/compiler.pyc in results_iter(self)
708 fields = None
709 has_aggregate_select = bool(self.query.aggregate_select)
--> 710 for rows in self.execute_sql(MULTI):
711 for row in rows:
712 if has_aggregate_select:
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
769 """
770 try:
--> 771 sql, params = self.as_sql()
772 if not sql:
773 raise EmptyResultSet
/Library/Python/2.7/site-packages/Django-1.6-py2.7.egg/django/db/models/sql/compiler.pyc in as_sql(self, with_limits, with_col_aliases)
119 if distinct_fields:
120 raise NotImplementedError(
--> 121 "annotate() + distinct(fields) not implemented.")
122 if not ordering:
123 ordering = self.connection.ops.force_no_ordering()
NotImplementedError: annotate() + distinct(fields) not implemented.
So, again, the question is: What is some way to accomplish chaining these querysets?

I had to do something like this some time back, so what you're doing with itertools is right; you just have to cast each queryset to a list first.
from itertools import chain
cars = Cars.objects.all()
trucks = Truck.objects.all()
all_vechiles = chain( list(cars), list(trucks) )
Source: http://mushfiq.me/2013/08/04/django-merging-to-queryset-using-itertools/

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

django loading millions of rows - python

Related

Python Multiprocessing Parallel Insert Into Oracle SQL

CompileError when trying to run Insert Statement on SQLAlchemy

String concatenation by using the value of the model field while updating a queryset

Create Case query via Django ORM

Django Not implemented work around

Categories

Resources