CompileError when trying to run Insert Statement on SQLAlchemy - python

The script I'm writing requests news article metadata from an API. In response, it receives a page of results containing several news articles. It is designed to process the records one at a time, extracting the data fields from the json dict and inserting them into postgres.
However, when I run the insert operation, the function returns:
CompileError: Unconsumed column names: urlToImage, publishedAt
How do I get this insert operation to work?
Any help would be greatly appreciated!
Here's my code:
from sqlalchemy import MetaData # for getting table metadata
from sqlalchemy import Table # for interacting with tables
from sqlalchemy import create_engine # for creating db engine
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert # for getting alterate query method to work
# Create DB engine
engine = create_engine('postgresql+psycopg2://{user}:{password}#{hostip}/{db}'.format(**dbkeys))
# Get metadata objects for tables in database
metadata = MetaData(engine, reflect=True)
nstream = metadata.tables['nstream']
for item in response_page['articles']:
# Convert datetime strings from api into Python datetime format
dtp = datetime.strptime(item['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
inserttw = nstream.insert().values(source_id = item['source']['id'],
source_name = item['source']['name'],
author = item['author'],
title = item['title'],
description = item['description'],
url = item['url'],
urlToImage = item['urlToImage'],
publishedAt = dtp,
uploaded2db = datetime.now(),
content = item['content'])
engine.execute(inserttw)
And the full traceback:
CompileError Traceback (most recent call last)
<ipython-input-10-c5f4a6bff45e> in <module>
63 # 2. If query has more than one page, get additional pages
64
---> 65 get_results(tfrom, engine = engine, max_retries = 5)
<ipython-input-10-c5f4a6bff45e> in get_results(tfrom, engine, max_retries)
39
40 # Append the results to the database using the helper
---> 41 process_page(results)
42
43 # If there is an exception, add to the retry counter and then sleep.
<ipython-input-10-c5f4a6bff45e> in process_page(response_page)
26 uploaded2db = datetime.now(),
27 content = item['content'])
---> 28 engine.execute(inserttw)
29
30 def get_results(tfrom, engine = engine, max_retries = 5):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
2073
2074 connection = self.contextual_connect(close_with_result=True)
-> 2075 return connection.execute(statement, *multiparams, **params)
2076
2077 def scalar(self, statement, *multiparams, **params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
1051 inline=len(distilled_params) > 1,
1052 schema_translate_map=self.schema_for_object
-> 1053 if not self.schema_for_object.is_default else None)
1054
1055 ret = self._execute_context(
<string> in <lambda>(self, bind, dialect, **kw)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in compile(self, default, bind, dialect, **kw)
440 else:
441 dialect = default.StrCompileDialect()
--> 442 return self._compiler(dialect, bind=bind, **kw)
443
444 def _compiler(self, dialect, **kw):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _compiler(self, dialect, **kw)
446 Dialect."""
447
--> 448 return dialect.statement_compiler(dialect, self, **kw)
449
450 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, column_keys, inline, **kwargs)
451 # dialect.label_length or dialect.max_identifier_length
452 self.truncated_names = {}
--> 453 Compiled.__init__(self, dialect, statement, **kwargs)
454
455 if (
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, bind, schema_translate_map, compile_kwargs)
217 if self.can_execute:
218 self.execution_options = statement._execution_options
--> 219 self.string = self.process(self.statement, **compile_kwargs)
220
221 #util.deprecated("0.7", ":class:`.Compiled` objects now compile "
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in process(self, obj, **kwargs)
243
244 def process(self, obj, **kwargs):
--> 245 return obj._compiler_dispatch(self, **kwargs)
246
247 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/visitors.py in _compiler_dispatch(self, visitor, **kw)
79 raise exc.UnsupportedCompilationError(visitor, cls)
80 else:
---> 81 return meth(self, **kw)
82 else:
83 # The optimization opportunity is lost for this case because the
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in visit_insert(self, insert_stmt, asfrom, **kw)
2057
2058 crud_params = crud._setup_crud_params(
-> 2059 self, insert_stmt, crud.ISINSERT, **kw)
2060
2061 if not crud_params and \
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _setup_crud_params(compiler, stmt, local_stmt_type, **kw)
55 try:
56 if local_stmt_type in (ISINSERT, ISUPDATE):
---> 57 return _get_crud_params(compiler, stmt, **kw)
58 finally:
59 if should_restore:
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _get_crud_params(compiler, stmt, **kw)
144 raise exc.CompileError(
145 "Unconsumed column names: %s" %
--> 146 (", ".join("%s" % c for c in check))
147 )
148
CompileError: Unconsumed column names: urlToImage, publishedAt

The problem, as it turned out was that I was capitalizing the column names wrong.
I figured this out by using SQLalchemy's inspector function. The column names were there, they were just in lower case.
from sqlalchemy import create_engine
from sqlalchemy.engine import reflection
insp = reflection.Inspector.from_engine(engine)
print(insp.get_columns(nstream))
The source of the confusion is that when you create tables in Postgres, Postgres will automatically lower case your column names unless you use quotes when naming them.
This is the working version of the insert command:
inserttw = nstream.insert().values(source_id = item['source']['id'],
source_name = item['source']['name'],
author = item['author'],
title = item['title'],
description = item['description'],
url = item['url'],
urltoimage = item['urlToImage'],
publishedat = dtp,
uploaded2db = datetime.now(),
content = item['content'])

Related

How to minimize code when using sqlalchemy query and pandas dataframes

I am trying to upgrade my query code by modernizing it.
My old code (bellow). First query joins two tables and selects the rating for each song title together with artist for title and the second gets the genres for each title (association table is used):
items = []
query = db.session.query(Rating, Song).filter(Rating.id==Song.id).all()
for x in query:
dic = {
"rating": x[0],
"title": x[1].title,
"artist": x[1].artist,
"genre": Genre.query.filter(Genre.songs.any(title=x[1].title)).all(),
}
items.append(dic)
My cleaner code. I use pandas dataframes now instead of dictionaries. This gives me the error ArgumentError: SQL expression element or literal value expected, got somethingsomething
query = db.session.query(Rating, Song).filter(Rating.id==Song.id).all()
df = pd.DataFrame(query, columns=["rating", "title"])
for item in df.title:
df['genre'] = (Genre.query.filter(Genre.songs.any(title=item)).all())
How do I get this to work?
Are there more effiecient ways of coding this?
Complete error produced
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1194 try:
-> 1195 meth = statement._execute_on_connection
1196 except AttributeError as err:
AttributeError: 'BaseQuery' object has no attribute '_execute_on_connection'
The above exception was the direct cause of the following exception:
ObjectNotExecutableError Traceback (most recent call last)
<ipython-input-4-99ffacdf2d91> in <module>
1 query = db.session.query(Rating, Song).filter(Rating.id==Song.id)
----> 2 df = pd.read_sql_query(query, db.engine)
3 df
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
381 coerce_float=coerce_float,
382 parse_dates=parse_dates,
--> 383 chunksize=chunksize,
384 )
385
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1292 args = _convert_params(sql, params)
1293
-> 1294 result = self.execute(*args)
1295 columns = result.keys()
1296
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1160 def execute(self, *args, **kwargs):
1161 """Simple passthrough to SQLAlchemy connectable"""
-> 1162 return self.connectable.execution_options().execute(*args, **kwargs)
1163
1164 def read_table(
<string> in execute(self, statement, *multiparams, **params)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
388 if not skip_warning:
389 _warn_with_version(message, version, wtype, stacklevel=3)
--> 390 return fn(*args, **kwargs)
391
392 doc = func.__doc__ is not None and func.__doc__ or ""
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
3036 """
3037 connection = self.connect(close_with_result=True)
-> 3038 return connection.execute(statement, *multiparams, **params)
3039
3040 #util.deprecated_20(
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\engine\base.py in execute(self, statement, *multiparams, **params)
1196 except AttributeError as err:
1197 util.raise_(
-> 1198 exc.ObjectNotExecutableError(statement), replace_context=err
1199 )
1200 else:
~\AppData\Local\Programs\Python\Python37\lib\site-packages\sqlalchemy\util\compat.py in raise_(***failed resolving arguments***)
209
210 try:
--> 211 raise exception
212 finally:
213 # credit to
ObjectNotExecutableError: Not an executable object: <flask_sqlalchemy.BaseQuery object at 0x000001CBC5F14A48>
First, create just one query to return all the data you need in one go, where the grouping of Genres is done uisng the GROUP_CONCAT function:
query = (
db.session
.query(
Rating.rating,
Song.title,
Song.artist,
db.func.GROUP_CONCAT(Genre.category, ", ").label("genres")
)
.select_from(Song)
.where(Rating.id == Song.id)
.join(Genre, Song.genres)
.group_by(
Rating.rating,
Song.title,
Song.artist,
)
)
Then use the pandas method to get it into a dataframe:
df = pd.read_sql_query(query.statement, db.engine)
Where print(df) should produce something like this:
rating title artist genres
0 2.0 title 2 art-2 pop
1 3.0 title 3 art-3 rock, pop
2 4.0 title 4 art-3 other
3 5.0 title 5 art-4 rock, pop, other

SQLAlchemy-TypeError: _get_column_info() got an unexpected keyword argument 'generated'

I'm trying to get metadata of the table from the Redshift database.
I'm getting the error even though the connection is fine.
"TypeError: _get_column_info() got an unexpected keyword argument 'generated'"
I tried with another databse of a different server it works fine...
But not sure whats the issue with this server tables.
Can you please help me out with a solution.
Table=sa.Table("Tablename" ,metadata,autoload=True,autoload_with=engine)
TypeError Traceback (most recent call last)
<ipython-input-98-366ec112cf52> in <module>
----> 1 Table=sa.Table("dim_dealer" ,metadata,autoload=True,autoload_with=engine)
<string> in __new__(cls, *args, **kw)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
126 )
127
--> 128 return fn(*args, **kwargs)
129
130 doc = fn.__doc__ is not None and fn.__doc__ or ""
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in __new__(cls, *args, **kw)
494 except:
495 with util.safe_reraise():
--> 496 metadata._remove_table(name, schema)
497
498 #property
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\langhelpers.py in __exit__(self, type_, value, traceback)
66 self._exc_info = None # remove potential circular references
67 if not self.warn_only:
---> 68 compat.reraise(exc_type, exc_value, exc_tb)
69 else:
70 if not compat.py3k and self._exc_info and self._exc_info[1]:
~\AppData\Local\Continuum\anaconda3\lib\`enter code here`site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
151 if value.__traceback__ is not tb:
152 raise value.with_traceback(tb)
--> 153 raise value
154
155 def u(s):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in __new__(cls, *args, **kw)
489 metadata._add_table(name, schema, table)
490 try:
--> 491 table._init(name, metadata, *args, **kw)
492 table.dispatch.after_parent_attach(table, metadata)
493 return table
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in _init(self, name, metadata, *args, **kwargs)
583 include_columns,
584 _extend_on=_extend_on,
--> 585 resolve_fks=resolve_fks,
586 )
587
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in _autoload(self, metadata, autoload_with, include_columns, exclude_columns, resolve_fks, _extend_on)
607 exclude_columns,
608 resolve_fks,
--> 609 _extend_on=_extend_on,
610 )
611 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in run_callable(self, callable_, *args, **kwargs)
2148 """
2149 with self._contextual_connect() as conn:
-> 2150 return conn.run_callable(callable_, *args, **kwargs)
2151
2152 def execute(self, statement, *multiparams, **params):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in run_callable(self, callable_, *args, **kwargs)
1602
1603 """
-> 1604 return callable_(self, *args, **kwargs)
1605
1606 def _run_visitor(self, visitorcallable, element, **kwargs):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\default.py in reflecttable(self, connection, table, include_columns, exclude_columns, resolve_fks, **opts)
429 insp = reflection.Inspector.from_engine(connection)
430 return insp.reflecttable(
--> 431 table, include_columns, exclude_columns, resolve_fks, **opts
432 )
433
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in reflecttable(self, table, include_columns, exclude_columns, resolve_fks, _extend_on)
638
639 for col_d in self.get_columns(
--> 640 table_name, schema, **table.dialect_kwargs
641 ):
642 found_table = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in get_columns(self, table_name, schema, **kw)
371
372 col_defs = self.dialect.get_columns(
--> 373 self.bind, table_name, schema, info_cache=self.info_cache, **kw
374 )
375 for col_def in col_defs:
<string> in get_columns(self, connection, table_name, schema, **kw)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in cache(fn, self, con, *args, **kw)
54 ret = info_cache.get(key)
55 if ret is None:
---> 56 ret = fn(self, con, *args, **kw)
57 info_cache[key] = ret
58 return ret
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy_redshift\dialect.py in get_columns(self, connection, table_name, schema, **kw)
459 default=col.default, notnull=col.notnull, domains=domains,
460 enums=[], schema=col.schema, encode=col.encode,
--> 461 comment=col.comment)
462 columns.append(column_info)
463 return columns
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy_redshift\dialect.py in _get_column_info(self, *args, **kwargs)
666 column_info = super(RedshiftDialect, self)._get_column_info(
667 *args,
--> 668 **kw
669 )
670 if isinstance(column_info['type'], VARCHAR):
TypeError: _get_column_info() got an unexpected keyword argument 'generated'
print(repr(metadata.tables[Table]))
Thanks in advance
It looks like a backwards compatibility bug between SQLAlchemy and SQLAlchemy-Redshift.
Private method RedshiftDialect._get_column_info was overriden in SQLAlchemy-Redshift. generated keyword argument was added to this method in SQLAlchemy v1.3.16 which caused compatibility error. So a fix for this problem was implemented: generated keyword should be used only for the latest versions of SQLAlchemy. Unfortunately it doesn't work:
if sa.__version__ >= '1.3.16':
# SQLAlchemy 1.3.16 introduced generated columns,
# not supported in redshift
kw['generated'] = ''
As you can see this condition is truthy for your SQLAlchemy version ("1.3.7") because this is how string comparison works. I think I will make a pull request to correct this behaviour.
I think the most simple solution for you for now is to update your SQLAlchemy package to the 1.3.10 version or newer. In this case this condition will work as expected.
Update: This bug was fixed in the SQLAlchemy-Redshift v0.8.0.

Python Multiprocessing Parallel Insert Into Oracle SQL

I'm currently trying to create a table and insert values in an Oracle SQL database.
I managed to make it work using df.to_sql(name=table_name, con=conn, if_exists='append', index=False) but it took 1h30m to upload a DataFrame of only 10000 rows * 5 columns.
This made me look into Multiprocessing, so I tried following the answer Siddhi Kiran Bajracharya gave in this thread
Which turned out like this:
import pandas as pd
from sqlalchemy import create_engine
import config
LOCATION = r"C:\Oracle\instantclient_19_6"
os.environ["PATH"] = LOCATION + ";" + os.environ["PATH"]
conn = create_engine('oracle+cx_oracle://' + config.user + ':' + config.pw +
'#' + config.host + ':' + config.port + '/?service_name=' + config.db +'?charset=latin-1')
import math
from multiprocessing.dummy import Pool as ThreadPool
def insert_df(df, *args, **kwargs):
nworkers = 4 # number of workers that executes insert in parallel fashion
chunk = math.floor(df.shape[0] / nworkers) # number of chunks
chunks = [(chunk * i, (chunk * i) + chunk) for i in range(nworkers)]
chunks.append((chunk * nworkers, df.shape[0]))
pool = ThreadPool(nworkers)
def worker(chunk):
i, j = chunk
df.iloc[i:j, :].to_sql(*args, **kwargs)
pool.map(worker, chunks)
pool.close()
pool.join()
insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
The problem is that this last code runs for 20mins, only inserts 9 rows into the Table, and then raises the following error DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
Full Traceback:
---------------------------------------------------------------------------
DatabaseError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
DatabaseError: ORA-00955: name is already used by an existing object
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
<ipython-input-73-b50275447767> in <module>
20
21
---> 22 insert_df(df, f'{table_name}', conn, if_exists='append', index=False)
<ipython-input-73-b50275447767> in insert_df(df, *args, **kwargs)
14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
---> 16 pool.map(worker, chunks)
17 pool.close()
18 pool.join()
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py in mapstar(args)
42
43 def mapstar(args):
---> 44 return list(map(*args))
45
46 def starmapstar(args):
<ipython-input-73-b50275447767> in worker(chunk)
12 def worker(chunk):
13 i, j = chunk
---> 14 df.iloc[i:j, :].to_sql(*args, **kwargs)
15
16 pool.map(worker, chunks)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2710 chunksize=chunksize,
2711 dtype=dtype,
-> 2712 method=method,
2713 )
2714
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1317 dtype=dtype,
1318 )
-> 1319 table.create()
1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in create(self)
654 )
655 else:
--> 656 self._execute_create()
657
658 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_create(self)
636 # Inserting table into database, add to MetaData object
637 self.table = self.table.tometadata(self.pd_sql.meta)
--> 638 self.table.create()
639
640 def create(self):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2044 ):
2045 with self._optional_conn_ctx_manager(connection) as conn:
-> 2046 conn._run_visitor(visitorcallable, element, **kwargs)
2047
2048 class _trans_ctx(object):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1613
1614 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1615 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1616
1617
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints,
827 )
828 # fmt: on
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
986 raise exc.ObjectNotExecutableError(object_)
987 else:
--> 988 return meth(self, multiparams, params)
989
990 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1048 compiled,
1049 None,
-> 1050 compiled,
1051 )
1052 if self._has_events or self.engine._has_events:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1251 except BaseException as e:
1252 self._handle_dbapi_exception(
-> 1253 e, statement, parameters, cursor, context
1254 )
1255
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1471 util.raise_from_cause(newraise, exc_info)
1472 elif should_wrap:
-> 1473 util.raise_from_cause(sqlalchemy_exception, exc_info)
1474 else:
1475 util.reraise(*exc_info)
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1247 if not evt_handled:
1248 self.dialect.do_execute(
-> 1249 cursor, statement, parameters, context
1250 )
1251 except BaseException as e:
C:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
578
579 def do_execute(self, cursor, statement, parameters, context=None):
--> 580 cursor.execute(statement, parameters)
581
582 def do_execute_no_params(self, cursor, statement, context=None):
DatabaseError: (cx_Oracle.DatabaseError) ORA-00955: name is already used by an existing object
[SQL:
CREATE TABLE "TEST_TABLE_DELETE" (
"id" CLOB,
"name" CLOB,
"var1" CLOB,
"var2" CLOB,
"var3" CLOB,
"var4" CLOB,
"var5" CLOB,
"var6" CLOB,
"var7" CLOB,
"var8" CLOB,
"var9" CLOB,
"var10" CLOB,
"var11" CLOB,
"var12" FLOAT,
"var13" CLOB,
"var14" CLOB
)
]
(Background on this error at: http://sqlalche.me/e/4xp6)
Any pointers to help me solve this issue would be greatly appreciated.
Thanks!
Luti
if you're using to_sql, with string columns in your dataframe, you better do something like this:
dtyp = {c:types.VARCHAR(data[c].str.len().max()) for c in data.columns[data.dtypes == 'object'].tolist()}
data.to_sql('table_name.....',con=...,if_exists='append'
, index=False
, dtype = dtyp)
For 10k rows, it should be very fast.

String concatenation by using the value of the model field while updating a queryset

I want to update the email id of some users to: "prefix" + "value of the user email"
Now I can do this for one user as follows:
User.objects.filter(pk=<id>).update(email=Concat(Value("prefix"), 'email'))
However, as soon as I filter on a pk list, I get a nasty error. The query is:
User.objects.filter(pk__in=<list_id>).update(email=Concat(Value("prefix"), 'email'))
The error is:
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/query.pyc in update(self, **kwargs)
561 query.add_update_values(kwargs)
562 with transaction.atomic(using=self.db, savepoint=False):
--> 563 rows = query.get_compiler(self.db).execute_sql(CURSOR)
564 self._result_cache = None
565 return rows
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
1060 related queries are not available.
1061 """
-> 1062 cursor = super(SQLUpdateCompiler, self).execute_sql(result_type)
1063 try:
1064 rows = cursor.rowcount if cursor else 0
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/models/sql/compiler.pyc in execute_sql(self, result_type)
838 cursor = self.connection.cursor()
839 try:
--> 840 cursor.execute(sql, params)
841 except Exception:
842 cursor.close()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
77 start = time()
78 try:
---> 79 return super(CursorDebugWrapper, self).execute(sql, params)
80 finally:
81 stop = time()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/utils.pyc in __exit__(self, exc_type, exc_value, traceback)
96 if dj_exc_type not in (DataError, IntegrityError):
97 self.wrapper.errors_occurred = True
---> 98 six.reraise(dj_exc_type, dj_exc_value, traceback)
99
100 def __call__(self, func):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/utils.pyc in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/django/db/backends/mysql/base.pyc in execute(self, query, args)
122 try:
123 # args is None means no string interpolation
--> 124 return self.cursor.execute(query, args)
125 except Database.OperationalError as e:
126 # Map some error codes to IntegrityError, since they seem to be
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/MySQLdb/cursors.pyc in execute(self, query, args)
224 except Exception:
225 exc, value = sys.exc_info()[:2]
--> 226 self.errorhandler(self, exc, value)
227 self._executed = query
228 if not self._defer_warnings: self._warning_check()
/Users/zishanahmad/Devel/Env/venv_sliderule/lib/python2.7/site-packages/MySQLdb/connections.pyc in defaulterrorhandler(***failed resolving arguments***)
34 del connection
35 if isinstance(errorvalue, BaseException):
---> 36 raise errorvalue
37 if errorclass is not None:
38 raise errorclass(errorvalue)
OperationalError: (1093, "You can't specify target table 'auth_user' for update in FROM clause")
I don't really understand what the error message is trying to say. Any help would be appreciated.
EDIT:
MySQL version: 5.7.12
Django: 1.8
What does your <list_id> look like? What's the result of User.objects.filter(pk__in=<list_id>)?
In Django 1.10, I have no problem running:
>>> User.objects.filter(pk=1).update(first_name='Test')
1L
>>> User.objects.filter(pk__in=[1, 2]).update(first_name='Test')
2L
As for workarounds, obviously it isn't as efficient, but why not just cache the QuerySet and then iterate over it to update each object? This is (hopefully) a one-time query, so I'm not sure how much of a concern SQL efficiency is.
This should work :)
User.objects.filter(pk__in=<list_id>).update(email=Concat(Value("prefix"), F('email')))
I had the same problem, but solved by trying to convert queryset list_ids to a list(), like this:
ids = Transactions.objects.values_list('id', flat=True)
# This may raise error:
User.objects.filter(pk__in=ids).update(first_name='Test')
# But if convert ids to a list, should work:
list_ids = list(ids)
User.objects.filter(pk__in=list_ids).update(first_name='Test')
Hope it helps :)

Create Case query via Django ORM

Using Django 1.8 and Postgres 9.3 - I have a Django model as follows which contains log lines. I would like to extract all domain_name values that were never allowed access.
class Logs(models.Model):
date = DateTimeField(db_index=True)
action = TextField(default='a', null=True, blank=True, db_index=True)
url = TextField(null=True, blank=True)
stats = JsonField(null=True, blank=True)
domain_name = TextField(null=True, blank=True, db_index=True)
<snip>
This SQL query works nicely, but I'm having trouble translating it to a Django ORM queryset.
select domain_name from reporter_log
GROUP BY domain_name
HAVING COUNT(CASE WHEN action = 'a' OR action = 'ae' then 1 END) = 0;
I would have expected the following queryset would work:
LogLine.objects.annotate(
allowed=Count(Case(
When(action='a', then=Value(1)),
When(action='ae', then=Value(1)),
default=Value(0),
output_field=IntegerField(),
)
))
But I get a traceback:
ProgrammingError Traceback (most recent call last)
<ipython-input-4-feb056328fe8> in <module>()
4 When(action='ae', then=Value(1)),
5 default=Value(0),
----> 6 output_field=IntegerField(),
7 )
8 ))
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in __call__(self, result)
245 self.start_displayhook()
246 self.write_output_prompt()
--> 247 format_dict, md_dict = self.compute_format_data(result)
248 self.write_format_data(format_dict, md_dict)
249 self.update_user_ns(result)
/usr/lib/python3/dist-packages/IPython/core/displayhook.py in compute_format_data(self, result)
155
156 """
--> 157 return self.shell.display_formatter.format(result)
158
159 def write_format_data(self, format_dict, md_dict=None):
/usr/lib/python3/dist-packages/IPython/core/formatters.py in format(self, obj, include, exclude)
150 md = None
151 try:
--> 152 data = formatter(obj)
153 except:
154 # FIXME: log the exception
/usr/lib/python3/dist-packages/IPython/core/formatters.py in __call__(self, obj)
478 type_pprinters=self.type_printers,
479 deferred_pprinters=self.deferred_printers)
--> 480 printer.pretty(obj)
481 printer.flush()
482 return stream.getvalue()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
361 if isinstance(meth, collections.Callable):
362 return meth(obj, self, cycle)
--> 363 return _default_pprint(obj, self, cycle)
364 finally:
365 self.end_group()
/usr/lib/python3/dist-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
481 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
482 # A user-provided repr.
--> 483 p.text(repr(obj))
484 return
485 p.begin_group(1, '<')
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __repr__(self)
136
137 def __repr__(self):
--> 138 data = list(self[:REPR_OUTPUT_SIZE + 1])
139 if len(data) > REPR_OUTPUT_SIZE:
140 data[-1] = "...(remaining elements truncated)..."
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __iter__(self)
160 - Responsible for turning the rows into model objects.
161 """
--> 162 self._fetch_all()
163 return iter(self._result_cache)
164
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in _fetch_all(self)
963 def _fetch_all(self):
964 if self._result_cache is None:
--> 965 self._result_cache = list(self.iterator())
966 if self._prefetch_related_lookups and not self._prefetch_done:
967 self._prefetch_related_objects()
/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in iterator(self)
236 # Execute the query. This will also fill compiler.select, klass_info,
237 # and annotations.
--> 238 results = compiler.execute_sql()
239 select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
240 compiler.annotation_col_map)
/usr/local/lib/python3.4/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
838 cursor = self.connection.cursor()
839 try:
--> 840 cursor.execute(sql, params)
841 except Exception:
842 cursor.close()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
77 start = time()
78 try:
---> 79 return super(CursorDebugWrapper, self).execute(sql, params)
80 finally:
81 stop = time()
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
/usr/local/lib/python3.4/dist-packages/django/db/utils.py in __exit__(self, exc_type, exc_value, traceback)
95 if dj_exc_type not in (DataError, IntegrityError):
96 self.wrapper.errors_occurred = True
---> 97 six.reraise(dj_exc_type, dj_exc_value, traceback)
98
99 def __call__(self, func):
/usr/local/lib/python3.4/dist-packages/django/utils/six.py in reraise(tp, value, tb)
656 value = tp()
657 if value.__traceback__ is not tb:
--> 658 raise value.with_traceback(tb)
659 raise value
660
/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
62 return self.cursor.execute(sql)
63 else:
---> 64 return self.cursor.execute(sql, params)
65
66 def executemany(self, sql, param_list):
ProgrammingError: could not identify an equality operator for type json
LINE 1: ...e"."domain_name", "reporter_logs"."rule_type", "reporter_...
^
Here is the output of the query that fails.
[{'sql': 'SELECT "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence", COUNT(CASE WHEN "reporter_logs"."action" = \'a\' THEN 1 WHEN "reporter_logs"."action" = \'ae\' THEN 1 ELSE 0 END) AS "allowed" FROM "reporter_logs" GROUP BY "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence" LIMIT 21', 'time': '0.002'}]
Any suggestions would be appreciated. Including suggestions about alternate ways to write the query.
Try by not using Q function, use separated When instead:
Logs.objects.annotate(
allowed=Count(Case(
When(action='a', then=1),
When(action='ae', then=1),
default=0,
output_field=IntegerField()
)
)).values('date')
Well, thanks #knbk and #Gocht for trying. I finally found the solution to my problem. I had to specify that I only wanted the domain_name column, which took care of the traceback.
Also, I had to use Sum instead of Count to return 0 for all domains that had no matches. Count returned 1 for nonmatching values.
Logs.objects.values('domain_name').annotate(
allowed=Sum(Case(
When(Q(action='a') |Q(action='ae'), then=Value(1)),
default=Value(0),
output_field=IntegerField()
)
))

Categories

Resources