Results from an SQLAlchemy query as an iterator - python

I am struggling to create an iterator from a query from sqlalchemy.
Here is what I tried so far
create a table
from sqlalchemy import create_engine, Column, MetaData, Table , Integer, String
# Open (or create) the SQLite file and keep one connection for the inserts.
engine = create_engine('sqlite:///test90.db')
conn = engine.connect()

# Schema: a single table with an integer primary key and a text column.
metadata = MetaData()
myTable = Table(
    'myTable',
    metadata,
    Column('Doc_id', Integer, primary_key=True),
    Column('Doc_Text', String),
)
metadata.create_all(engine)

# Insert the four sample rows with a single execute() call.
conn.execute(
    myTable.insert(),
    [
        {'Doc_id': 1, 'Doc_Text': 'first sentence'},
        {'Doc_id': 2, 'Doc_Text': 'second sentence'},
        {'Doc_id': 3, 'Doc_Text': 'third sentence'},
        {'Doc_id': 4, 'Doc_Text': 'fourth sentence'},
    ],
)
I have read everything I could find on iterators but still do not get it.
Here is the class I created to get an iterator, but it does not work
(it overflows although I specify a break):
from sqlalchemy import create_engine
# Attempt at an iterator over the rows returned by a SQL query.
class RecordsIterator:
# xDB: database URL handed to create_engine(); xSQL: statement to execute.
def __init__(self, xDB, xSQL):
self.engine = create_engine(xDB)
self.conn = self.engine.connect()
# Result of the query; next() below pulls rows from it with fetchone().
self.xResultCollection = self.conn.execute(xSQL)
def __iter__(self):
return self
# NOTE(review): this body contains `yield`, so calling next() returns a
# generator object rather than one tokenised row.
def next (self):
while self.xResultCollection.closed is False:
# Column index 1 of the fetched row is encoded to UTF-8 and split on whitespace.
xText = (self.xResultCollection.fetchone())[1]
xText = xText.encode('utf-8')
yield xText.split()
# NOTE(review): presumably meant to stop at the end of the result set;
# confirm that the result object can ever be falsy.
if not self.xResultCollection:
break
x1 = RecordsIterator(xDB = 'sqlite:///test91.db', xSQL = 'select * from myTable')
In case you are wondering why I am not just using a generator:
I need to feed the iterator into gensim.Word2Vec and, unfortunately, it does not accept a generator.
import gensim
gensim.models.Word2Vec(x1)
Thanks in advance

Your check if not self.xResultCollection will always return False, as the truth value of the result object will always be True.
In your next method you have a for and a while loop, which shouldn't really be needed, the next method should just return one element, there's no need for a loop there.
As self.xResultCollection is itself an iterable you could just do:
class RecordsIterator:
    """Iterator over the rows of a SQL query, yielding each row as a token list.

    Each step takes column index 1 of the next row, encodes it to UTF-8 and
    splits it on whitespace.
    """

    def __init__(self, xDB, xSQL):
        """Connect to the database URL ``xDB`` and execute ``xSQL``."""
        self.engine = create_engine(xDB)
        self.conn = self.engine.connect()
        # The result object is itself iterable; keep a single iterator over it.
        self.resultIterator = iter(self.conn.execute(xSQL))

    def __iter__(self):
        return self

    def next(self):
        """Return the tokens of the next row.

        Raises:
            StopIteration: when the underlying result has no more rows.
        """
        return next(self.resultIterator)[1].encode('utf-8').split()

    # Python 3 looks up ``__next__`` (Python 2 used ``next``); expose both so
    # the class satisfies the iterator protocol on either version.
    __next__ = next

For those interested in using this with gensim:
It turns out that the problem was that gensim wants an iterable that it can iterate over more than once (iterating over the results of a query cursor consumes it).
see discussions here
this is what seems to work for me
import gensim
from sqlalchemy import create_engine
xDB = 'sqlite:///test91.db'
xSQL = 'select * from myTable'
engine = create_engine(xDB)
conn = engine.connect()
xResultIterator = conn.execute(xSQL)
class MyIterator(object):
    """Iterable over query rows that yields the tokens of one text column.

    After a full pass the query is executed again, so the object can be
    iterated repeatedly even though a result cursor is consumed by iteration.
    """

    def __init__(self, xResults, xNrCol):
        # xResults: iterable of rows; xNrCol: index of the text column.
        self.xResults = xResults
        self.xNrCol = xNrCol

    def __iter__(self):
        for row in self.xResults:
            tokens = row[self.xNrCol].lower().encode('utf8').split()
            # Rows whose text contains no tokens are skipped.
            if tokens:
                yield tokens
        # Re-run the query (module-level conn / xSQL) for the next pass.
        self.xResults = conn.execute(xSQL)  ### THIS SEEMS TO FIX IT
#to use
q1 = MyIterator(xResultIterator, xNrCol = 1)
model = gensim.models.Word2Vec(sentences = q1 , min_count = 1)
and here the vocabulary
model.vocab.keys()
I ran this on a PostgreSQL database with 1 million entries (titles of scientific papers) in about 90 seconds without any problem.
I hope this will help someone else

Related

Method cannot access class variable of different class

I am writing an algorithm in Python that is supposed to sort children (out of a database table) into one of their chosen kindergarten wishes (also out of a database table) following certain criteria on who to guarantee a place in their chosen kindergarten first. For this I first wrote a KitaDAO class to link the programme to the database and fetch information out of certain tables, saving them as an object.
import pymysql
import json
from Kita import Kita
from Kind import Kind
from Element import Element
class KitaDAO():
    """Data-access object for the ``kita`` database.

    Loads kindergartens (``kitas``), children (``kinderprofil``) and the
    waiting list (``warteliste``) and wraps each row in the matching
    ``Kita`` / ``Kind`` / ``Element`` object.
    """

    def __init__(self):
        # Keyword arguments are used because newer PyMySQL releases no longer
        # accept positional connection parameters.
        self.db = pymysql.connect(host="localhost", user="projekt",
                                  password="projekt", database="kita")
        self.cursor = self.db.cursor()
        self.kitaList = []
        self.kinderList = []

    def _fetchObjects(self, sql, factory, columnCount):
        """Run ``sql`` and build ``factory(col0, ..., colN-1)`` for every row.

        Errors are printed rather than raised (best-effort, as before); the
        objects built up to that point are returned.
        """
        self.sql = sql
        objects = []
        try:
            self.cursor.execute(self.sql)
            self.results = self.cursor.fetchall()
            for row in self.results:
                objects.append(factory(*row[:columnCount]))
        except Exception as e:
            print(e)
        return objects

    def getKitas(self):
        """Return all rows of ``kitas`` as ``Kita`` objects.

        A fresh list is built on every call, so repeated calls no longer
        accumulate duplicates in ``self.kitaList``.
        """
        self.kitaList = self._fetchObjects("SELECT * FROM kitas", Kita, 9)
        return self.kitaList

    def getWarteliste(self):
        """Return all rows of ``warteliste`` as ``Element`` objects."""
        self.warteliste = self._fetchObjects(
            "SELECT * FROM warteliste", Element, 7)
        return self.warteliste

    def getKinder(self):
        """Return all rows of ``kinderprofil`` as ``Kind`` objects.

        A fresh list is built on every call; previously the rows were appended
        to whatever list ``self.kinderList`` currently referred to, including
        a list already handed out by ``getKindOnWarteliste``.
        """
        self.kinderList = self._fetchObjects(
            "SELECT * FROM kinderprofil", Kind, 7)
        return self.kinderList

    def getKindOnWarteliste(self, kita, wunschnummer):
        """Return ``getKind()`` of every waiting-list entry whose wish matches.

        ``wunschnummer`` selects which wish (1, 2 or 3) is compared with
        ``kita``. Any other value prints an error and yields an empty list.
        """
        wishGetters = {1: "getWunsch1", 2: "getWunsch2", 3: "getWunsch3"}
        self.kinderList = []
        self.warteliste = self.getWarteliste()
        getterName = wishGetters.get(wunschnummer)
        if getterName is None:
            print("Error: Eine ungültige Wunschnummer wurde übergeben.")
            return self.kinderList
        self.kinderList = [
            element.getKind()
            for element in self.warteliste
            if kita == getattr(element, getterName)()
        ]
        return self.kinderList
If needed I can also post the classes Element, Kind and Kita in here but they basically only contain an __init__ method and if needed a get method. They also work, I have tested that before.
My problem is now, that in my main class called Sortierung I made thisDAO an instance of KitaDAO and want to use it to call methods and such, as normally. Sadly the class variable thisDAO is not accessible in a method of Sortierung. So basically this code has the response:
File "Sortierung.py", line 3, in <module> class Sortierung():
File "Sortierung.py", line 30, in Sortierung checkBetreuung(i,warteliste)
File "Sortierung.py", line 11, in checkBetreuung KinderObjektListe = thisDAO.getKinder()
NameError: name 'thisDAO' is not defined
I marked the lines in the code under here.
from KitaDAO import KitaDAO
# Question's version: the statements below are written inside the class body.
class Sortierung(): #---------- This is line 3
kitas = []
# Bound in the class body, so thisDAO and kitas end up as class attributes.
thisDAO = KitaDAO()
kitas = thisDAO.getKitas()
def checkBetreuung(kita,kinderIDListe):
KinderObjektListe = []
# The reported NameError is raised on the next line, where thisDAO is used
# as a bare name inside the function.
KinderObjektListe = thisDAO.getKinder() #---------This is line 11
#left something out here that was irrelevant
for x in range(1,4):
for i in kitas:
warteliste = []
# NOTE(review): i.getID is passed without being called; confirm whether
# i.getID() was intended.
warteliste = thisDAO.getKindOnWarteliste(i.getID,x)
checkBetreuung(i,warteliste) #-------------This is line 30
Also BTW I am German that is why the variable names are all in German. Sorry :)
You don't need the Sortierung class at all (this is not Java; not everything needs to be encapsulated in a class) – the root problem is thisDAO ends up being a class attribute of it.
Something like
from KitaDAO import KitaDAO
# Module-level DAO shared by the function and the loop below.
thisDAO = KitaDAO()
kitas = thisDAO.getKitas()


def checkBetreuung(kita, kinderIDListe):
    """Check one kindergarten against the children on its waiting list.

    Only the line relevant to the question is shown; the rest of the body was
    omitted in the original snippet.
    """
    KinderObjektListe = thisDAO.getKinder()


# Walk through wish numbers 1..3 and, for each, through every kindergarten.
for x in range(1, 4):
    for i in kitas:
        warteliste = thisDAO.getKindOnWarteliste(i.getID(), x)
        checkBetreuung(i, warteliste)
should do the trick, barring any other problems.

sqlalchemy: Passing a form input to a filter does not return any results

# Database URL format: dialect://user:password@host/database. The credentials
# are separated from the host by '@'.
db = create_engine('mysql://usr:passwd@localhost/DB',
                   isolation_level="READ UNCOMMITTED")
metadata = MetaData(db)
Session = sessionmaker(bind=db)
session = Session()
# autoload=True reflects the columns of the existing `server` table.
server = Table('server', metadata, autoload=True)
if (model is not None):
# The below == works and returns results
#s = session.query(server).filter(server.c.product == model)
#This returns nothing
s = session.query(server).filter(server.c.product.like("%model%")).all()
model is passed as a form parameter
@app.route('/serverHandleList', methods=['POST'])
def serverHandleList():
    """Handle the POSTed form: look up servers by model name and render them."""
    # 'inputModelName' is the form field holding the model to search for.
    model = request.form['inputModelName']
    serverItems = getModel(model)
    return render_template('some.html', items=serverItems, name=model)
What am I doing wrong? Any help much appreciated. Thanks!
You are currently searching for the word "model" instead of what's being passed in the variable model.
Change to this instead:
s = session.query(server).filter(server.c.product.like("%{}%".format(model))).all()

How can I get keyword arguments in sqlalchemy.FunctionElement?

I am trying to create a function which would produce statement equivalent to datetime.utcnow() + timedelta(days=5, minutes=4). I want to be able to call it like utc_after(days=5, minutes=4).
It should be similar to utcnow(), as described in SQLAlchemy documentation.
Here is an example what I got working so far (one dialect only for brevity):
from sqlalchemy.sql.expression import FunctionElement
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.types import DateTime
class utc_after(FunctionElement):
    """SQL expression element for "now plus an offset", typed as DateTime."""
    type = DateTime()
    name = 'utc_after'


# Register the SQLite rendering of utc_after; without the decorator the
# function below is never used by the compiler.
@compiles(utc_after, 'sqlite')
def sqlite_utc_after(element, compiler, **kwargs):
    """Render ``utc_after(days, hours)`` as a SQLite ``datetime('now', ...)``.

    Expects exactly two positional clauses (days, hours), each exposing its
    literal through ``.value``.
    """
    days, hours = list(element.clauses)
    return "datetime('now', '+%s day', '+%s hours')" % (days.value, hours.value)
It works. I can use it as in:
week_after_submission = db.Column(db.DateTime, default=utc_after(7, 0))
Obviously this is only a stub of the final code (it needs +/- formatting, minutes, seconds and so on).
The question is: how can I use keyword arguments from FunctionElement so I could have:
next_week = db.Column(db.DateTime, default=utc_after(days=7))
When I specify utc_after(days=7), element.clauses is empty.
So far I tried using kwargs from sqlite_utc_after (these are empty), digging through element properties, and searching clues in documentation, without results.
Set the keyword argument as a property.
from sqlalchemy.sql.functions import GenericFunction
from sqlalchemy.ext.compiler import compiles
from sqlalchemy import Table, Column, String, MetaData
class last_value(GenericFunction):
    """``last_value(...)`` SQL function with an optional ``ignore nulls`` flag."""

    def __init__(self, *clauses, ignore_nulls=False, **kwargs):
        # Keep the keyword argument on the element so the compiler hook can
        # read it back later; it is not part of the SQL clause list.
        self.ignore_nulls = ignore_nulls
        super().__init__(*clauses, **kwargs)


# Register the custom rendering; without the decorator the function below is
# never used by the compiler.
@compiles(last_value)
def visit_last_value(element, compiler, **kwargs):
    """Render the element as ``last_value(<clauses>[ ignore nulls])``."""
    ignore_nulls_clause = ' ignore nulls' if element.ignore_nulls else ''
    # Forward the compile-time kwargs so nested clauses are rendered with the
    # same options as the enclosing statement.
    rendered_clauses = compiler.process(element.clauses, **kwargs)
    return 'last_value(%s%s)' % (rendered_clauses, ignore_nulls_clause)
def test_last_value():
    """Check that ``ignore_nulls=True`` is rendered into the compiled SQL."""
    # func and select are not among the snippet's imports; bring them into
    # scope here so the test does not fail with a NameError.
    from sqlalchemy import func, select

    m = MetaData()
    t = Table('test', m, Column('a', String), Column('b', String))
    c = func.last_value(t.c.a, ignore_nulls=True)
    # NOTE(review): the list form select([c]) is the legacy calling style;
    # newer SQLAlchemy releases expect select(c). Confirm against the
    # installed version.
    s = select([c])
    # Collapse whitespace so line breaks in the rendered SQL do not matter.
    assert ' '.join(str(s).split()) == 'SELECT last_value(test.a ignore nulls) AS last_value_1 FROM test'

pymongo- upsert not able to perform insertion with $set operation

I have an empty collection and thousands of entries to process (the entries might contain redundancy, which is why I want to use both updates and inserts).
The python code (using pymongo) I wrote:
for mydoc in alldocs:
key = {'myid': mydoc['myid']}
data = process_doc(mydoc) # returns simple dictionary
db.mydocs.update(key, {"$set": data}, upsert = True)
The above code is unable to perform any insert operations; the collection still remains empty. But when I remove $set and simply use data, it works fine. Can't I use $set in an upsert? The reason I want $set is so that pre-existing fields of a BSON document don't get affected. Can someone please guide me? I really can't figure out what to do.
Reproducible code:
from pymongo import Connection
DB_CONTENT_BASE_KEY = 'contentbase'
def connect_to_db(dbname, hostname = 'localhost', portno = 27017, **kwargs):
    """Open a connection to ``hostname:portno`` and return database ``dbname``.

    Extra keyword arguments are accepted and ignored, so a config dict that
    carries additional keys can be passed with ``**config``.
    """
    return Connection(hostname, portno)[dbname]
class MetawebCustomCollectionBuilder(object):
    """Copy (and eventually transform) records from an input to an output DB.

    ``inDbConfig`` / ``outDbConfig`` are dicts passed to ``connect_to_db`` and
    must also contain the collection name under ``DB_CONTENT_BASE_KEY``.
    """

    # key ought to be a dictionary to filter results from contentbase.
    def __init__(self, inDbConfig, outDbConfig, key = None, verbose = False):
        self.verbose = verbose
        self.inDbConfig = inDbConfig
        self.inDb = connect_to_db(**inDbConfig)
        self.outDbConfig = outDbConfig
        self.outDb = connect_to_db(**outDbConfig)
        self.inDbContentBase = self.inDb[self.inDbConfig[DB_CONTENT_BASE_KEY]]
        self.outDbContentBase = self.outDb[self.outDbConfig[DB_CONTENT_BASE_KEY]]
        # A fresh dict per instance: a `{}` default would be shared by every
        # instance created without an explicit key.
        self.key = {} if key is None else key
        self.in_db_collection_constraints()
        self.out_db_collection_constraints()

    def in_db_collection_constraints(self):
        """Make sure the input collection is indexed on ``mid``."""
        self.inDbContentBase.ensure_index('mid')
        if self.verbose:
            print("Assured index on mid for inDbContentBase...")

    def out_db_collection_constraints(self):
        """Make sure the output collection is indexed on ``mid``."""
        self.outDbContentBase.ensure_index('mid')
        if self.verbose:
            print("Assured index on mid for outDbContentBase...")

    def process_in_record(self, inRecord):
        """Return the output record for ``inRecord`` (currently unchanged)."""
        outRecord = inRecord # [YET TO] continue from here...
        return outRecord

    def transit_collection(self):
        """Upsert every input record matching ``self.key`` into the output DB.

        Records are matched on ``mid`` and written with ``$set`` so fields
        already present in the output document are kept.
        """
        for record in self.inDbContentBase.find(self.key):
            outRecord = self.process_in_record(record)
            key = {'mid':outRecord['mid']}
            data = outRecord
            # print() with one argument behaves the same on Python 2 and 3.
            print(key)
            # Third positional argument is the upsert flag.
            self.outDbContentBase.update(key, {"$set": data}, True)
        if self.verbose:
            print('Done with transiting collection from in DB to out DB')

    def cleanup_out_collection(self):
        pass

    def in_db_sandbox(self):
        # To have tests and analytics placed in here corresponding to inDb.
        pass
if __name__ == '__main__':
inDbConfig = {'dbname':'metaweb', 'contentbase': 'content'}
outDbConfig = {'dbname': 'similarkind', 'contentbase': 'content'}
mccb = MetawebCustomCollectionBuilder(inDbConfig, outDbConfig, verbose = True)
mccb.transit_collection()
There must be a pre-existing database inDb. From this collection I want to create a new modified collection.
Your claim is wrong
>>> import pymongo
>>> c = pymongo.Connection()
>>> db = c.mydb
>>> db.mydocs.find().count()
0
>>> db.mydocs.update({'myid': '438'}, {"$set": {'keyA':'valueA'}}, upsert = True)
>>> db.mydocs.find().count()
1
>>> db.mydocs.find_one()
{u'myid': u'438', u'keyA': u'valueA', u'_id': ObjectId('504c2fd1a694cc9624bbd6a2')}

python autocomplete skip and just call function

I have a little GUI that has an autocomplete set up like this:
completion_liststore = create_completion_model()
completion = Gtk.EntryCompletion()
completion.set_model(completion_liststore)
completion.set_text_column(0)
completion.set_match_func(match_anywhere, None)
builder.get_object('student_change').set_completion(completion)
completion.connect('match-selected', self.populate_main)
The completion model and matching look like this in my handler class:
def create_completion_model():
    """Build the Gtk.ListStore of student names used for entry completion.

    Reads every row of ``students`` from ``gradebook.db`` and stores column
    index 5 of each row as a one-column list-store entry.
    """
    db = sqlite3.connect('gradebook.db')
    try:
        cursor = db.cursor()
        cursor.execute('SELECT * from students')
        students = cursor.fetchall()
        cursor.close()
    finally:
        # Release the database handle even if the query fails; previously
        # the connection was never closed.
        db.close()
    names = Gtk.ListStore(str)
    for student in students:
        names.append([student[5]])
    return names
def match_anywhere(completion, entrystr, iter, data):
    """Return True when ``entrystr`` occurs anywhere in the row's text.

    The row's first column is lower-cased before the substring test;
    ``entrystr`` is used as given. ``data`` is unused.
    """
    row_text = completion.get_model()[iter][0].lower()
    return entrystr in row_text
Pretty straight forward and works well. What I would like to do is just call my populate_main function directly, but it only seems to work through my autocomplete. My populate_main function looks like:
def populate_main(self, completion, treemodel, treeiter):
name = treemodel[treeiter][completion.get_text_column()]
db = sqlite3.connect('gradebook.db')
db.row_factory = dict_factory
cursor = db.cursor()
t = (name,)
cursor.execute('SELECT * from students WHERE name=?', t)
.... <-- and so on, filling in my form
Is there a way to reuse this function, just sending the name to it, without copy/pasting it into a new function?

Categories

Resources