Complex aggregation on relationships in SQLAlchemy - python

So, suppose there are these models:
class Country(Base):
    """ORM model for a row of the ``countries`` table."""

    __tablename__ = "countries"

    # UUID primary key; a fresh uuid4 is generated when no value is supplied.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        index=True,
    )
    # Required, unique and indexed country code.
    code = Column(String, index=True, nullable=False, unique=True)
    # Required country name.
    name = Column(String, nullable=False)
class EventSource(Base):
    """ORM model for a row of the ``eventsources`` table."""

    __tablename__ = "eventsources"

    # UUID primary key; a fresh uuid4 is generated when no value is supplied.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        index=True,
    )
    # Required name; no two event sources may share one.
    name = Column(String, nullable=False, unique=True)
class Event(Base):
    """ORM model for a row of the ``events`` table.

    Each event points at exactly one country and one event source.
    """

    __tablename__ = "events"

    # UUID primary key; a fresh uuid4 is generated when no value is supplied.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
        index=True,
    )
    # Required reference to countries.id; deleting the country cascades.
    country_id = Column(
        UUID(as_uuid=True),
        ForeignKey("countries.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Required reference to eventsources.id; deleting the source cascades.
    eventsource_id = Column(
        UUID(as_uuid=True),
        ForeignKey("eventsources.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Timezone-aware creation time, defaulting to the SQL now() function.
    created_at = Column(DateTime(timezone=True), default=func.now())
And the task is to get counts of events for each country by eventsource. This is pretty easily done in raw SQL:
-- Count events per (event source, country) pair.
-- The SELECT list must use the joined table name "eventsources";
-- "eventsource" is not part of the FROM clause.
SELECT eventsources.name, countries.code, COUNT(events.id) AS events_count
FROM events
JOIN countries ON events.country_id = countries.id
JOIN eventsources ON events.eventsource_id = eventsources.id
GROUP BY eventsources.name, countries.code;
So the result is that for each eventsource we have a count of events grouped by country. Now the question is how to properly set up the models and write the query in SQLAlchemy (preferably in 2.0-style syntax) so that the end result is a list of EventSource models whose countries relationship carries the aggregated events count, accessible as in the last line of the following code block:
# Initialize our database:
country_en = Country(code="en", name="England")
country_de = Country(code="de", name="Germany")
eventsource_tv = EventSource(name="TV")
eventsource_internet = EventSource(name="Internet")
session.add(country_en)
session.add(country_de)
session.add(eventsource_tv)
session.add(eventsource_internet)
# Flush so the generated primary keys are available for the events below.
session.flush()
# TV: three events for "en" and one for "de".
session.add(Event(country_id=country_en.id, eventsource_id=eventsource_tv.id))
session.add(Event(country_id=country_en.id, eventsource_id=eventsource_tv.id))
session.add(Event(country_id=country_en.id, eventsource_id=eventsource_tv.id))
session.add(Event(country_id=country_de.id, eventsource_id=eventsource_tv.id))
# Internet: two events for "en".
session.add(Event(country_id=country_en.id, eventsource_id=eventsource_internet.id))
session.add(Event(country_id=country_en.id, eventsource_id=eventsource_internet.id))
session.flush()
# Aggregate eventsources somehow:
# this is the line where some magick that solves problem should happen
# (.scalars() unwraps the one-element rows into EventSource instances).
eventsources = (
    session.execute(select(EventSource).order_by(EventSource.name))
    .scalars()
    .all()
)
# Print results. This line should output "2" (eventsource "Internet" for country "en"):
print(eventsources[0].countries[0].events_count)

For those who encounter the same problem, this is what I ended up doing. Here is an example of a relationship whose target is a select query. My solution was to create the query and then map its results to a custom class, based on the link above.
This is roughly what I've done (not exactly the code that I run, but something pretty similar):
class Country(Base):
    """Mapped class backed by the ``countries`` table."""

    __tablename__ = "countries"

    # Primary key: a UUID that defaults to a newly generated uuid4.
    id = Column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True
    )
    # Country code: required, indexed and unique.
    code = Column(String, index=True, nullable=False, unique=True)
    # Country name: required.
    name = Column(String, nullable=False)
class EventSource(Base):
    """Mapped class backed by the ``eventsources`` table."""

    __tablename__ = "eventsources"

    # Primary key: a UUID that defaults to a newly generated uuid4.
    id = Column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True
    )
    # Event source name: required and unique.
    name = Column(String, nullable=False, unique=True)
class Event(Base):
    """Mapped class backed by the ``events`` table.

    Besides the foreign key columns it exposes ``country``, a relationship
    to the ``Country`` the event belongs to.
    """

    __tablename__ = "events"

    # Primary key: a UUID that defaults to a newly generated uuid4.
    id = Column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True
    )
    # Required FK to countries.id (ON DELETE CASCADE).
    country_id = Column(
        UUID(as_uuid=True),
        ForeignKey("countries.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Required FK to eventsources.id (ON DELETE CASCADE).
    eventsource_id = Column(
        UUID(as_uuid=True),
        ForeignKey("eventsources.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Timezone-aware creation time, defaulting to the SQL now() function.
    created_at = Column(DateTime(timezone=True), default=func.now())

    # Relationship to the referenced Country.
    country = relationship("Country")
from dataclasses import dataclass


@dataclass
class CountriesCount:
    """Number of events for one (event source, country) pair.

    Plain data holder: the ``@dataclass`` decorator generates ``__init__``,
    ``__repr__`` and ``__eq__`` from the annotated fields below.
    """

    # Identifier of the event source the count belongs to.
    eventsource_id: UUID
    # Code of the country the count belongs to.
    country_code: str
    # Number of events counted for this pair.
    events_count: int
# Aliased aggregate query: one row per (eventsource_id, country_code) pair,
# with the number of matching events exposed as "events_count".
events_counts_table = (
    select(
        Event.eventsource_id.label("eventsource_id"),
        Country.code.label("country_code"),
        func.count(Country.code).label("events_count"),
    )
    .select_from(Event)
    # Join each event to its country through the Event.country relationship.
    .join(Country, Event.country)
    .group_by(Event.eventsource_id, Country.code)
).alias()
# Attach a read-only "countries" relationship to EventSource. Its target is
# CountriesCount, mapped imperatively onto the aggregate subquery; the pair
# (eventsource_id, country_code) acts as the primary key of that mapping.
EventSource.countries = relationship(
    registry().map_imperatively(
        CountriesCount,
        events_counts_table,
        primary_key=[
            events_counts_table.c.eventsource_id,
            events_counts_table.c.country_code,
        ],
    ),
    # The aggregate rows are query results, so the relationship is view-only.
    viewonly=True,
    # Rows are matched to an event source by its id.
    primaryjoin=EventSource.id == events_counts_table.c.eventsource_id,
)

Related

SQLAlchemy ORM: Mapping a non-unique column to schema

How would you map a column that is not unique and is not a key into another schema(table)?
class TaskEntity(Base, BaseMixin):
    """ORM model for the ``task_entity`` table."""

    __tablename__ = "task_entity"
    # (dag_no, ordinal_position) must be unique as a pair.
    # NOTE(review): dag_no is not declared in this class — presumably it
    # comes from BaseMixin; confirm.
    __table_args__ = (
        UniqueConstraint(
            "dag_no", "ordinal_position", name="dag_ordinal_uq_constraint"
        ),
    )

    # Auto-incrementing primary key.
    task_no = Column(
        BIGINT(20), primary_key=True, autoincrement=True, nullable=False
    )
    # Required reference to job_tb.job_no.
    job_no = Column(BIGINT(20), ForeignKey("job_tb.job_no"), nullable=False)
    # Required, unique task name.
    task_name = Column(String(128), unique=True, nullable=False)
    # Required position; not unique on its own. Defaults to 1.
    ordinal_position = Column(SMALLINT(6), nullable=False, default=1)
ordinal_position is not unique on its own, but is unique per task_no which is unique per job_no.
Ex) job_no.A can only have 1 of task_no.A which can only have 1 of ordinal_position.C. But job_no.B can have a task_no.A and ordinal_position.C.
I am trying to create the schema below in conjunction with the TaskEntity class above, but I am getting errno: 150 "Foreign key constraint is incorrectly formed", which I am assuming comes from the fact that ordinal_position is not unique.
class TaskLog(Base, BaseMixin):
    """ORM model for the ``task_log`` table."""

    __tablename__ = "task_log"

    # Auto-incrementing primary key.
    task_log_no = Column(
        BIGINT(20), nullable=False, autoincrement=True, primary_key=True
    )
    execution_datetime = Column(TIMESTAMP, nullable=False)
    start_datetime = Column(TIMESTAMP, nullable=False)
    duration = Column(Float, nullable=False)
    # Required references to the job and to the task (by number and by name).
    job_no = Column(BIGINT(20), ForeignKey("job_tb.job_no"), nullable=False)
    task_no = Column(
        BIGINT(20), ForeignKey("task_entity.task_no"), nullable=False
    )
    task_name = Column(
        String(128), ForeignKey("task_entity.task_name"), nullable=False
    )
    # This is the declaration causing issues: the foreign key targets
    # task_entity.ordinal_position, which is not unique on its own.
    task_ordinal_position = Column(
        SMALLINT(6), ForeignKey("task_entity.ordinal_position"), nullable=False
    )
Have tried using relationships and "primary_join=", but the mapping seems to be very off once the data comes in.
Any inputs, much appreciated.
If I'm reading this correctly, then you probably want a UniqueConstraint across all three columns:
__table_args__ = (
UniqueConstraint('task_no', 'job_no', 'ordinal_position'),
)

How to apply UniqueConstraint to hierarchy table with specific and relationship at same time?

Table is this
class CustomGroups(Base):
    """ORM model for the ``custom_groups`` table.

    Groups form a hierarchy through the ``custom_groups_relationship``
    association table: ``children`` lists the groups below this one and the
    ``parent`` backref gives the reverse direction.
    """

    __tablename__ = 'custom_groups'

    id = Column(Integer, primary_key=True)
    # Required group name, unique across the whole table.
    name = Column(String(80), unique=True, nullable=False)
    # Pass the callable itself: calling datetime.utcnow() here would be
    # evaluated once at import time and give every row the same timestamp.
    last_used = Column(DateTime, default=datetime.utcnow)
    created = Column(DateTime, default=datetime.utcnow)

    # Many-to-many links through the respective association tables.
    images = relationship(
        "Images", secondary=images_with_groups, back_populates="groups"
    )
    tags = relationship(
        "Tags", secondary=custom_groups_with_tags, back_populates="groups"
    )
    # Self-referential link: this group is the parent_id side, its children
    # are the child_id side of custom_groups_relationship.
    children = relationship(
        "CustomGroups",
        secondary='custom_groups_relationship',
        primaryjoin='CustomGroups.id==CustomGroupsRelationship.parent_id',
        secondaryjoin="CustomGroups.id==CustomGroupsRelationship.child_id",
        backref="parent",
    )
Relationship table
class CustomGroupsRelationship(Base):
    """Association table linking a parent ``custom_groups`` row to a child."""

    __tablename__ = 'custom_groups_relationship'

    id = Column(Integer, primary_key=True)
    # Both ends of the link reference custom_groups.id.
    parent_id = Column(Integer, ForeignKey('custom_groups.id'))
    child_id = Column(Integer, ForeignKey('custom_groups.id'))
    # Pass the callable itself: calling datetime.now() here would be
    # evaluated once at import time and give every row the same timestamp.
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now)
I want to have a UniqueConstraint on CustomGroups.name and CustomGroups.parent at the same time.
That is, groups on the same level must not have duplicate names, similar to the Windows folder structure.
Thanks

Subquery/alias duplication in SqlAlchemy with polymorphic tables and "limit"

Given this polymorphic model
class OrganizationBase(Base):
    """Polymorphic base class stored in ``x_organization_base``.

    The ``model_type`` column is the discriminator; rows of this class
    itself carry the identity "base".
    """

    __tablename__ = "x_organization_base"
    __mapper_args__ = {
        "polymorphic_identity": "base",
        "polymorphic_on": "model_type",
    }

    # Discriminator column named by "polymorphic_on" above.
    model_type = db.Column(db.String(), nullable=False)
    id = Column(Integer(), primary_key=True)
cont'd
class UmbrellaOrganization(OrganizationBase):
    """Subclass with identity "umbrella", stored in ``x_umbrella_organization``."""

    __tablename__ = "x_umbrella_organization"
    __mapper_args__ = {"polymorphic_identity": "umbrella"}

    # Primary key that is also a foreign key to the base table's id.
    id = db.Column(
        Integer, db.ForeignKey(OrganizationBase.id), primary_key=True
    )
    # Required, unique accounting identifier.
    umbrella_accounting_id = db.Column(
        db.String(255), nullable=False, unique=True
    )
    # Joined-eager-loaded properties; each property gets a scalar
    # "umbrella_organization" backref.
    properties = db.relationship(
        "UmbrellaOrganizationProperty",
        lazy="joined",
        backref=backref("umbrella_organization", uselist=False),
    )
class Organization(OrganizationBase):
    """Subclass with identity "organization", stored in ``x_organization``."""

    __tablename__ = "x_organization"
    __mapper_args__ = {"polymorphic_identity": "organization"}

    # Primary key that is also a foreign key to the base table's id.
    id = db.Column(
        Integer, db.ForeignKey(OrganizationBase.id), primary_key=True
    )
    # Required, indexed reference to the umbrella organization's
    # umbrella_accounting_id column (not to its primary key).
    umbrella_accounting_id = db.Column(
        db.String(255),
        db.ForeignKey(UmbrellaOrganization.umbrella_accounting_id),
        nullable=False,
        index=True,
    )
and this eagerly loaded relationship
class UmbrellaOrganizationProperty(Base):
    """Row of ``x_umbrella_organization_property``, owned by an umbrella organization."""

    __tablename__ = "x_umbrella_organization_property"

    id = Column(Integer(), primary_key=True)
    # Required, indexed reference to the owning UmbrellaOrganization.
    umbrella_organization_id = db.Column(
        Integer,
        db.ForeignKey(UmbrellaOrganization.id),
        nullable=False,
        index=True,
    )
    # Required property type.
    type = db.Column(db.String(), nullable=False)
this query will produce invalid SQL:
# Select (Organization, UmbrellaOrganization) pairs that share the same
# umbrella_accounting_id.
query = db.session.query(Organization, UmbrellaOrganization).join(
    UmbrellaOrganization,
    UmbrellaOrganization.umbrella_accounting_id
    == Organization.umbrella_accounting_id,
)
# Restrict the result to five rows.
y = query.limit(5)
Specifically, the main query will be duplicated, with the same alias 'anon_1' occurring twice:
ProgrammingError: (psycopg2.errors.DuplicateAlias) table name "anon_1" specified more than once
This only happens with limit() applied.
It appears that the polymorphism mapper wants to join the (eagerly loaded) UmbrellaOrganizationProperty to both the UmbrellaOrganization and OrganizationBase, even though it does not belong there. Without changing the model, the only way I have found to prevent this is to tell it not to load UmbrellaOrganizationProperty eagerly, by adding this query option:
.options(lazyload(UmbrellaOrganization.properties))
This is potentially problematic because client code may expect the properties in the results. What else can I do?

SqlAlchemy. Cannot add two Foreign keys. There are multiple foreign key paths linking the tables

I'm new in sqlalchemy. I need to create two foreign keys in child (Student) class.
Now I can do something like this:
>>> student_one = Student(name='Sam')
>>> student_two = Student(name='Nick')
>>> group_one = Group(group_number='ST141', students=[student_one, student_two], senior_student=student_one)
>>> group_one.students
>>> group_one.senior_student
That would be correct. And now I want to have a senior_student_of field in the Student table. I've tried many ways to do this, but couldn't get it to work.
This is my code:
class Group(Base):
    """ORM model for the ``groups`` table."""

    __tablename__ = 'groups'

    id = Column(Integer, primary_key=True)
    group_number = Column(String(10), nullable=True, default='')
    study_hours = Column(Integer, default=0, nullable=True)
    lab_studies = Column(Integer, default=0, nullable=True)
    pract_studies = Column(Integer, default=0, nullable=True)

    # Dynamic relationship to Tutor, with a "tutors" backref on Tutor.
    curator = relationship('Tutor', backref='tutors', lazy='dynamic')
    # Dynamic relationship to Student, with a "groups" backref on Student.
    students = relationship('Student', backref='groups', lazy='dynamic')
    # Scalar, joined-eager-loaded relationship to Student.
    senior_student = relationship('Student', uselist=False, lazy='joined')

    def __repr__(self):
        """Return the group number as the representation."""
        return self.group_number
class Student(Base):
    """ORM model for the ``students`` table."""

    __tablename__ = 'students'

    id = Column(Integer, primary_key=True)
    surname = Column(String(20), nullable=True, default='')
    name = Column(String(20), nullable=True, default='')
    patronymic = Column(String(20), nullable=True, default='')
    absences = Column(Integer, nullable=True, default=0)
    undone_labs = Column(Integer, nullable=True, default=0)
    unready_labs = Column(Integer, nullable=True, default=0)

    # Foreign key to the group and the matching relationship.
    group_id = Column(Integer, ForeignKey(Group.id))
    group = relationship(Group)

    # This doesn't work: it is a second foreign key to the same table, and
    # neither relationship says which foreign key it should use.
    senior_student_of_id = Column(Integer, ForeignKey(Group.id))
    senior_student_of = relationship(Group)
Always get an error:
sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join condition between parent/child tables on relationship Group.students - there are multiple foreign key paths linking the tables. Specify the 'foreign_keys' argument, providing a list of those columns which should be counted as containing a foreign key reference to the parent table.
Finally, I'll need to get student_one.senior_student_of correctly and see ST141 there.
I couldn't find the right solution. Thank you for your help! :)
You can do:
group = relationship(Group, foreign_keys=[group_id, ])
senior_student_of = relationship(Group, foreign_keys=[senior_student_of_id, ])
Also remove the relationship definitions from Group. Use backref to define the backref, like this:
class Group(Base):
    """ORM model for the ``groups`` table.

    No relationships are declared here; this class only defines columns.
    """

    __tablename__ = 'groups'

    id = Column(Integer, primary_key=True)
    group_number = Column(String(10), nullable=True, default='')
    study_hours = Column(Integer, default=0, nullable=True)
    lab_studies = Column(Integer, default=0, nullable=True)
    pract_studies = Column(Integer, default=0, nullable=True)

    def __repr__(self):
        """Return the group number as the representation."""
        return self.group_number
class Student(Base):
    """ORM model for the ``students`` table.

    The table has two foreign keys to ``groups``, so each relationship
    names the column it uses via ``foreign_keys``. The reverse attributes
    on ``Group`` (``students`` and ``senior_student``) are created by the
    ``backref`` arguments.
    """

    __tablename__ = 'students'

    id = Column(Integer, primary_key=True)
    surname = Column(String(20), nullable=True, default='')
    name = Column(String(20), nullable=True, default='')
    patronymic = Column(String(20), nullable=True, default='')
    absences = Column(Integer, nullable=True, default=0)
    undone_labs = Column(Integer, nullable=True, default=0)
    unready_labs = Column(Integer, nullable=True, default=0)

    # Group the student belongs to; adds Group.students.
    group_id = Column(Integer, ForeignKey(Group.id))
    group = relationship(Group, foreign_keys=[group_id], backref="students")

    # Group the student is the senior student of; adds Group.senior_student.
    # backref is a relationship() argument — Column() does not accept it.
    # NOTE(review): a plain string backref makes Group.senior_student a
    # collection; use backref("senior_student", uselist=False) for a scalar.
    senior_student_of_id = Column(Integer, ForeignKey(Group.id))
    senior_student_of = relationship(
        Group,
        foreign_keys=[senior_student_of_id],
        backref="senior_student",
    )
I have never used uselist=False though, so I do not know if that works.
I am not sure if this is what you really meant:
students = relationship('Student', backref='groups', lazy='dynamic')
This will put the backref groups into the Student, which means you will have Student.groups. But you also define Student.group in the Student class.

how can I use a column that is not a Primary Key as a Foreign Key?

This might be a dumb question, but I can't find anything that indicates that you can't, however, when I try it, it does not work. I have an example of such an instance, but I can't reproduce the results.
Using SQLAlchemy, here is the working instance:
class Commissions(Base):
    """ORM model for the ``commissions`` table."""

    __tablename__ = "commissions"

    id = Column(
        Integer, Sequence("commission_id", optional=True), primary_key=True
    )

    # The commission period is referenced by its name, not by its id.
    commission_period_name = Column(
        Unicode(40), ForeignKey("commission_periods.name")
    )
    commission_period = relationship(
        "CommissionPeriods",
        primaryjoin="CommissionPeriods.name == Commissions.commission_period_name",
        uselist=False,
        backref="commissions",
    )

    # Agent the commission belongs to.
    agent_id = Column(Integer, ForeignKey("agents.id"))
    agent = relationship(
        Agents,
        primaryjoin=Agents.id == agent_id,
        uselist=False,
        backref="commissions",
    )

    create_stamp = Column(DateTime)
    commission_type = Column(Unicode(40))
    amount = Column(Numeric(10, 2))

    # Transaction the commission is linked to.
    transactions_id = Column(Integer, ForeignKey(Transactions.id))
    transaction = relationship(
        Transactions,
        primaryjoin=Transactions.id == transactions_id,
        backref="commissions",
        uselist=False,
    )
Note the commission_period uses name, not an id as reference.
Here is the CommissionPeriods table definition:
class CommissionPeriods(Base):
    """ORM model for the ``commission_periods`` table."""

    __tablename__ = "commission_periods"

    id = Column(
        Integer, Sequence("commission_periods_id", optional=True), primary_key=True
    )
    # Indexed (but not declared unique) period name.
    name = Column(Unicode(40), index=True)
    start_date = Column(DateTime(), index=True)
    end_date = Column(DateTime(), index=True)
    network = Column(Unicode(40), index=True)
    status = Column(Unicode(40), index=True)
    created_by = Column(Unicode(40))
    create_stamp = Column(DateTime())
    modify_stamp = Column(DateTime())
The alembic scripts run without errors.
I have tried to replicate the results, with different tables (more or less the same table structures, with a name column that I am trying to use as a FK), but I have had no luck - on the python side everything is okay, but once I try to run the alembic scripts it tells me that the Foreign key is incorrectly formed.
Can someone please explain to me how this works, why it works in the given instance but not when I try to replicate the results from the above?
This is what I have tried in replicating the results:
class Networks(Base):
    """ORM model for the ``networks`` table."""

    __tablename__ = "networks"

    id = Column(
        Integer, Sequence('networks_id', optional=True), primary_key=True
    )
    # Indexed (but not declared unique) network name.
    name = Column(Unicode(40), index=True)
    rica_name = Column(Unicode(40))
    net_iccid_start = Column(Integer)
    net_iccid_end = Column(Integer)
    net_iccid_int = Column(Integer)
    network_class = Column(Unicode(60))
    # Attribute _config is stored in the database column named "config".
    _config = Column("config", Unicode(2048))
Note that in the above table I want to use the name column as foreign key in:
class AgentRecharges(Base):
    """ORM model for the ``agent_recharges`` table."""

    __tablename__ = "agent_recharges"

    id = Column(
        Integer, Sequence('agent_recharges_id', optional=True), primary_key=True
    )
    status = Column(Unicode(40))
    create_stamp = Column(DateTime, index=True)

    # Agent that created the recharge.
    create_by = Column(Integer, ForeignKey(Agents.id))
    create_by_agent = relationship(
        Agents, primaryjoin=Agents.id == create_by, uselist=False
    )

    modify_stamp = Column(DateTime, index=True)
    complete_stamp = Column(DateTime, index=True)
    msisdn = Column(Unicode(20), index=True, nullable=False)
    amount = Column(Float, index=True, nullable=False)

    # The network is referenced by its name, not by its id.
    network_name = Column(
        Unicode(40), ForeignKey("networks.name"), nullable=False
    )
    network = relationship(
        "Networks",
        primaryjoin="Networks.name == AgentRecharges.network_name",
        uselist=False,
    )

    iccid = Column(Unicode(40))
    sim = relationship(Sims, backref="agent_recharges")

    # Agent the recharge belongs to.
    agents_id = Column(Integer, ForeignKey(Agents.id))
    agent = relationship(
        Agents, primaryjoin=Agents.id == agents_id, uselist=False
    )

    # Transaction linked to the recharge.
    transactions_id = Column(Integer, ForeignKey(Transactions.id))
    transaction = relationship(
        Transactions,
        primaryjoin=Transactions.id == transactions_id,
        backref="agent_recharges",
        uselist=False,
    )

    # Batch the recharge is part of.
    recharge_batch_id = Column(Integer, ForeignKey(RechargeBatches.id))
    recharge_batch = relationship(RechargeBatches)
When I run the alembic script to add this new table, it tells me that the foreign key is incorrectly formed.
Any ideas on why, and how I can accomplish my goal?

Categories

Resources