Reset index name in elasticsearch dsl - python

I'm trying to create an ETL that extracts from mongo, process the data and loads into elastic. I will do a daily load so I thought of naming my index with the current date. This will help me for a later processing I need to do with this first index.
I used elasticsearch dsl guide: https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html
The problem that I have comes from my little experience with working with classes. I don't know how to reset the Index name from the class.
Here is my code for the class (custom_indices.py):
from elasticsearch_dsl import Document, Date, Integer, Keyword, Text
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Search
import datetime
class News(Document):
title = Text(analyzer='standard', fields={'raw': Keyword()})
manual_tagging = Keyword()
class Index:
name = 'processed_news_'+datetime.datetime.now().strftime("%Y%m%d")
def save(self, ** kwargs):
return super(News, self).save(** kwargs)
def is_published(self):
return datetime.now() >= self.processed
And this is the part of the code where I create the instance to that class:
from custom_indices import News
import elasticsearch
import elasticsearch_dsl
from elasticsearch_dsl.connections import connections
import pandas as pd
import datetime
connections.create_connection(hosts=['localhost'])
News.init()
for index, doc in df.iterrows():
new_insert = News(meta={'id': doc.url_hashed},
title = doc.title,
manual_tagging = doc.customTags,
)
new_insert.save()
Every time I call the "News" class I would expect to have a new name. However, the name doesn't change even if I load the class again (from custom_indices import News). I know this is only a problem I have when testing but I'd like to know how to force that "reset". Actually, I originally wanted to change the name outside the class with this line right before the loop:
News.Index.name = "NEW_NAME"
However, that didn't work. I was still seeing the name defined on the class.
Could anyone please assist?
Many thanks!
PS: this must be just an object oriented programming issue. Apologies for my ignorance on the subject.

Maybe you could take advantage of the fact that Document.init() accepts an index keyword argument. If you want the index name to get set automatically, you could implement init() in the News class and call super().init(...) in your implementation.
A simplified example (python 3.x):
from elasticsearch_dsl import Document
from elasticsearch_dsl.connections import connections
import datetime
class News(Document):
#classmethod
def init(cls, index=None, using=None):
index_name = index or 'processed_news_' + datetime.datetime.now().strftime("%Y%m%d")
return super().init(index=index_name, using=using)

You can override the index when you call save() .
new_insert.save('processed_news_' + datetime.datetime.now().strftime("%Y%m%d"))

Example as following.
# coding: utf-8
import datetime
from elasticsearch_dsl import Keyword, Text, \
Index, Document, Date
from elasticsearch_dsl.connections import connections
HOST = "localhost:9200"
index_names = [
"foo-log-",
"bar-log-",
]
default_settings = {"number_of_shards": 4, "number_of_replicas": 1}
index_settings = {
"foo-log-": {
"number_of_shards": 40,
"number_of_replicas": 1
}
}
class LogDoc(Document):
level = Keyword(ignore_above=256)
date = Date(format="yyyy-MM-dd'T'HH:mm:ss.SSS")
hostname = Text(fields={'fields': Keyword(ignore_above=256)})
message = Text()
createTime = Date(format="yyyy-MM-dd'T'HH:mm:ss.SSS")
def auto_create_index():
'''自动创建ES索引'''
connections.create_connection(hosts=[HOST])
for day in range(3):
dt = datetime.datetime.now() + datetime.timedelta(days=day)
for index in index_names:
name = index + dt.strftime("%Y-%m-%d")
settings = index_settings.get(index, default_settings)
idx = Index(name=name)
idx.document(LogDoc)
idx.settings(**settings)
try:
idx.create()
except Exception as e:
print(e)
continue
print("create index %s" % name)
if __name__ == '__main__':
auto_create_index()

Related

Access STEP Instance ID's with PythonOCC

Let's suppose I'm using this STEP file data as input:
#417=ADVANCED_FACE('face_1',(#112),#405,.F.);
#418=ADVANCED_FACE('face_2',(#113),#406,.F.);
#419=ADVANCED_FACE('face_3',(#114),#407,.F.);
I'm using pythonocc-core to read the STEP file.
Then the following code will print the names of the ADVANCED_FACE instances (face_1,face_2 and face_3):
from OCC.Core.STEPControl import STEPControl_Reader
from OCC.Core.TopExp import TopExp_Explorer
from OCC.Core.TopAbs import TopAbs_FACE
from OCC.Core.StepRepr import StepRepr_RepresentationItem
reader = STEPControl_Reader()
tr = reader.WS().TransferReader()
reader.ReadFile('model.stp')
reader.TransferRoots()
shape = reader.OneShape()
exp = TopExp_Explorer(shape, TopAbs_FACE)
while exp.More():
s = exp.Current()
exp.Next()
item = tr.EntityFromShapeResult(s, 1)
item = StepRepr_RepresentationItem.DownCast(item)
name = item.Name().ToCString()
print(name)
How can I access the identifiers of the individual shapes? (#417,#418 and #419)
Minimal reproduction
https://github.com/flolu/step-occ-instance-ids
Create a STEP model after reader.TransferRoots() like this:
model = reader.StepModel()
And access the ID like this in the loop:
id = model.IdentLabel(item)
The full code looks like this and can also be found on GitHub:
from OCC.Core.STEPControl import STEPControl_Reader
from OCC.Core.TopExp import TopExp_Explorer
from OCC.Core.TopAbs import TopAbs_FACE
from OCC.Core.StepRepr import StepRepr_RepresentationItem
reader = STEPControl_Reader()
tr = reader.WS().TransferReader()
reader.ReadFile('model.stp')
reader.TransferRoots()
model = reader.StepModel()
shape = reader.OneShape()
exp = TopExp_Explorer(shape, TopAbs_FACE)
while exp.More():
s = exp.Current()
exp.Next()
item = tr.EntityFromShapeResult(s, 1)
item = StepRepr_RepresentationItem.DownCast(item)
label = item.Name().ToCString()
id = model.IdentLabel(item)
print('label', label)
print('id', id)
Thanks to temurka1 for pointing this out!
I was unable to run your code due to issues installing the pythonocc module, however, I suspect that you should be able to inspect the StepRep_RepresentationItem object (prior to string conversion) by traversing __dict__ on it to discover/access whatever attributes/properties/methods of the object you may need:
entity = tr.EntityFromShapeResult(s, 1)
item = StepRepr_RepresentationItem.DownCast(entity)
print(entity.__dict__)
print(item.__dict__)
If necessary the inspect module exists to pry deeper into the object.
References
https://docs.python.org/3/library/stdtypes.html#object.__dict__
https://docs.python.org/3/library/inspect.html
https://github.com/tpaviot/pythonocc-core/blob/66d6e1ef6b7552a1110a90e86a1ed34eb12ecf16/src/SWIG_files/wrapper/StepElement.pyi

python class managing in another class

I have a class called martCrawler which import other 3 crawlers from other files
However, the code becomes too long as the crawler imported increased.
Is there a better practice managing the code like this?
Thanks in advance~
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart
import datetime
class martCrawler():
def __init__(self):
self.wat = crawler_watsons()
self.cos = crawler_cosmed()
self.pxm = crawler_pxmart()
def crawler_testing(self):
result_pack = {}
wat_result = self.wat.run_before_insert()
cos_result = self.cos.run_before_insert()
pxm_result = self.pxm.run_before_insert()
result_pack['wat'] = wat_result
result_pack['cos'] = cos_result
result_pack['pxm'] = pxm_result
return result_pack
...
Then why not store all the crawlers in a dict from the beginning?
As an example:
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart
class MartCrawler():
def __init__(self, *args):
self.crawlers = {}
for crawler in args:
# use some introspection to get the name of the classes.
# The names should be "crawler_watsons", etc
self.crawlers[crawler.__name__] = crawler()
def crawler_testing(self):
result_pack = {}
for name, crawler in self.crawlers.items():
result_back[name] = crawler.run_before_insert()
return result_back
martCrawler = MartCrawler(crawler_watsons, crawler_cosmed, crawler_pxmart)
Just have in mind that the names in your dict will be the actual names of the imported classes, not 'wat', 'pos' and 'pxm'. But if this is a problem you can use some string manipulation and/or regexes to fix it.
For example you could replace crawler.__name__ with crawler.__name__[8:11]
Just put them into a dictionary:
from Scripts_mart.wat_ver2 import crawler_watsons
from Scripts_mart.cosmed_ver1 import crawler_cosmed
from Scripts_mart.pxmart_ver1 import crawler_pxmart
import datetime
class martCrawler():
def __init__(self):
self.crawlers = {
"wat": crawler_watsons(),
"cos": crawler_cosmed(),
"pxm": crawler_pxmart()
}
def crawler_testing(self):
result_pack = {}
for key in self.crawlers:
result_pack[key] = self.crawlers[key].run_before_insert()
return result_pack

Error while calling method defined inside a class

I am trying to create a class and I can't seem to get it to work? I'm fairly new to Python, so any assistance would be appreciated. Also, not sure if this is the most efficient way to create and use an object. I am trying to build a well model and this is one piece of that model, once I get this simple issue figured out the rest should be fairly easy. Thanks.
import sys
import os
import csv
import pyodbc
import pandas as pd
import pandas.io.sql as psql
from pandas import Series, DataFrame
from time import gmtime, strftime
#Drill Pipe Class
class DP:
#Properties
DP_ID = 1.00
DP_OD = 1.00
DP_Name = 'Drill Pipe'
#test global
idwel = '6683AFCEA5DF429CAC123213F85EB9B3'
#Constructor <- Accepts idwell to get info
def __init__(self,idwell):
self.id = idwell
#..
#WV DB connecton Function -> return as dataframe -Updated 7/5/17
def WV_Read_Query(_query):
try:
cnxn = pyodbc.connect("DSN=SQL_R_WV")
cur = cnxn.cursor()
df = psql.read_sql(_query, cnxn)
cnxn.close()
#print(df)
return df
except "Error":
return "Query Error...!"
#..
def get_DP_Data(_id):
_id = str(_id)
DP_Query = """Select Top 1
DS.des as 'dp_name',DS.SZIDNOM as 'dp_id',
DS.SZODNOM as 'dp_od',DS.SYSCREATEDATE as 'date'
From [dbo].[US_WVJOBDRILLSTRINGCOMP] DS
Where IDWELL = '""" + _id +"""'
AND Des = 'Drill Pipe' Order by SYSCREATEDATE Desc"""
mud_Data = WV_Read_Query(DP_Query)
return mud_Data
#..
DP_Table = get_DP_Data(id)
def get_DP_ID(self, DP_Table):
dp_id = DP_Table['dp_id']
return dp_id
#..
def get_DP_OD(self, DP_Table):
dp_od = DP_Table['dp_od']
return dp_od
#..
def get_Date(self, DP_Table):
u_date = DP_Table['date']
return u_date
#..
def get_Des(self, DP_Table):
des = DP_Table['dp_name']
return des
#..
#Print DP Info
def DP_Info(self):
Des = get_Des()
ID = get_DP_ID()
OD = get_DP_OD()
Updated = strftime("%Y-%m-%d %H:%M:%S", gmtime())
return Des + "\nDP Id:\t" + ID + "\nDP Id:\t" + OD + "\nUpdated:\t" + Updated
#..
#...
dp = DP('6683AFCEA5DF429CAC123213F85EB9B3')
dp_info = dp.DP_Info()
print(dp_info)
Traceback (most recent call last): File "u:\Development\Python
Scripts\HCP\CUC Export Files 8_7_17\Well_Model.py", line 71, in
class DP: File "u:\Development\Python Scripts\HCP\CUC Export Files 8_7_17\Well_Model.py", line 108, in DP
DP_Table = get_DP_Data(id) File "u:\Development\Python Scripts\HCP\CUC Export Files 8_7_17\Well_Model.py", line 104, in
get_DP_Data
mud_Data = WV_Read_Query(DP_Query) NameError: name 'WV_Read_Query' is not defined
If you are defining non-static, non-class methods within a function, the first argument is always an instance of that class. We usually call this argument self:
def WV_Read_Query(self, _query):
...
And,
def get_DP_Data(self, _id):
Furthermore, you call a these methods on the object self:
self.WV_Read_Query(DP_Query)
You might wonder why the function is defined with 2 arguments, but only 1 passed to. That's because the instance is implicitly passed as the first parameter, automatically.
This is equivalent to
DP.WV_Read_Query(self, DP_Query)
Where you call the method on the class, but explicitly pass the instance to it.
Further reading:
Python classes
What is the difference between class and instance methods?
You need to access it with self. So
def get_DP_Data(self, _id):
_id = str(_id)
DP_Query = """Select Top 1
DS.des as 'dp_name',DS.SZIDNOM as 'dp_id',
DS.SZODNOM as 'dp_od',DS.SYSCREATEDATE as 'date'
From [dbo].[US_WVJOBDRILLSTRINGCOMP] DS
Where IDWELL = '""" + _id +"""'
AND Des = 'Drill Pipe' Order by SYSCREATEDATE Desc"""
mud_Data = self.WV_Read_Query(DP_Query)
return mud_Data
You also need to add self to several of your methods. The class instance will always be the first parameter in a method unless you define it as a staticmethod using the decorator staticmethod.

How to JSON dump to a rotating file object

I'm writing a program which periodically dumps old data from a RethinkDB database into a file and removes it from the database. Currently, the data is dumped into a single file which grows without limit. I'd like to change this so that the maximum file size is, say, 250 Mb, and the program starts to write to a new output file just before this size is exceeded.
It seems like Python's RotatingFileHandler class for loggers does approximately what I want; however, I'm not sure whether logging can be applied to any JSON-dumpable object or just to strings.
Another possible approach would be to use (a variant of) Mike Pennington's
RotatingFile class (see python: outfile to another text file if exceed certain file size).
Which of these approaches is likely to be the most fruitful?
For reference, my current program is as follows:
import os
import sys
import json
import rethinkdb as r
import pytz
from datetime import datetime, timedelta
import schedule
import time
import functools
from iclib import RethinkDB
import msgpack
''' The purpose of the Controller is to periodically archive data from the "sensor_data" table so that it does not grow without limit.'''
class Controller(RethinkDB):
def __init__(self, db_address=(os.environ['DB_ADDR'], int(os.environ['DB_PORT'])), db_name=os.environ['DB_NAME']):
super(Controller, self).__init__(db_address=db_address, db_name=db_name) # Initialize the IperCronComponent with the default logger name (in this case, "Controller")
self.db_table = RethinkDB.SENSOR_DATA_TABLE # The table name is "sensor_data" and is stored as a class variable in RethinkDBMixIn
def generate_archiving_query(self, retention_period=timedelta(days=3)):
expiry_time = r.now() - retention_period.total_seconds() # Timestamp before which data is to be archived
if "timestamp" in r.table(self.db_table).index_list().run(self.db): # If "timestamp" is a secondary index
beginning_of_time = r.time(1400, 1, 1, 'Z') # The minimum time of a ReQL time object (i.e., the year 1400 in the UTC timezone)
data_to_archive = r.table(self.db_table).between(beginning_of_time, expiry_time, index="timestamp") # Generate query using "between" (faster)
else:
data_to_archive = r.table(self.db_table).filter(r.row['timestamp'] < expiry_time) # Generate the same query using "filter" (slower, but does not require "timestamp" to be a secondary index)
return data_to_archive
def archiving_job(self, data_to_archive=None, output_file="archived_sensor_data.json"):
if data_to_archive is None:
data_to_archive = self.generate_archiving_query() # By default, the call the "generate_archiving_query" function to generate the query
old_data = data_to_archive.run(self.db, time_format="raw") # Without time_format="raw" the output does not dump to JSON
with open(output_file, 'a') as f:
ids_to_delete = []
for item in old_data:
print item
# msgpack.dump(item, f)
json.dump(item, f)
f.write('\n') # Separate each document by a new line
ids_to_delete.append(item['id'])
r.table(self.db_table).get_all(r.args(ids_to_delete)).delete().run(self.db) # Delete based on ID. It is preferred to delete the entire batch in a single operation rather than to delete them one by one in the for loop.
def test_job_1():
db_name = "ipercron"
table_name = "sensor_data"
port_offset = 1 # To avoid interference of this testing program with the main program, all ports are initialized at an offset of 1 from the default ports using "rethinkdb --port_offset 1" at the command line.
conn = r.connect("localhost", 28015 + port_offset)
r.db(db_name).table(table_name).delete().run(conn)
import rethinkdb_add_data
controller = Controller(db_address=("localhost", 28015+port_offset))
archiving_job = functools.partial(controller.archiving_job, data_to_archive=controller.generate_archiving_query())
return archiving_job
if __name__ == "__main__":
archiving_job = test_job_1()
schedule.every(0.1).minutes.do(archiving_job)
while True:
schedule.run_pending()
It is not completely 'runnable' from the part shown, but the key point is that I would like to replace the line
json.dump(item, f)
with a similar line in which f is a rotating, and not fixed, file object.
Following Stanislav Ivanov, I used json.dumps to convert each RethinkDB document to a string and wrote this to a RotatingFileHandler:
import os
import sys
import json
import rethinkdb as r
import pytz
from datetime import datetime, timedelta
import schedule
import time
import functools
from iclib import RethinkDB
import msgpack
import logging
from logging.handlers import RotatingFileHandler
from random_data_generator import RandomDataGenerator
''' The purpose of the Controller is to periodically archive data from the "sensor_data" table so that it does not grow without limit.'''
os.environ['DB_ADDR'] = 'localhost'
os.environ['DB_PORT'] = '28015'
os.environ['DB_NAME'] = 'ipercron'
class Controller(RethinkDB):
def __init__(self, db_address=None, db_name=None):
if db_address is None:
db_address = (os.environ['DB_ADDR'], int(os.environ['DB_PORT'])) # The default host ("rethinkdb") and port (28015) are stored as environment variables
if db_name is None:
db_name = os.environ['DB_NAME'] # The default database is "ipercron" and is stored as an environment variable
super(Controller, self).__init__(db_address=db_address, db_name=db_name) # Initialize the instance of the RethinkDB class. IperCronComponent will be initialized with its default logger name (in this case, "Controller")
self.db_name = db_name
self.db_table = RethinkDB.SENSOR_DATA_TABLE # The table name is "sensor_data" and is stored as a class variable of RethinkDBMixIn
self.table = r.db(self.db_name).table(self.db_table)
self.archiving_logger = logging.getLogger("archiving_logger")
self.archiving_logger.setLevel(logging.DEBUG)
self.archiving_handler = RotatingFileHandler("archived_sensor_data.log", maxBytes=2000, backupCount=10)
self.archiving_logger.addHandler(self.archiving_handler)
def generate_archiving_query(self, retention_period=timedelta(days=3)):
expiry_time = r.now() - retention_period.total_seconds() # Timestamp before which data is to be archived
if "timestamp" in self.table.index_list().run(self.db):
beginning_of_time = r.time(1400, 1, 1, 'Z') # The minimum time of a ReQL time object (namely, the year 1400 in UTC)
data_to_archive = self.table.between(beginning_of_time, expiry_time, index="timestamp") # Generate query using "between" (faster, requires "timestamp" to be a secondary index)
else:
data_to_archive = self.table.filter(r.row['timestamp'] < expiry_time) # Generate query using "filter" (slower, but does not require "timestamp" to be a secondary index)
return data_to_archive
def archiving_job(self, data_to_archive=None):
if data_to_archive is None:
data_to_archive = self.generate_archiving_query() # By default, the call the "generate_archiving_query" function to generate the query
old_data = data_to_archive.run(self.db, time_format="raw") # Without time_format="raw" the output does not dump to JSON or msgpack
ids_to_delete = []
for item in old_data:
print item
self.dump(item)
ids_to_delete.append(item['id'])
self.table.get_all(r.args(ids_to_delete)).delete().run(self.db) # Delete based on ID. It is preferred to delete the entire batch in a single operation rather than to delete them one by one in the for-loop.
def dump(self, item, mode='json'):
if mode == 'json':
dump_string = json.dumps(item)
elif mode == 'msgpack':
dump_string = msgpack.packb(item)
self.archiving_logger.debug(dump_string)
def populate_database(db_name, table_name, conn):
if db_name not in r.db_list().run(conn):
r.db_create(db_name).run(conn) # Create the database if it does not yet exist
if table_name not in r.db(db_name).table_list().run(conn):
r.db(db_name).table_create(table_name).run(conn) # Create the table if it does not yet exist
r.db(db_name).table(table_name).delete().run(conn) # Empty the table to start with a clean slate
# Generate random data with timestamps uniformly distributed over the past 6 days
random_data_time_interval = timedelta(days=6)
start_random_data = datetime.utcnow().replace(tzinfo=pytz.utc) - random_data_time_interval
random_generator = RandomDataGenerator(seed=0)
packets = random_generator.packets(N=100, start=start_random_data)
# print packets
print "Adding data to the database..."
r.db(db_name).table(table_name).insert(packets).run(conn)
if __name__ == "__main__":
db_name = "ipercron"
table_name = "sensor_data"
port_offset = 1 # To avoid interference of this testing program with the main program, all ports are initialized at an offset of 1 from the default ports using "rethinkdb --port_offset 1" at the command line.
host = "localhost"
port = 28015 + port_offset
conn = r.connect(host, port) # RethinkDB connection object
populate_database(db_name, table_name, conn)
# import rethinkdb_add_data
controller = Controller(db_address=(host, port))
archiving_job = functools.partial(controller.archiving_job, data_to_archive=controller.generate_archiving_query()) # This ensures that the query is only generated once. (This is sufficient since r.now() is re-evaluated every time a connection is made).
schedule.every(0.1).minutes.do(archiving_job)
while True:
schedule.run_pending()
In this context the RethinkDB class does little other than define the class variable SENSOR_DATA_TABLE and the RethinkDB connection, self.db = r.connect(self.address[0], self.address[1]). This is run together with a module for generating fake data, random_data_generator.py:
import random
import faker
from datetime import datetime, timedelta
import pytz
import rethinkdb as r
class RandomDataGenerator(object):
def __init__(self, seed=None):
self._seed = seed
self._random = random.Random()
self._random.seed(seed)
self.fake = faker.Faker()
self.fake.random.seed(seed)
def __getattr__(self, x):
return getattr(self._random, x)
def name(self):
return self.fake.name()
def datetime(self, start=None, end=None):
if start is None:
start = datetime(2000, 1, 1, tzinfo=pytz.utc) # Jan 1st 2000
if end is None:
end = datetime.utcnow().replace(tzinfo=pytz.utc)
if isinstance(end, datetime):
dt = end - start
elif isinstance(end, timedelta):
dt = end
assert isinstance(dt, timedelta)
random_dt = timedelta(microseconds=self._random.randrange(int(dt.total_seconds() * (10 ** 6))))
return start + random_dt
def packets(self, N=1, start=None, end=None):
return [{'name': self.name(), 'timestamp': self.datetime(start=start, end=end)} for _ in range(N)]
When I run controller it produces several rolled-over output logs, each at most 2 kB in size, as expected:

Python loop inserting last row only in cassandra

I typed a small demo loop in order to insert random values in Cassandra but only the last record is persisted into the database. I am using cassandra-driver from datastax and its object modeling lib. Cassandra version is 3.7 and Python 3.4. Any idea what I am doing wrong?
#!/usr/bin/env python
import datetime
import uuid
from random import randint, uniform
from cassandra.cluster import Cluster
from cassandra.cqlengine import connection, columns
from cassandra.cqlengine.management import sync_table
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.query import BatchQuery
class TestTable(Model):
_table_name = 'test_table'
key = columns.UUID(primary_key=True, default=uuid.uuid4())
type = columns.Integer(index=True)
value = columns.Float(required=False)
created_time = columns.DateTime(default=datetime.datetime.now())
def main():
connection.setup(['127.0.0.1'], 'test', protocol_version = 3)
sync_table(TestTable)
for _ in range(10):
type = randint(1, 3)
value = uniform(-10, 10)
row = TestTable.create(type=type, value=value)
print("Inserted row: ", row.type, row.value)
print("Done inserting")
q = TestTable.objects.count()
print("We have inserted " + str(q) + " rows.")
if __name__ == "__main__":
main()
Many thanks!
The problem is in the definition of the key column:
key = columns.UUID(primary_key=True, default=uuid.uuid4())
For the default value it's going to call the uuid.uuid4 function once and use that result as the default for all future inserts. Because that's your primary key, all 10 writes will happen to the same primary key.
Instead, drop the parentheses so you are just passing a reference to uuid.uuid4 rather than calling it:
key = columns.UUID(primary_key=True, default=uuid.uuid4)
Now each time you create a row you'll get a new unique UUID value, and therefore a new row in Cassandra.
You need to use the method save.
...
row = TestTable(type=type, value=value)
row.save()
...
http://cqlengine.readthedocs.io/en/latest/topics/models.html#cqlengine.models.Model.save

Categories

Resources