first I'd like to check if the file exists, and Ive used this os.path:
def check_db_exist():
try:
file_exists = exists('games.db')
if file_exists:
file_size = os.path.getsize('games.db')
if file_size > 3000:
return True, file_size
else:
return False, 'too small'
else:
return False, 'does not exist'
except:
return False, 'error'
I have a separate file for my models, and creating the database. My concern is, if I import the class for the database it instantiates the sql file.
Moreover, pywebview when displaying my html, wipes all variables.
If I were to run this process as I load my page, then I can't access the variable for true/false sqlite exists.
db = SqliteDatabase('games.db')
class Game(Model):
game = CharField()
exe = CharField()
path = CharField()
longpath = CharField()
i_d = IntegerField()
class Meta:
database = db
This creates the table, so checking if the file exists is useless.
Then if I uncomment the first line in this file the database gest created, otherwise all of my db. variables are unusable. I must be missing a really obvious function to solve my problems.
# db = SqliteDatabase('games.db')
def add_game(game, exe, path, longpath, i_d):
try:
Game.create(game=game, exe=exe, path=path, longpath=longpath, i_d=i_d)
except:
pass
def loop_insert(lib):
db.connect()
for i in lib[0]:
add_game(i.name, i.exe, i.path, i.longpath, i.id)
db.close()
def initial_retrieve():
db.connect()
vals = ''
for games in Game.select():
val = js.Import.javascript(str(games.game), str(games.exe), str(games.path), games.i_d)
vals = vals + val
storage = vals
db.close()
return storage
should I just import the file at a different point in the file? whenever I feel comfortable? I havent seen that often so I didnt want to be improper in formatting.
edit: edit: Maybe more like this?
def db():
db = SqliteDatabase('games.db')
return db
class Game(Model):
game = CharField()
exe = CharField()
path = CharField()
file 2:
from sqlmodel import db, Game
def add_game(game, exe, path, longpath, i_d):
try:
Game.create(game=game, exe=exe, path=path, longpath=longpath, i_d=i_d)
except:
pass
def loop_insert(lib):
db.connect()
for i in lib[0]:
add_game(i.name, i.exe, i.path, i.longpath, i.id)
db.close()
I am not sure if this answers your question, since it seems to involve multiple processes and/or processors, but In order to check for the existence of a database file, I have used the following:
DATABASE = 'dbfile.db'
if os.path.isfile(DATABASE) is False:
# Create the database file here
pass
else:
# connect to database here
db.connect()
I would suggest using sqlite's user_version pragma:
db = SqliteDatabase('/path/to/db.db')
version = db.pragma('user_version')
if not version: # Assume does not exist/newly-created.
# do whatever.
db.pragma('user_version', 1) # Set user version.
from reddit:
me: To the original challenge, there's a reason I want to know whether the file exists. Maybe its flawed at the premises, I'll explain and you can fill in there.
This script will run on multiple machines I dont ahve access to. At the entry point of a first-time use case, I will be porting data from a remote location, if its the first time the script runs on that machine, its going down a different work flow than a repeated opening.
Akin to grabbing all pc programs vs appending and reading from teh last session. How would you suggest quickly understanding if that process has started and finished from a previous session.
Checking if the sqlite file is made made the most intuitive sense, and then adjusting to byte size. lmk
them:
This is a good question!
How would you suggest quickly understanding if that process
has started and finished from a previous session.
If the first thing your program does on a new system is download some kind of fixture data, then the way I would approach it is to load the DB file as normal, have Peewee ensure the tables exist, and then do a no-clause SELECT on one of them (either through the model, or directly on the database through the connection if you want.) If it's empty (you get no results) then you know you're on a fresh system and you need to make the remote call. If you get results (you don't need to know what they are) then you know you're not on a fresh system.
I have developed a web application with neo4j(py2neo), Flask and HTML. I am creating the Neo4j graph by reading the csv file. This CSV file size is larger. So whenever I start or restart the server Neo4j graph is getting created everytime and it takes lot of time for this setup. Is there anyway I can use the created graph and store it in my machine. So My project struture is I have models.py which contains connecting the graph, creating the graph with relations, and returns the data to routes.py.
My models.py looks like this
from py2neo import Graph, Node, Relationship,NodeMatcher
import pandas as pd
class Query:
"Model the tags"
print("I am coming here")
graph = Graph("http://blah:blah#127.0.0.1/db/data")
print("hi i am second")
try:
graph.run("Match () Return 1 Limit 1")
df=pd.read_csv("data.csv")
print("I am not reading till")
graph.delete_all()
matcher = NodeMatcher(graph)
row_count = len(df)
for i in range(row_count):
tags = df['tags'][i]
each_tag = tags.split('|')
tag_data = [x.lower() for x in each_tag]
for j,first_tag in enumerate(tag_data[:-1]):
match_first_tag = matcher.match("May21tag",tagName=first_tag).first()
graph.push(match_first_tag)
for second_tag in tag_data[j+1:]:
my_tag = second_tag
second_tag = matcher.match("May21tag",tagName=second_tag).first()
graph.push(second_tag)
create_relationship = Relationship(match_first_tag ,"tagged",second_tag)
graph.merge(create_relationship)
except Exception as e:
print('not ok',e)
def __init__(tech):
tech_word = tech
print("in init")
def fetch_nodes(self,tech_word):
graph = Graph("http://blah:blah#127.0.0.1/db/data")
matcher = NodeMatcher(graph)
match_nodes = matcher.match(tagName=tech_word).first()
return (list(r.end_node["tagName"] for r in self.graph.match(nodes=(match_nodes,)).limit(6)))
So what is happening here is whenever I run the app.py I can see the graph and relationship are getting created everytime. So it takes time. WHat I need here is I wanted to use the existing graph with relations whenever I start or restart the server.
Try deleting everything in Query except the __init__ and fetch_nodes method definitions.
I was trying to implement a dashboard by following the instructions of https://github.com/talpor/django-dashing/ using django-dashing.
So far I have successfully customised my widget and displayed with some random data on my own web server, while I have no clue where to start if I'd like to pull some real data from DB(MySQL) and display. (like where to do the DB connecting,..etc)
Could anyone show me steps I should follow to implement it?
If it's still relevant, you can start by connecting to the database with sqlalchemy.
import sqlalchemy as sq
from sqlalchemy.engine import url as sq_url
db_connect_url = sq_url.URL(
drivername='mysql+mysqldb',
username=DB_username,
password=DB_password,
host=DB_hostname,
port=DB_port,
database=DB_name,
)
engine = sq.create_engine(db_connect_url)
From there you can manipulate the data by checking the available methods on engine. What I normally do is use pandas in situations like these.
import pandas as pd
df = pd.read_sql_table(table_name, engine)
I also had to do this recently.... I managed to get it sorted - but it is a little too clunky.
I created a Separate CherryPy REST Api in a separate Project. The Entry point looks like
#cherrpy.expose
def web_api_to_call(self, table,value):
#Do SQL Query
return str(sql_table_value)
Then in Django Created a New app, then created a Widget.py. Inside the Widget.py I wrote something like this.
import requests
class webquery(NumberWidget):
classparams=[("widget1","web_api_to_call","table","values"),
("widget2","web_api_to_call","table2","values2"),
("widget3","web_api_to_call","table3","values3")]
def myget(self):
for tup in self.classparams:
if tup[0]==type(self).__name__:
url=tup[1]
table=tup[2]
value=tup[3]
url = "http://127.0.0.1:8000/"+url
# Do Web Call Error Checking Omitted
return requests.get(url,params={"table":table,"values":value)}).text()
def get_value(self):
#Override default
return self.my_get()
#Now create new Widgets as per the static definition at the top
class widget1(web query):
id=1
class widget2(web query):
id=1
class widget3(web query):
id=1
You now just add your new widgets - as you normally would in the urls.py and then in the dashing-config.js and you are done.
Hope this assists someone.
I am working with flask and redis. I've using the rom redis orm (http://pythonhosted.org/rom/) to manage some mildly complex data structures. I want to add the ability to set the objects to set an expiry time.
Based on https://github.com/josiahcarlson/rom/issues/40 and https://github.com/josiahcarlson/rom/pull/47
I have a rom model:
class A(rom.Model):
url = rom.String(required=True, unique=True)()
t = rom.String()
delete_at = rom.Float(index=True)
created_at = rom.Float(default=time.time, index=True)
which I can instantiate and save:
a_object = A(url=u, delete_at =time.time+7200) # 2 hour expiry
try:
ad_object.save()
except Exception,e:
print str(e)
I have a cronjob which executes every hour and so I want to do something like:
delete_list = A.get_by((time.time()-delete_at>0)) # pseudocode.
Obviously this is incorrect, but if I can get this into a list I could delete these. How can I express the above pseudocode using the rom ORM?
I emailed Josiah, the package developer. His answer:
"This can be done in one of 2 ways:
delete_list = A.get_by(delete_at=(0, time.time()))
delete_list = A.query.filter(delete_at=(0, time.time())).all()
The query attribute/object offers a collection of different filers, depending on your defined indexes."
I would like to wipe out all data for a specific kind in Google App Engine. What is the
best way to do this?
I wrote a delete script (hack), but since there is so much data is
timeout's out after a few hundred records.
I am currently deleting the entities by their key, and it seems to be faster.
from google.appengine.ext import db
class bulkdelete(webapp.RequestHandler):
def get(self):
self.response.headers['Content-Type'] = 'text/plain'
try:
while True:
q = db.GqlQuery("SELECT __key__ FROM MyModel")
assert q.count()
db.delete(q.fetch(200))
time.sleep(0.5)
except Exception, e:
self.response.out.write(repr(e)+'\n')
pass
from the terminal, I run curl -N http://...
You can now use the Datastore Admin for that: https://developers.google.com/appengine/docs/adminconsole/datastoreadmin#Deleting_Entities_in_Bulk
If I were a paranoid person, I would say Google App Engine (GAE) has not made it easy for us to remove data if we want to. I am going to skip discussion on index sizes and how they translate a 6 GB of data to 35 GB of storage (being billed for). That's another story, but they do have ways to work around that - limit number of properties to create index on (automatically generated indexes) et cetera.
The reason I decided to write this post is that I need to "nuke" all my Kinds in a sandbox. I read about it and finally came up with this code:
package com.intillium.formshnuker;
import java.io.IOException;
import java.util.ArrayList;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.appengine.api.datastore.Key;
import com.google.appengine.api.datastore.Query;
import com.google.appengine.api.datastore.Entity;
import com.google.appengine.api.datastore.FetchOptions;
import com.google.appengine.api.datastore.DatastoreService;
import com.google.appengine.api.datastore.DatastoreServiceFactory;
import com.google.appengine.api.labs.taskqueue.QueueFactory;
import com.google.appengine.api.labs.taskqueue.TaskOptions.Method;
import static com.google.appengine.api.labs.taskqueue.TaskOptions.Builder.url;
#SuppressWarnings("serial")
public class FormsnukerServlet extends HttpServlet {
public void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
response.setContentType("text/plain");
final String kind = request.getParameter("kind");
final String passcode = request.getParameter("passcode");
if (kind == null) {
throw new NullPointerException();
}
if (passcode == null) {
throw new NullPointerException();
}
if (!passcode.equals("LONGSECRETCODE")) {
response.getWriter().println("BAD PASSCODE!");
return;
}
System.err.println("*** deleting entities form " + kind);
final long start = System.currentTimeMillis();
int deleted_count = 0;
boolean is_finished = false;
final DatastoreService dss = DatastoreServiceFactory.getDatastoreService();
while (System.currentTimeMillis() - start < 16384) {
final Query query = new Query(kind);
query.setKeysOnly();
final ArrayList<Key> keys = new ArrayList<Key>();
for (final Entity entity: dss.prepare(query).asIterable(FetchOptions.Builder.withLimit(128))) {
keys.add(entity.getKey());
}
keys.trimToSize();
if (keys.size() == 0) {
is_finished = true;
break;
}
while (System.currentTimeMillis() - start < 16384) {
try {
dss.delete(keys);
deleted_count += keys.size();
break;
} catch (Throwable ignore) {
continue;
}
}
}
System.err.println("*** deleted " + deleted_count + " entities form " + kind);
if (is_finished) {
System.err.println("*** deletion job for " + kind + " is completed.");
} else {
final int taskcount;
final String tcs = request.getParameter("taskcount");
if (tcs == null) {
taskcount = 0;
} else {
taskcount = Integer.parseInt(tcs) + 1;
}
QueueFactory.getDefaultQueue().add(
url("/formsnuker?kind=" + kind + "&passcode=LONGSECRETCODE&taskcount=" + taskcount).method(Method.GET));
System.err.println("*** deletion task # " + taskcount + " for " + kind + " is queued.");
}
response.getWriter().println("OK");
}
}
I have over 6 million records. That's a lot. I have no idea what the cost will be to delete the records (maybe more economical not to delete them). Another alternative would be to request a deletion for the entire application (sandbox). But that's not realistic in most cases.
I decided to go with smaller groups of records (in easy query). I know I could go for 500 entities, but then I started receiving very high rates of failure (re delete function).
My request from GAE team: please add a feature to delete all entities of a kind in a single transaction.
Presumably your hack was something like this:
# Deleting all messages older than "earliest_date"
q = db.GqlQuery("SELECT * FROM Message WHERE create_date < :1", earliest_date)
results = q.fetch(1000)
while results:
db.delete(results)
results = q.fetch(1000, len(results))
As you say, if there's sufficient data, you're going to hit the request timeout before it gets through all the records. You'd have to re-invoke this request multiple times from outside to ensure all the data was erased; easy enough to do, but hardly ideal.
The admin console doesn't seem to offer any help, as (from my own experience with it), it seems to only allow entities of a given type to be listed and then deleted on a page-by-page basis.
When testing, I've had to purge my database on startup to get rid of existing data.
I would infer from this that Google operates on the principle that disk is cheap, and so data is typically orphaned (indexes to redundant data replaced), rather than deleted. Given there's a fixed amount of data available to each app at the moment (0.5 GB), that's not much help for non-Google App Engine users.
Try using App Engine Console then you dont even have to deploy any special code
I've tried db.delete(results) and App Engine Console, and none of them seems to be working for me. Manually removing entries from Data Viewer (increased limit up to 200) didn't work either since I have uploaded more than 10000 entries. I ended writing this script
from google.appengine.ext import db
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
import wsgiref.handlers
from mainPage import YourData #replace this with your data
class CleanTable(webapp.RequestHandler):
def get(self, param):
txt = self.request.get('table')
q = db.GqlQuery("SELECT * FROM "+txt)
results = q.fetch(10)
self.response.headers['Content-Type'] = 'text/plain'
#replace yourapp and YouData your app info below.
self.response.out.write("""
<html>
<meta HTTP-EQUIV="REFRESH" content="5; url=http://yourapp.appspot.com/cleanTable?table=YourData">
<body>""")
try:
for i in range(10):
db.delete(results)
results = q.fetch(10, len(results))
self.response.out.write("<p>10 removed</p>")
self.response.out.write("""
</body>
</html>""")
except Exception, ints:
self.response.out.write(str(inst))
def main():
application = webapp.WSGIApplication([
('/cleanTable(.*)', CleanTable),
])
wsgiref.handlers.CGIHandler().run(application)
The trick was to include redirect in html instead of using self.redirect. I'm ready to wait overnight to get rid of all the data in my table. Hopefully, GAE team will make it easier to drop tables in the future.
The official answer from Google is that you have to delete in chunks spread over multiple requests. You can use AJAX, meta refresh, or request your URL from a script until there are no entities left.
The fastest and efficient way to handle bulk delete on Datastore is by using the new mapper API announced on the latest Google I/O.
If your language of choice is Python, you just have to register your mapper in a mapreduce.yaml file and define a function like this:
from mapreduce import operation as op
def process(entity):
yield op.db.Delete(entity)
On Java you should have a look to this article that suggests a function like this:
#Override
public void map(Key key, Entity value, Context context) {
log.info("Adding key to deletion pool: " + key);
DatastoreMutationPool mutationPool = this.getAppEngineContext(context)
.getMutationPool();
mutationPool.delete(value.getKey());
}
One tip. I suggest you get to know the remote_api for these types of uses (bulk deleting, modifying, etc.). But, even with the remote api, batch size can be limited to a few hundred at a time.
Unfortunately, there's no way to easily do a bulk delete. Your best bet is to write a script that deletes a reasonable number of entries per invocation, and then call it repeatedly - for example, by having your delete script return a 302 redirect whenever there's more data to delete, then fetching it with "wget --max-redirect=10000" (or some other large number).
With django, setup url:
url(r'^Model/bdelete/$', v.bulk_delete_models, {'model':'ModelKind'}),
Setup view
def bulk_delete_models(request, model):
import time
limit = request.GET['limit'] or 200
start = time.clock()
set = db.GqlQuery("SELECT __key__ FROM %s" % model).fetch(int(limit))
count = len(set)
db.delete(set)
return HttpResponse("Deleted %s %s in %s" % (count,model,(time.clock() - start)))
Then run in powershell:
$client = new-object System.Net.WebClient
$client.DownloadString("http://your-app.com/Model/bdelete/?limit=400")
If you are using Java/JPA you can do something like this:
em = EntityManagerFactoryUtils.getTransactionalEntityManager(entityManagerFactory)
Query q = em.createQuery("delete from Table t");
int number = q.executeUpdate();
Java/JDO info can be found here: http://code.google.com/appengine/docs/java/datastore/queriesandindexes.html#Delete_By_Query
Yes you can:
Go to Datastore Admin, and then select the Entitiy type you want to delete and click Delete.
Mapreduce will take care of deleting!
On a dev server, one can cd to his app's directory then run it like this:
dev_appserver.py --clear_datastore=yes .
Doing so will start the app and clear the datastore. If you already have another instance running, the app won't be able to bind to the needed IP and therefore fail to start...and to clear your datastore.
You can use the task queues to delete chunks of say 100 objects.
Deleting objects in GAE shows how limited the Admin capabilities are in GAE. You have to work with batches on 1000 entities or less. You can use the bulkloader tool that works with csv's but the documentation does not cover java.
I am using GAE Java and my strategy for deletions involves having 2 servlets, one for doing the actually delete and another to load the task queues. When i want to do a delete, I run the queue loading servlet, it loads the queues and then GAE goes to work executing all the tasks in the queue.
How to do it:
Create a servlet that deletes a small number of objects.
Add the servlet to your task queues.
Go home or work on something else ;)
Check the datastore every so often ...
I have a datastore with about 5000 objects that i purge every week and it takes about 6 hours to clean out, so i run the task on Friday night.
I use the same technique to bulk load my data which happens to be about 5000 objects, with about a dozen properties.
This worked for me:
class ClearHandler(webapp.RequestHandler):
def get(self):
self.response.headers['Content-Type'] = 'text/plain'
q = db.GqlQuery("SELECT * FROM SomeModel")
self.response.out.write("deleting...")
db.delete(q)
Thank you all guys, I got what I need. :D
This may be useful if you have lots db models to delete, you can dispatch it in your terminal. And also, you can manage the delete list in DB_MODEL_LIST yourself.
Delete DB_1:
python bulkdel.py 10 DB_1
Delete All DB:
python bulkdel.py 11
Here is the bulkdel.py file:
import sys, os
URL = 'http://localhost:8080'
DB_MODEL_LIST = ['DB_1', 'DB_2', 'DB_3']
# Delete Model
if sys.argv[1] == '10' :
command = 'curl %s/clear_db?model=%s' % ( URL, sys.argv[2] )
os.system( command )
# Delete All DB Models
if sys.argv[1] == '11' :
for model in DB_MODEL_LIST :
command = 'curl %s/clear_db?model=%s' % ( URL, model )
os.system( command )
And here is the modified version of alexandre fiori's code.
from google.appengine.ext import db
class DBDelete( webapp.RequestHandler ):
def get( self ):
self.response.headers['Content-Type'] = 'text/plain'
db_model = self.request.get('model')
sql = 'SELECT __key__ FROM %s' % db_model
try:
while True:
q = db.GqlQuery( sql )
assert q.count()
db.delete( q.fetch(200) )
time.sleep(0.5)
except Exception, e:
self.response.out.write( repr(e)+'\n' )
pass
And of course, you should map the link to model in a file(like main.py in GAE), ;)
In case some guys like me need it in detail, here is part of main.py:
from google.appengine.ext import webapp
import utility # DBDelete was defined in utility.py
application = webapp.WSGIApplication([('/clear_db',utility.DBDelete ),('/',views.MainPage )],debug = True)
To delete all entities in a given kind in Google App Engine you only need to do as follows:
from google.cloud import datastore
query = datastore.Client().query(kind = <KIND>)
results = query.fetch()
for result in results:
datastore.Client().delete(result.key)
In javascript, the following will delete all the entries for on page:
document.getElementById("allkeys").checked=true;
checkAllEntities();
document.getElementById("delete_button").setAttribute("onclick","");
document.getElementById("delete_button").click();
given that you are on the admin-page (.../_ah/admin) with the entities you want to delete.