Is there an equivalent function in PyMongo or mongoengine to MongoDB's mongodump? I can't seem to find anything in the docs.
Use case: I need to periodically backup a remote mongo database. The local machine is a production server that does not have mongo installed, and I do not have admin rights, so I can't use subprocess to call mongodump. I could install the mongo client locally on a virtualenv, but I'd prefer an API call.
Thanks a lot :-).
For my relatively small database, I eventually used the following solution. It's not really suitable for big or complex databases, but it suffices for my case. It dumps each collection's documents as JSON to the backup directory. It's clunky, but it does not rely on anything other than pymongo.
from os.path import join
import pymongo
from bson.json_util import dumps

def backup_db(backup_db_dir):
    client = pymongo.MongoClient(host=<host>, port=<port>)
    database = client[<db_name>]
    authenticated = database.authenticate(<uname>, <pwd>)
    assert authenticated, "Could not authenticate to database!"
    collections = database.collection_names()
    for collection_name in collections:
        collection = database[collection_name].find()
        jsonpath = collection_name + ".json"
        jsonpath = join(backup_db_dir, jsonpath)
        with open(jsonpath, 'wb') as jsonfile:
            jsonfile.write(dumps(collection))
The accepted answer no longer works. Here is the revised code:
from os.path import join
import pymongo
from bson.json_util import dumps

def backup_db(backup_db_dir):
    client = pymongo.MongoClient(host=..., port=..., username=..., password=...)
    database = client[<db_name>]
    collections = database.collection_names()
    for collection_name in collections:
        collection = database[collection_name].find()
        jsonpath = collection_name + ".json"
        jsonpath = join(backup_db_dir, jsonpath)
        with open(jsonpath, 'wb') as jsonfile:
            jsonfile.write(dumps(collection).encode())

backup_db('.')
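Note: on PyMongo 4.x, collection_names() has been removed in favour of list_collection_names(). A minimal adaptation of the loop above, assuming the same database handle and backup_db_dir:

# PyMongo 4.x: collection_names() was removed, use list_collection_names()
for collection_name in database.list_collection_names():
    cursor = database[collection_name].find()
    jsonpath = join(backup_db_dir, collection_name + ".json")
    with open(jsonpath, 'wb') as jsonfile:
        jsonfile.write(dumps(cursor).encode())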
Related
So far, in my code below, I have managed to store my data in MongoDB.
Now I want to be able to retrieve the data I have stored.
As you can see, I have been trying but keep getting an error.
With BSON, do I have to first decode the data to retrieve it from MongoDB?
Any help would be greatly appreciated!
(Apologies for the messy code, I am just practicing through trial and error)
import json
from json import JSONEncoder
import pymongo
from pymongo import MongoClient
from bson.binary import Binary
import pickle
#Do this for each
client = MongoClient("localhost", 27017)
db = client['datacampdb']
coll = db.personpractice4_collection #creating a collection in the database
#my collection on the database is called personpractice4_collection
class Person:
    def __init__(self, norwegian, dame, brit, german, sweed):
        self.__norwegian = norwegian
        self.__dame = dame
        self.__brit = brit
        self.__german = german  # private variable
        self.__sweed = sweed
    # create getters and setters later to make OOP

personone = Person("norwegian", "dame", "brit", "german", "sweed")

class PersonpracticeEncoder(JSONEncoder):
    def default(self, o):
        return o.__dict__

# Encode Person Object into JSON
personpracticeJson = json.dumps(personone, indent=4, cls=PersonpracticeEncoder)
practicedata = pickle.dumps(personpracticeJson)
coll.insert_one({'bin-data': Binary(practicedata)})
#print(personpracticeJson)
#print(db.list_collection_names()) #get then names of my collections in DB
# Retrieving data from MongoDB
#Retrieving a Single Document with find_one()
print(({'bin-data': Binary(practicedata)}).find_one()) #not working
The find_one method should be called on a collection; {'bin-data': Binary(practicedata)} is a query used to find a document:
coll.find_one({'bin-data': Binary(practicedata)})
Which means: find a document in the collection coll where bin-data is equal to Binary(practicedata).
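As for the BSON part of the question: Binary is just a bytes subclass, so once you have the document back you can undo the pickle and json layers yourself. A minimal sketch, assuming the document was inserted exactly as in your code above:

import json
import pickle

# fetch the document back and undo the pickle/json layers
doc = coll.find_one({'bin-data': Binary(practicedata)})
if doc is not None:
    personpractice_json = pickle.loads(doc['bin-data'])  # Binary -> original JSON string
    person_dict = json.loads(personpractice_json)        # JSON string -> dict
    print(person_dict)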
I am using the python 3 splunk API to export some massive logs.
My code essentially follows the splunk API guidelines:
import splunklib.client as client
import splunklib.results as results
import pandas as pd
kwargs_export = {"earliest_time": "2019-08-19T12:00:00.000-00:00",
"latest_time": "2019-08-19T14:00:00.000-00:00",
"search_mode": "normal"}
exportsearch_results = service.jobs.export(mysearchquery, **kwargs_export)
reader = results.ResultsReader(exportsearch_results)
df = pd.DataFrame(list(reader))
But this is extremely slow...
Ultimately I want to store the output of the search as a csv to disk. Is there any way to speed the export?
Thanks!
Check this, it works:
import time

kwargs_export = {"earliest_time": "-1d",
                 "latest_time": "now",
                 "search_mode": "normal"}

service = client.connect(**args)
job = service.jobs.create(query, **kwargs_export)

# results() only returns the full result set once the job has finished, so wait for it
while not job.is_done():
    time.sleep(2)

with open(filename, 'wb') as out_f:
    try:
        job_results = job.results(output_mode="csv", count=0)
        for result in job_results:
            out_f.write(result)
    except Exception:
        print("Session timed out. Reauthenticating")
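If you still want a DataFrame at the end, as in your original code, you can read the exported CSV back in afterwards (a trivial follow-up, assuming the same filename):

import pandas as pd

# load the exported CSV into a DataFrame if you need it in memory
df = pd.read_csv(filename)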
I want to store a nested config structure like this in MongoDB:
users > path > common > config
    exchange_rate
    prod_data
    delivery_fee
site > shoppingmall > settings > description
    highlight
    prohibit_words
I did the following but failed.
db = MongoClient("localhost:99999").users
config_data = get_config()  # just gets the config data (JSON)
db.path.common.config.insert(config_data)
I would like to make it this way for each customer.
What should I do?
(I like examples because I am a beginner... (T.T))
thank you!!
I think it failed because your data is not valid JSON. If you want to insert data from a CSV file, you can try this:
import pandas as pd
from pymongo import MongoClient
import json
def mongoimport(csv_path, db_name, coll_name, db_url='localhost', db_port=27000):
    """Imports a csv file at path csv_path into a mongo collection.

    Returns: count of the documents in the new collection.
    """
    client = MongoClient(db_url, db_port)
    db = client[db_name]
    coll = db[coll_name]
    data = pd.read_csv(csv_path)
    payload = json.loads(data.to_json(orient='records'))
    coll.remove()
    coll.insert(payload)
    return coll.count()
This code is simple to understand; it comes from https://gist.github.com/jxub/f722e0856ed461bf711684b0960c8458
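For example, a hypothetical call (the CSV path, database name, collection name, and port below are made up, not taken from the question):

# hypothetical usage: load config.csv into the "users" database,
# collection "config", on a local mongod listening on 27017
doc_count = mongoimport('config.csv', 'users', 'config', db_port=27017)
print(doc_count)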
While building a Flask website, I'm using an external JSON feed to populate the local MongoDB with content. The feed is parsed and inserted while mapping keys from the JSON to keys in Mongo.
One of the available keys from the feed is called "img_url" and contains, guess what, a URL to an image.
Is there a way, in Python, to mimic a PHP-style cURL? I'd like to grab that key, download the image, store it somewhere locally while keeping the other associated keys, and have that as an entry in my db.
Here is my script up to now:
import json
import sys
import urllib2
from datetime import datetime
import pymongo
import pytz
from utils import slugify
# from utils import logger
client = pymongo.MongoClient()
db = client.artlogic
def fetch_artworks():
    # logger.debug("downloading artwork data from Artlogic")
    AL_artworks = []
    AL_artists = []
    url = "http://feeds.artlogic.net/artworks/artlogiconline/json/"
    while True:
        f = urllib2.urlopen(url)
        data = json.load(f)
        AL_artworks += data['rows']
        # logger.debug("retrieved page %s of %s of artwork data" % (data['feed_data']['page'], data['feed_data']['no_of_pages']))
        # Stop: we are at the last page
        if data['feed_data']['page'] == data['feed_data']['no_of_pages']:
            break
        url = data['feed_data']['next_page_link']
    # Now we have a list called 'AL_artworks' in which all the descriptions are stored.
    # We are going to put them into the mongoDB database,
    # making sure that if the artwork is already encoded (an object with the same id
    # already is in the database) we update the existing description instead of
    # inserting a new one ('upsert').
    # logger.debug("updating local mongodb database with %s entries" % len(AL_artworks))
    for artwork in AL_artworks:
        # Mongo does not like keys that have a dot in their name,
        # this property does not seem to be used anyway so let us
        # delete it:
        if 'artworks.description2' in artwork:
            del artwork['artworks.description2']
        # upsert in the database:
        db.AL_artworks.update({"id": artwork['id']}, artwork, upsert=True)
        # artwork['artist_id'] is not functioning properly
        db.AL_artists.update({"artist": artwork['artist']},
                             {"artist_sort": artwork['artist_sort'],
                              "artist": artwork['artist'],
                              "slug": slugify(artwork['artist'])},
                             upsert=True)
    # db.meta.update({"subject": "artworks"}, {"updated": datetime.now(pytz.utc), "subject": "artworks"}, upsert=True)
    return AL_artworks

if __name__ == "__main__":
    fetch_artworks()
First, you might like the requests library.
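For instance, a minimal sketch with requests (the helper name, the destination directory, and the idea of using a random filename are my own assumptions, not part of your feed code):

import os
import uuid

import requests

def fetch_image(url, dst_dir='/var/www/static'):
    # hypothetical helper: stream the image at `url` to a random filename
    # under dst_dir and return the local path
    dst = os.path.join(dst_dir, uuid.uuid4().hex)
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    with open(dst, 'wb') as fo:
        for chunk in resp.iter_content(chunk_size=4096):
            fo.write(chunk)
    return dst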
Otherwise, if you want to stick to the stdlib, it will be something along the lines of:
import os
import urllib2
import uuid

def fetchfile(url, dst):
    fi = urllib2.urlopen(url)
    fo = open(dst, 'wb')
    while True:
        chunk = fi.read(4096)
        if not chunk:
            break
        fo.write(chunk)
    fo.close()

fetchfile(
    artwork['img_url'],
    os.path.join('/var/www/static', uuid.uuid1().get_hex())
)
Add the appropriate exception handling (I can elaborate if you want, but I'm sure the documentation will be clear enough).
You could put the fetchfile() into a pool of async jobs to fetch many files at once.
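A rough sketch of that pool idea, following the multiprocessing docs linked below (image_urls is an assumed list of img_url values collected from the feed):

import os
import uuid
from multiprocessing import Pool

def fetch_to_static(url):
    # assumed helper: reuse fetchfile() from above with a random local filename
    fetchfile(url, os.path.join('/var/www/static', uuid.uuid1().get_hex()))

if __name__ == '__main__':
    pool = Pool(processes=4)
    pool.map(fetch_to_static, image_urls)  # image_urls: list of image URLs
    pool.close()
    pool.join()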
https://docs.python.org/2/library/json.html
https://docs.python.org/2/library/urllib2.html
https://docs.python.org/2/library/tempfile.html
https://docs.python.org/2/library/multiprocessing.html
I've been using Django for a couple of days and set up a basic blog from a tutorial with Django comments.
I've got a totally separate Python script that generates screenshots and uploads them to Amazon S3. Now I'd like my Django app to display all the images in the bucket and use a comment system on them. Preferably I'd do this by just storing the URLs in my SQLite db; currently I have them hard-coded, the app displays all images in the db, and comments are enabled on them.
My model:
(Does this need a foreign key to the django comments or is that just part of the Django Magic?!)
class Image(models.Model):
    imgUrl = models.CharField(max_length=200)
    meta = models.CharField(max_length=300)

    def __unicode__(self):
        return self.imgUrl
My bucket structure:
https://s3-eu-west-1.amazonaws.com/bucket/revision/process/images.png
Almost all the tutorials and packages I'm finding are based on upload/download rather than a simple for keys in bucket type approach that I want.
One of my problems is understanding how I can integrate my boto functions with Django if I'm using base.html. In an earlier tutorial I had an index page which had a view and could call functions from there, but base.html doesn't need a view, so I'm starting to get a little lost.
I haven't checked whether the boto API has changed, but this is how it worked last time I looked:
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import s3config
conn = S3Connection(s3config.passwd, s3config.secret)
bucket = conn.get_bucket(s3config.bucket)
s3_path = '/some/path/in/your/bucket'
keys = bucket.list(s3_path)
# or if you want all keys:
# keys = bucket.get_all_keys()
for key in keys:
    print key
    # here you can download or do other stuff
    # with the keys, like get some metadata
    print key.name
    print key.etag
    print key.size
    print key.last_modified
#s3config.py
passwd = 'BLABALBALABALA'
secret = 'xvdwv3efefefefefef'
bucket = 'name-of-your-bucket'
Update:
Amazon S3 is a key-value store, where the key is a string. So nothing prevents you from putting in keys like:
/this/string/key/looks/like/a/unix/path
/folder/images/fileA.jpg
/folder/images/fileB.jpg
/folder/images/folderX/fileX1.jpg
Now bucket.list(prefix="/folder/images/") would yield the latter three.
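To tie this back to your use case, a rough sketch of turning the listed keys into URLs for your Image model (the prefix, the URL format, and the empty meta value are assumptions; use key.generate_url() instead if your objects aren't public):

# assumed glue code: list the bucket and store one Image row per object
for key in bucket.list('revision/process/'):
    url = 'https://s3-eu-west-1.amazonaws.com/%s/%s' % (bucket.name, key.name)
    Image.objects.get_or_create(imgUrl=url, defaults={'meta': ''})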
Look here for further details:
http://readthedocs.org/docs/boto/en/latest/ref/s3.html#boto-s3-bucket
http://docs.amazonwebservices.com/AmazonS3/latest/API/RESTBucketGET.html?r=8270
This is my code to store results from S3 into MySQL using boto and Django.
from demo.models import Movies
import boto
from boto.s3.key import Key
import string
from django.db import connection, transaction
def movietitle(b):
    key = b.get_key('netflix/movie_titles.txt')
    content = key.get_contents_as_string()
    line = content.split('\n')
    args = []
    for imovie in line:
        if len(imovie) > 0:
            imovie = imovie.split(',')
            movieid = imovie[0]
            year = imovie[1]
            title = imovie[2]
            iargs = [string.atoi(movieid), title, year]
            args.append(iargs)
    cursor = connection.cursor()
    sql = "insert into demo_movies(MovieID,MovieName,ReleaseYear) values(%s,%s,%s)"
    cursor.executemany(sql, args)
    transaction.commit_unless_managed()
    cursor.close()
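A hypothetical call, reusing the connection setup from the previous answer (the s3config module and the 'netflix/movie_titles.txt' key are assumptions specific to my setup):

# assumed usage: open the bucket (s3config comes from the previous answer)
# and load the movie titles into MySQL
import boto
import s3config

conn = boto.connect_s3(s3config.passwd, s3config.secret)
bucket = conn.get_bucket(s3config.bucket)
movietitle(bucket)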