Upload file (size <16MB) to MongoDB - python

I have a requirement to upload a file to MongoDB. Currently I am saving files in a folder on the local filesystem using Flask. Is there a way I can upload a file to MongoDB without using GridFS? I believe I did something like this long ago, but I cannot recollect how, since it has been a long time since I last used MongoDB.
Any file I select to upload is no more than 16MB in size.
Update: I tried this to convert the image file using binData, but it throws the error "global name binData is not defined".
import pymongo
import base64
import bson
# Establish a connection to the database (MongoClient is called with no arguments).
connection = pymongo.MongoClient()
# Get a handle to the "test" database.
db = connection.test
# Handle to the "file_meta" collection; it is not referenced again in this snippet.
file_meta = db.file_meta
# Path of the file to upload.
file_used = "Headshot.jpg"
def main():
# Reads file_used and inserts one document into the "sample" collection.
coll = db.sample
# NOTE(review): the file is opened in text mode ("r"); a binary file such as a
# JPEG presumably needs "rb" -- confirm.
with open(file_used, "r") as fin:
f = fin.read()
# binData is neither defined nor imported in this snippet, which matches the
# reported "global name binData is not defined" error.
encoded = binData(f)
# The inserted document stores the raw `f`; `encoded` is not used.
coll.insert({"filename": file_used, "file": f, "description": "test" })

Mongo BSON (https://docs.mongodb.com/manual/reference/bson-types/) has binary data (binData) type for field.
Python driver (http://api.mongodb.com/python/current/api/bson/binary.html) supports it.
You can store file as array of bytes.
Your code should be slightly modified:
Add import: from bson.binary import Binary
Encode file bytes using Binary: encoded = Binary(f)
Use encoded value in insert statement.
Full example below:
import pymongo
import base64
import bson
from bson.binary import Binary
# establish a connection to the database
connection = pymongo.MongoClient()
# get a handle to the test database
db = connection.test
file_meta = db.file_meta
file_used = "Headshot.jpg"
def main():
    """Store the contents of ``file_used`` as a BSON binary field.

    The file is read in binary mode, wrapped in ``bson.binary.Binary`` and
    inserted as one document into the ``sample`` collection of the ``test``
    database, together with its filename and a description.
    """
    coll = db.sample
    # "rb" so the bytes are read unchanged, with no text decoding.
    with open(file_used, "rb") as f:
        encoded = Binary(f.read())
    # insert_one replaces the legacy Collection.insert, which newer PyMongo
    # releases no longer provide.
    coll.insert_one({"filename": file_used, "file": encoded, "description": "test"})

Related

Download json file from MongoDB atlas with python

I want to download the entire collection and put it into a JSON file. I've tried the code below, but it doesn't work.
import json
from pymongo import MongoClient
import pymongo
from pathlib import Path
# NOTE(review): the URI has '#' between the credentials and the host; a MongoDB
# connection string normally uses '@' there -- confirm against the real URI.
myclient = MongoClient("mongodb+srv://<DbName>:<DbPass>#<DbName>.a3b2ai.mongodb.net/<DbName>?retryWrites=true&w=majority")
db = myclient["PlayerPrices"]
# Handle to the "Playstation" collection; the query below does not use it.
Collection = db["Playstation"]
# Queries db.inventory rather than the Collection handle defined above.
payload = db.inventory.find( {} ) #I think this command is the problem
# Builds the output path by string concatenation from the grandparent directory
# of this script; the file is opened in 'r+' mode.
with open(str(Path(__file__).parents[1]) + '\Main\playstation_1.json', 'r+') as file:
# NOTE(review): payload is the object returned by find(), not a list of
# documents; presumably json.dump cannot serialize it directly -- confirm.
json.dump(payload, file, indent=4)
The issue is that you need to convert the Pymongo Cursor to support json format.
# Python Program for
# demonstrating the
# PyMongo Cursor to JSON
# Importing required modules
from pymongo import MongoClient
from bson.json_util import dumps, loads
# Connect to the MongoDB server: MongoClient(host, port).
mongo = MongoClient('localhost', 27017)
# find() on the "College" collection of the "GFG" database returns a cursor;
# materialize it into a list of dictionaries before serializing.
documents = list(mongo.GFG.College.find())
# Serialize with bson.json_util.dumps before opening the file, so nothing is
# written if serialization fails.
serialized = dumps(documents, indent=2)
# Write the JSON text to data.json.
with open('data.json', 'w') as out_file:
    out_file.write(serialized)
Resource taken from: https://www.geeksforgeeks.org/convert-pymongo-cursor-to-json/

Store a PDF file in my MongoDB database with PYmongo error

I want to store a PDF file in my MongoDB database (on Ubuntu) with PyMongo and GridFS.
But I am receiving the error 'utf-8' codec can't decode byte 0xe2 in position 10: invalid continuation byte
How can I store and receive a PDF with python in MongoDB?
from pymongo import MongoClient
import gridfs
# Connect to the local server and wrap the myDB database in a GridFS handle.
db = MongoClient('mongodb://localhost:27017/').myDB
fs = gridfs.GridFS( db )
# NOTE(review): open() is called without a mode, so the PDF is opened in text
# mode; presumably this is where the reported 'utf-8' decode error comes from --
# confirm. The file object is also never closed.
fileID = fs.put( open(('Test.pdf') ))
# Fetch the stored file back using the id returned by put().
out = fs.get(fileID)
You need to encode the PDF appropriately after reading. I won't pretend to understand the details. But I have gotten it to work. Try this, see if it works for you too.
(FYI, Might want to also specify the collection)
import base64
import gridfs
from pymongo import MongoClient
def write_new_pdf(path):
    """Store the file at *path* in GridFS, base64-encoded.

    The file is read as raw bytes, base64-encoded, and written to a new
    GridFS file in the ``myDB`` database whose ``filename`` is *path*.
    Readers must base64-decode the stored content to recover the original
    bytes.

    Args:
        path: Filesystem path of the file to store; also used as the GridFS
            filename.
    """
    # Note, open with the "rb" flag for "read bytes"
    with open(path, "rb") as f:
        encoded_string = base64.b64encode(f.read())
    # Use the client as a context manager so the connection is closed once
    # the upload has finished instead of being left open on every call.
    with MongoClient('mongodb://localhost:27017/') as client:
        fs = gridfs.GridFS(client.myDB)
        # Leaving the inner with-block closes the GridFS file.
        with fs.new_file(chunkSize=800000, filename=path) as fp:
            fp.write(encoded_string)
Update: How to read the PDF back
def read_pdf(filename):
    """Restore a base64-encoded GridFS file to the local filesystem.

    Looks up a GridFS file in the ``myDB`` database by *filename*,
    base64-decodes its content and writes the result to a local file of the
    same name.

    Args:
        filename: GridFS filename to look up; also the local output path.

    Raises:
        gridfs.NoFile: If no GridFS file with that filename exists.
    """
    # Usual setup; the context manager closes the connection afterwards.
    with MongoClient('mongodb://localhost:27017/') as client:
        fs = gridfs.GridFS(client.myDB)
        # Standard query to Mongo
        data = fs.find_one({"filename": filename})
        if data is None:
            # Fail before touching the output file, so a missing entry does
            # not leave an empty or truncated file behind.
            raise gridfs.NoFile("no file in GridFS with filename %r" % (filename,))
        decoded = base64.b64decode(data.read())
    with open(filename, "wb") as f:
        f.write(decoded)

Getting wrong characters in pt-br from xml in python

I'm trying to send data from an XML feed to a MySQL database, but I'm getting wrong pt-br characters in Python and MySQL.
import MySQLdb
import urllib2
import sys
import codecs
## Force the process-wide default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Wrap stdout in a UTF-8 writer.
UTF8Writer = codecs.getwriter('utf8')
sys.stdout = UTF8Writer(sys.stdout)
# Fetch the feed and read the whole body.
file = urllib2.urlopen('feed.xml')
data = file.read()
file.close()
# NOTE(review): xmltodict is used here but is not imported in this snippet.
data = xmltodict.parse(data)
db = MySQLdb.connect(host=MYSQL_HOST, # your host, usually localhost
user=MYSQL_USER, # your username
passwd=MYSQL_PASSWD, # your password
db=MYSQL_DB) # name of the database
cur = db.cursor()
# Digs the product name out of the parsed document by position. `i` is not
# defined in this snippet -- presumably a loop index from omitted code.
# NOTE(review): str() presumably yields a UTF-8 byte string here, which would
# explain the \xc3\xb3 escapes in the printed value -- confirm.
product_name = str(data.items()[0][1].items()[2][1].items()[3][1][i].items()[1][1])
But when I print product_name in Python or insert it into mysql, I get this:
'Probi\xc3\xb3tica (120caps)'
this should be:
'Probiótica'
How can I fix this?
'Probi\xc3\xb3tica' is the utf-8 encoded version of 'Probiótica'.
Is your terminal (or whatever you are using to run this) set up to handle utf-8 output?
Try print 'Probi\xc3\xb3tica'.decode('utf-8') to see what happens.
I get Probiótica.

Save a pdf file stored in Mongodb GridFS using Python

I had uploaded some PDF, PNG files to a local instance of mongodb. By mistake I deleted these files and I can no longer recover them using the regular recover options. However, they are in my local mongodb database. How can I save them back in their original format on my computer?
I know the following:
import pymongo as pym
import gridfs
def connectToDb():
    """Connect to the local MongoDB server and return the working handles.

    Returns:
        A tuple ``(db, collectn, fs)``: the ``questionbank`` database, its
        ``questionbank`` collection, and a GridFS handle on that database.
    """
    database = pym.MongoClient('mongodb://localhost:27017/').questionbank
    return database, database.questionbank, gridfs.GridFS(database)
# Unpack the database, collection and GridFS handles.
db, collectn, fs = connectToDb()
# List the _id and filename of every entry in the fs.files collection.
filelist = list( db.fs.files.find({}, {"_id": 1, "filename": 1}) )
# Take the first listed file; this raises IndexError when the list is empty.
fileid = filelist[0]['_id']
# Look the file up in GridFS by its _id.
fobj = fs.get(fileid)
## I don't know what to do after this. I think I cannot use read since I don't
## want the string. I want to save the pdf file as a pdf file.
Any help will be greatly appreciated. Thanks in advance.
Okay, I figured this out on my own. It can be done in the following way:
To the above code add the lines:
# Write the GridFS file's bytes to disk. The with-block closes the output
# file even if the read or the write raises.
with open('tempfigfile.pdf', 'wb') as f:
    f.write(fobj.read())
This saves the file as tempfigfile.pdf.
This code will save all the files from MongoDB GridFS to your local folder.
# Save every file stored in GridFS into the local folder, named after its
# GridFS filename. Iterating the cursor directly avoids cursor.count(), which
# newer PyMongo releases no longer provide, and the manual index bookkeeping.
for fi in fs.find():
    # The with-block closes each output file; no explicit close is needed.
    with open("C:\\localfolder\\" + fi.filename, "wb") as f:
        f.write(fi.read())

Save jpg image to mongodb using gridfs with python and retrieve it from database when needed

I'm trying to save a jpg image to MongoDB using GridFS with Python and retrieve it from the database when needed. It looks like it saves fine; I then store its file id in a collection and use it to locate the image when reading. But it doesn't give me back the image I saved before.
Here is the class definition:
class dbase():
    """Small wrapper around the ``test`` MongoDB database for GridFS images.

    ``Savetodb`` stores a widget's image in GridFS and records its file id in
    ``my_collection``; ``Getfromdb`` looks the id up by image name and
    returns the stored content.
    """

    # Placeholder; replaced by the real database handle in __init__.
    database=""

    def __init__(self,dbserver,dbport ):
        """Connect with MongoClient's defaults and select the ``test`` database.

        NOTE(review): dbserver and dbport are accepted but not passed to
        MongoClient -- confirm whether they should be.
        """
        from pymongo import MongoClient
        client = MongoClient()
        self.database = client['test']

    def Savetodb(self,event):
        """Ask for confirmation, then store the event widget's image in GridFS.

        Args:
            event: Tk event whose ``widget`` provides the ``text`` option
                (used as filename) and the ``image`` option (stored content).
        """
        import gridfs
        # The module is tkMessageBox on Python 2 and tkinter.messagebox on
        # Python 3; support both.
        try:
            import tkMessageBox
        except ImportError:
            from tkinter import messagebox as tkMessageBox
        result = tkMessageBox.askokcancel("Save to db???", "Confirm Save")
        if not result:
            return
        # get image name and image file from the widget box
        filename = event.widget.cget("text")
        # NOTE(review): cget("image") presumably returns the Tk image's name
        # rather than the JPEG bytes, which would explain reading back
        # something other than the saved picture -- confirm.
        img = event.widget.cget("image")
        fs = gridfs.GridFS(self.database)
        fileid = fs.put(img, filename=filename)
        self.database.my_collection.insert_one({"imagefile":filename,"fileid":fileid})

    def Getfromdb(self,outfilename):
        """Return the stored content for *outfilename*, or None if unknown.

        When several ``my_collection`` documents match, the file id of the
        last one returned by the query is used.
        """
        import gridfs
        fs = gridfs.GridFS(self.database)
        fid = ""
        for item in self.database.my_collection.find({"imagefile":outfilename}):
            fid = item["fileid"]
        # "!=" replaces the "<>" operator, which Python 3 does not accept.
        if fid != "":
            return fs.get(fid).read()
        return None

Categories

Resources