Download JSON file from MongoDB Atlas with Python

I want to download the entire collection and put it into a JSON file. I've tried the code below, but it doesn't work.
import json
from pymongo import MongoClient
import pymongo
from pathlib import Path
myclient = MongoClient("mongodb+srv://<DbName>:<DbPass>#<DbName>.a3b2ai.mongodb.net/<DbName>?retryWrites=true&w=majority")
db = myclient["PlayerPrices"]
Collection = db["Playstation"]
payload = db.inventory.find( {} ) #I think this command is the problem
with open(str(Path(__file__).parents[1]) + '\Main\playstation_1.json', 'r+') as file:
    json.dump(payload, file, indent=4)

The issue is that a PyMongo Cursor is not JSON-serializable; you need to convert it to a list of documents and serialize that with bson.json_util.
# Python program demonstrating how to convert a PyMongo Cursor to JSON

# Importing required modules
from pymongo import MongoClient
from bson.json_util import dumps

# Connecting to the MongoDB server, e.g. MongoClient('host_name', port_number)
client = MongoClient('localhost', 27017)

# Connecting to the database named GFG
mydatabase = client.GFG

# Accessing the collection named College
mycollection = mydatabase.College

# Creating a Cursor instance using find()
cursor = mycollection.find()

# Converting the cursor to a list of dictionaries
list_cur = list(cursor)

# Converting the list to JSON
json_data = dumps(list_cur, indent=2)

# Writing the data to data.json
with open('data.json', 'w') as file:
    file.write(json_data)
Resource taken from: https://www.geeksforgeeks.org/convert-pymongo-cursor-to-json/
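Applied back to the code in the question, a minimal sketch could look like the one below. The Atlas URI placeholders, database, and collection names are taken from the question (standard Atlas URIs use user:password@host); the query goes through the Playstation collection rather than db.inventory, and the output file is opened in write mode. bson.json_util.dumps also handles Mongo-specific types such as ObjectId, which the standard json module would reject.
from pathlib import Path

from pymongo import MongoClient
from bson.json_util import dumps

# Placeholders copied from the question; fill in real credentials
myclient = MongoClient(
    "mongodb+srv://<DbName>:<DbPass>@<DbName>.a3b2ai.mongodb.net/<DbName>?retryWrites=true&w=majority"
)
db = myclient["PlayerPrices"]
collection = db["Playstation"]

# Materialize the cursor into a list, then serialize it with bson.json_util.dumps
docs = list(collection.find({}))

# Write the JSON to Main/playstation_1.json; 'w' creates or overwrites the file
out_path = Path(__file__).parents[1] / "Main" / "playstation_1.json"
with open(out_path, "w") as file:
    file.write(dumps(docs, indent=4))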

Related

How to let user upload CSV to MongoDB from any directory

I've been trying to write a simple upload function that lets the user choose a CSV file from their PC and upload it into my MongoDB. I am currently using Python, PyMongo and Pandas, and it works, but only with my "local" address (C:\Users\joao.soeiro\Downloads), as shown in the code.
I'd like to know how I could make this string "dynamic" so it reads and uploads files from anywhere, not only my computer. I know it must be a silly question, but I'm really a beginner here...
I thought about creating a temporary directory using the tempfile module, but I don't know how I'd put it to work in my code, which is the following:
import pandas as pd
from pymongo import MongoClient
client = MongoClient("mongodb+srv://xxx:xxx#bycardb.lrp4p.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
print('connected')
db = client['dbycar']
collection = db['users']
data = pd.read_csv(r'C:\Users\joao.soeiro\Downloads\csteste4.csv')
data.reset_index(inplace=True)
data_dict = data.to_dict("records")
collection.insert_many(data_dict)
Solved with this:
import tkinter as tk
from IPython.display import display
from tkinter import filedialog
import pandas as pd
from pymongo import MongoClient
# connecting to the database
client = MongoClient("mongodb+srv://xxxx:xxxx#bycardb.lrp4p.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
print('connected to the database')
db = client['dbycar']
collection = db['usuarios']
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
print(file_path)
data = pd.read_csv(file_path)
data.reset_index(inplace=True)
data_dict = data.to_dict("records")
df = pd.DataFrame(data_dict)
display(df)
collection.insert_many(data_dict)
print('uploaded')
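If a file-picker dialog is not an option (for example when the script runs on a server without a display), a sketch of the same idea using a command-line argument would also make the path dynamic; upload_csv.py is a hypothetical name, and the placeholder connection string and collection names are the ones from the answer above.
import sys

import pandas as pd
from pymongo import MongoClient

# Take the CSV path from the command line, e.g.: python upload_csv.py /path/to/file.csv
if len(sys.argv) < 2:
    sys.exit("usage: python upload_csv.py <path-to-csv>")
csv_path = sys.argv[1]

client = MongoClient("mongodb+srv://xxxx:xxxx#bycardb.lrp4p.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
db = client['dbycar']
collection = db['usuarios']

# Read the CSV and insert each row as a document
data = pd.read_csv(csv_path)
data.reset_index(inplace=True)
collection.insert_many(data.to_dict("records"))
print('uploaded', len(data), 'rows from', csv_path)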

How to generalise the import script

I have a query that generates a CSV file from the data in a Postgres table. The script is working fine.
But I have a situation where I need to create separate files using the data from a different table.
So basically only the hardcoded values below change and the rest of the code stays the same. As things stand, I have to create separate scripts for all the CSVs.
Is there a way I can have one script and only change these parameters?
I'm using Jenkins to automate the CSV file creation.
filePath = '/home/jenkins/data/'
fileName = 'data.csv'
import csv
import os
import psycopg2
from pprint import pprint
from datetime import datetime
from utils.config import Configuration as Config
from utils.postgres_helper import get_connection
from utils.utils import get_global_config

# File path and name.
filePath = '/home/jenkins/data/'
fileName = 'data.csv'

# Database connection variable.
connect = None

# Check if the file path exists.
if os.path.exists(filePath):
    try:
        # Connect to database.
        connect = get_connection(get_global_config(), 'dwh')
    except psycopg2.DatabaseError as e:
        # Confirm unsuccessful connection and stop program execution.
        print("Database connection unsuccessful.")
        quit()

    # Cursor to execute query.
    cursor = connect.cursor()

    # SQL to select data from the google feed table.
    sqlSelect = "SELECT * FROM data"

    try:
        # Execute query.
        cursor.execute(sqlSelect)

        # Fetch the data returned.
        results = cursor.fetchall()

        # Extract the table headers.
        headers = [i[0] for i in cursor.description]

        # Open CSV file for writing.
        csvFile = csv.writer(open(filePath + fileName, 'w', newline=''),
                             delimiter=',', lineterminator='\r\n',
                             quoting=csv.QUOTE_ALL, escapechar='\\')

        # Add the headers and data to the CSV file.
        csvFile.writerow(headers)
        csvFile.writerows(results)

        # Message stating export successful.
        print("Data export successful.")
        print('CSV Path : ' + filePath + fileName)
    except psycopg2.DatabaseError as e:
        # Message stating export unsuccessful.
        print("Data export unsuccessful.")
        quit()
    finally:
        # Close database connection.
        connect.close()
else:
    # Message stating file path does not exist.
    print("File path does not exist.")

Error on insert_many() : document must be an instance of dict

I am trying to store JSON data in MongoDB using PyMongo, and when I run the following Python script:
import pymongo
from pymongo import MongoClient
client = MongoClient()
db = client.twitterdata
f = open('twitterdata.json', 'r')
dblist = []
for line in f:
    dblist.append(line)
db.collection.insert_many(dblist)
f.close()
I get the error :
TypeError: document must be an instance of dict, bson.son.SON, bson.raw_bson.RawBSONDocument, or a type that inherits from collections.MutableMapping
The JSON file, twitterdata.json, has 10 lines, each line holding a dict. I intend to store the dict on each line as an individual document in the collection.
Try:
import json

dblist = []
for line in f:
    dblist.append(json.loads(line))
You are trying to insert a list of strings; each line has to be parsed into a dict first.
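Put together with the question's script, a minimal corrected version might look like this; json.loads turns each line into a dict, so insert_many receives documents rather than strings.
import json
from pymongo import MongoClient

client = MongoClient()
db = client.twitterdata

# Parse each line into a dict before inserting
with open('twitterdata.json', 'r') as f:
    dblist = [json.loads(line) for line in f]

db.collection.insert_many(dblist)
If the lines were produced by bson.json_util.dumps (i.e. they contain ObjectId or date wrappers), use bson.json_util.loads instead of json.loads.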
Related:
http://api.mongodb.com/python/current/tutorial.html#documents
https://api.mongodb.com/python/3.4.0/api/bson/json_util.html
How to convert a string to BSON?

Upload file (size <16MB) to MongoDB

I have a requirement to upload files to MongoDB. Currently I am saving files in a folder in the local filesystem using Flask. Is there a way I can upload a file to MongoDB without using GridFS? I believe I did something like this long ago, but I cannot recall it since it's been a long time since I last used MongoDB.
Any file I select to upload is no more than 16MB in size.
Update: I tried the following to convert an image file using binData, but it throws the error "global name 'binData' is not defined".
import pymongo
import base64
import bson
# establish a connection to the database
connection = pymongo.MongoClient()
#get a handle to the test database
db = connection.test
file_meta = db.file_meta
file_used = "Headshot.jpg"
def main():
    coll = db.sample
    with open(file_used, "r") as fin:
        f = fin.read()
    encoded = binData(f)
    coll.insert({"filename": file_used, "file": f, "description": "test"})
MongoDB BSON (https://docs.mongodb.com/manual/reference/bson-types/) has a binary data (binData) type for fields.
The Python driver (http://api.mongodb.com/python/current/api/bson/binary.html) supports it.
You can store the file as an array of bytes.
Your code should be slightly modified:
Add the import: from bson.binary import Binary
Encode the file bytes using Binary: encoded = Binary(f)
Use the encoded value in the insert statement.
Full example below:
import pymongo
import base64
import bson
from bson.binary import Binary

# establish a connection to the database
connection = pymongo.MongoClient()

# get a handle to the test database
db = connection.test
file_meta = db.file_meta
file_used = "Headshot.jpg"

def main():
    coll = db.sample
    with open(file_used, "rb") as f:
        encoded = Binary(f.read())
    coll.insert({"filename": file_used, "file": encoded, "description": "test"})

Python and sqlite3 - importing and exporting databases

I'm trying to write a script to import a database file. I wrote the script to export the file like so:
import sqlite3

con = sqlite3.connect('../sqlite.db')
with open('../dump.sql', 'w') as f:
    for line in con.iterdump():
        f.write('%s\n' % line)
Now I want to be able to import that database. I have tried:
import sqlite3
con = sqlite3.connect('../sqlite.db')
f = open('../dump.sql','r')
str = f.read()
con.execute(str)
but I'm not allowed to execute more than one statement. Is there a way to get it to run an SQL script directly?
sql = f.read() # watch out for built-in `str`
cur.executescript(sql)
Documentation.
Try using
con.executescript(str)
Documentation
Connection.executescript(sql_script)
This is a nonstandard shortcut that creates an intermediate cursor object
by calling the cursor method, then calls the cursor’s executescript
method with the parameters given.
Or create the cursor first:
import sqlite3

con = sqlite3.connect('../sqlite.db')
f = open('../dump.sql', 'r')
sql = f.read()
cur = con.cursor()
cur.executescript(sql)
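Putting both halves together, a minimal dump-and-restore round trip could look like this; ../restored.db is an assumed target file, and executescript will fail if the dumped tables already exist in it.
import sqlite3

# Dump the existing database to a SQL script
src = sqlite3.connect('../sqlite.db')
with open('../dump.sql', 'w') as f:
    for line in src.iterdump():
        f.write('%s\n' % line)
src.close()

# Restore the script into a fresh database file
dst = sqlite3.connect('../restored.db')
with open('../dump.sql', 'r') as f:
    dst.executescript(f.read())
dst.close()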
