How to dump a collection to json file using pymongo - python

I am trying to dump a collection to a .json file, but after looking through the PyMongo tutorial I cannot find anything that relates to it.
Tutorial link: https://api.mongodb.com/python/current/tutorial.html

Just get all the documents and save them to a file, e.g.:
from bson.json_util import dumps
from pymongo import MongoClient

if __name__ == '__main__':
    client = MongoClient()
    db = client.db_name
    collection = db.collection_name
    cursor = collection.find({})
    with open('collection.json', 'w') as file:
        file.write('[')
        for document in cursor:
            file.write(dumps(document))
            file.write(',')
        file.write(']')

The accepted solution produces invalid JSON: it leaves a trailing comma before the closing square bracket ], and the JSON spec does not allow trailing commas.
To build on the accepted solution I used the following:
from bson.json_util import dumps
from pymongo import MongoClient
import json

if __name__ == '__main__':
    client = MongoClient()
    db = client.db_name
    collection = db.collection_name
    cursor = collection.find({})
    with open('collection.json', 'w') as file:
        json.dump(json.loads(dumps(cursor)), file)

Here's another way of avoiding the , before the closing square bracket, also using with open to save some space.
filter = {"type": "something"}
type_documents = db['cluster'].find(filter)
type_documents_count = db['cluster'].count_documents(filter)
with open("type_documents.json", "w") as file:
    file.write('[')
    # Start from one as type_documents_count also starts from 1.
    for i, document in enumerate(type_documents, 1):
        file.write(json.dumps(document, default=str))
        if i != type_documents_count:
            file.write(',')
    file.write(']')
It simply skips the comma when the iteration number equals the document count, i.e. for the last document it writes.

Complementing @kamilitw, I use the length of the cursor to build the JSON file correctly, using count() and if/else:
import json
from bson import json_util

def writeToJSONFile(collection):
    cursor = collection.find({})
    file = open("collection.json", "w")
    file.write('[')
    qnt_cursor = 0
    for document in cursor:
        qnt_cursor += 1
        num_max = cursor.count()
        if num_max == 1:
            file.write(json.dumps(document, indent=4, default=json_util.default))
        elif num_max >= 1 and qnt_cursor <= num_max - 1:
            file.write(json.dumps(document, indent=4, default=json_util.default))
            file.write(',')
        elif qnt_cursor == num_max:
            file.write(json.dumps(document, indent=4, default=json_util.default))
    file.write(']')
    return file
So the JSON file ends up correct: where it previously wrote something like [{"test": "test"},], it now writes [{"test":"test1"},{"test":"test2"}].
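Note that Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.x. A minimal sketch of the same approach using collection.count_documents() instead (assuming the same collection object and output file as above):

import json
from bson import json_util

def write_to_json_file(collection, path="collection.json"):
    # count_documents() replaces the removed Cursor.count()
    num_max = collection.count_documents({})
    with open(path, "w") as file:
        file.write('[')
        for i, document in enumerate(collection.find({}), 1):
            file.write(json.dumps(document, indent=4, default=json_util.default))
            if i != num_max:
                file.write(',')
        file.write(']')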

"""
#Author: Aseem Jain
#profile: https://www.linkedin.com/in/premaseem/
"""
import os
import pymongo
# configure credentials / db name
db_user = os.environ["MONGO_ATLAS_USER"]
db_pass = os.environ["MONGO_ATLAS_PASSWORD"]
db_name = "sample_mflix"
connection_string = f"mongodb+srv://{db_user}:{db_pass}#sharedcluster.lv3wx.mongodb.net/{db_name}?retryWrites=true&w=majority"
client = pymongo.MongoClient(connection_string)
db = client[db_name]
# create database back directory with db_name
os.makedirs(db_name, exist_ok=True)
# list all tables in database
tables = db.list_collection_names()
# dump all tables in db
for table in tables:
print("exporting data for table", table )
data = list(db[table].find())
# write data in json file
with open(f"{db.name}/{table}.json","w") as writer:
writer.write(str(data))
exit(0)

Using pymongo's json_util:
from bson.json_util import dumps
from pymongo import MongoClient

db_client = MongoClient(mongo_connection_string)
collection = db_client.db_name.collection_name

with open("collection.json", 'w') as file:
    # dumps() serializes the whole cursor as a JSON array, handling ObjectId etc.
    file.write(dumps(collection.find()))

Related

How to dump data into Json file using python

As you can see in the Python code below, I am trying to dump data into a JSON file, but I am struggling to do it in Python:
import time
import json
import os

def long_function(name):
    cache_path = 'cache.json'
    if not os.path.isfile(cache_path):
        with open(cache_path, 't') as json_file:
            cache_file_data = [name]
            jsondump(cache_file_data, json_file)
    else:
        with open(cache_path, 'r') as json_file:
            cache_file_data = json.load(json_file)
            if name in cache_file_data:
                print("Name already exist")
                return name
            else:
                cache_file_data.append(name)
    for e in range(5):
        time.sleep(1)
        print(e+1)
    with open(cache_path, 'w') as json_file:
        jsondump(cache_file_data, json_file)
    print("New Name added in cache")
    return name

print(long_function('nitu'))
Please help me resolve this problem.
import json

# JSON data:
x = '{"organization": "New_holn", "city": "Noida", "country": "India"}'

# Python object to be appended
y = {"pin": 117845}

# parsing JSON string:
z = json.loads(x)

# appending the data
z.update(y)

# the result is a JSON string:
print(json.dumps(z))
Follow this pattern. There are two errors in your code. First, the file mode in the if branch is not defined correctly; you wrote
with open(cache_path, 't') as json_file:
instead of
with open(cache_path, 'w') as json_file:
Second, you are not actually dumping the data: jsondump is not defined, it should be json.dump.
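Putting both fixes together, a corrected version of the function from the question might look like this (a sketch that keeps the original structure and cache file name):

import time
import json
import os

def long_function(name):
    cache_path = 'cache.json'
    if not os.path.isfile(cache_path):
        # no cache yet: start a new list with this name
        cache_file_data = [name]
    else:
        with open(cache_path, 'r') as json_file:
            cache_file_data = json.load(json_file)
        if name in cache_file_data:
            print("Name already exists")
            return name
        cache_file_data.append(name)

    # simulate the long-running work
    for e in range(5):
        time.sleep(1)
        print(e + 1)

    # json.dump (not jsondump), and write mode 'w' (not 't')
    with open(cache_path, 'w') as json_file:
        json.dump(cache_file_data, json_file)
    print("New name added in cache")
    return name

print(long_function('nitu'))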

Python memory error: saving and splitting a CSV in Python

I pull all of the Lead object's data from Salesforce with Python and save it to a CSV file, but because there is so much data I get a Python memory error.
This is the code that raises the memory error:
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account

SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'

def main():
    # Authentication settings
    sf = Salesforce(username=SALESFORCE_USERNAME,
                    password=PASSWORD,
                    security_token=SECURITY_TOKEN)

    # Lead columns to be acquired
    columns = [
        "CreatedDate"
    ]
    sosl = 'SELECT {0[0]} FROM Lead'.format(columns)

    # Data acquisition with SOSL
    data = sf.query_all(sosl)

    # Delete the CSV file if it exists
    output_csv = 'output.csv'
    if os.path.exists(output_csv):
        os.remove(output_csv)

    # Write to CSV file
    for k, v in data.items():
        if type(v) is list:
            with open(output_csv, 'w', newline="") as f:
                writer = csv.DictWriter(f, fieldnames=columns)
                writer.writeheader()
                for d in v:
                    data = json.loads(json.dumps(d))
                    del data['attributes']
                    writer.writerow(data)

if __name__ == '__main__':
    main()
That's why, when there are more than 1000 rows, I want the CSV files to be written as follows:
1. output1.csv (1000 rows)
2. output2.csv (1000 rows)
3. output3.csv ...
I want to split the CSV, so I passed iterator=True and chunksize=1000 to open(), but I get the following error.
Code
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account

SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'

def main():
    # Authentication settings
    sf = Salesforce(username=SALESFORCE_USERNAME,
                    password=PASSWORD,
                    security_token=SECURITY_TOKEN)

    # Lead columns to be acquired
    columns = [
        "CreatedDate"
    ]
    sosl = 'SELECT {0[0]} FROM Lead'.format(columns)

    # Data acquisition with SOSL
    data = sf.query_all(sosl)

    # Delete the CSV file if it exists
    output_csv = 'output.csv'
    if os.path.exists(output_csv):
        os.remove(output_csv)

    # Write to CSV file
    for k, v in data.items():
        if type(v) is list:
            with open(output_csv, 'w', newline="", iterator=True, chunksize=1000) as f:
                writer = csv.DictWriter(f, fieldnames=columns)
                writer.writeheader()
                for d in v:
                    data = json.loads(json.dumps(d))
                    del data['attributes']
                    writer.writerow(data)

if __name__ == '__main__':
    main()
Error message
Traceback (most recent call last):
File "c:/Users/test/Documents/test/test5.py", line 44, in <module>
main()
File "c:/Users/test/Documents//test5.py", line 36, in main
with open(output_csv, 'w', newline="",iterator=True,chunksize=1000) as f:
TypeError: 'iterator' is an invalid keyword argument for open()
Is there another way to do this that avoids the Python memory error?
If anyone knows, please let me know.
data = sf.query_all(sosl)
This call retrieves all of the results for the given query (which, incidentally, is SOQL, not SOSL) into memory at once.
Instead, use
data = sf.query_all_iter(sosl)
and iterate over the resulting iterator instead of over data.items(). This is much more memory-efficient because it does not attempt to retrieve all records at once.
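As a sketch of how that could be combined with the 1000-row file split the question asks for (reusing the credentials and the single CreatedDate column from the question; the chunking logic itself is an assumption, not something simple_salesforce provides):

from simple_salesforce import Salesforce
import csv

sf = Salesforce(username=SALESFORCE_USERNAME,
                password=PASSWORD,
                security_token=SECURITY_TOKEN)

columns = ["CreatedDate"]
soql = 'SELECT {0[0]} FROM Lead'.format(columns)

# query_all_iter() yields records lazily instead of loading them all at once
records = sf.query_all_iter(soql)

chunk_size = 1000
out = None
writer = None
file_index = 0

for i, record in enumerate(records):
    if i % chunk_size == 0:
        # start a new output file every 1000 rows
        if out is not None:
            out.close()
        file_index += 1
        out = open(f"output{file_index}.csv", "w", newline="")
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
    row = dict(record)
    del row['attributes']
    writer.writerow(row)

if out is not None:
    out.close()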

Export pymongo collection to a JSON file

How can I export a pymongo collection to a JSON file? I have a huge collection with approximately 1 GB of data, and I need an efficient way to export it and create a JSON file from it.
I am using the code below, taken from one of the Stack Overflow answers:
import json

def getJSONFromDB():
    db = GetMongo_client()
    collection = db['collection_name']
    cursor = collection.find({})
    file = open("collection.json", "w")
    file.write('[')
    for document in cursor:
        file.write(json.dumps(document))
        file.write(',')
    file.write(']')
But it gives me the following error:
TypeError: Object of type ObjectId is not JSON serializable
The pymongo documentation you pointed to is obsolete. If you're using version 1.7, I recommend updating. With a more recent version you can do this:
from bson.json_util import dumps
dumps(l)
https://pymongo.readthedocs.io/en/stable/api/bson/json_util.html
Side answer: u'name', u'date', u'_id', etc. are the names of the fields of the document in the database.
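Applied to the function from the question, that could look like this (a sketch; GetMongo_client is the helper from the question):

from bson.json_util import dumps

def getJSONFromDB():
    db = GetMongo_client()
    collection = db['collection_name']
    cursor = collection.find({})
    with open("collection.json", "w") as file:
        # dumps() handles ObjectId and writes the whole cursor as a JSON array
        file.write(dumps(cursor))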
Here is an end-to-end solution:
Step 1: export all the needed collections to JSON files.
Step 2: import those JSON files into another database.
import pymongo
import os
import json
from bson.json_util import dumps
import sys
import Consts

def replace_word(infile, old_word, new_word):
    if not os.path.isfile(infile):
        print("Error on replace_word, not a regular file: " + infile)
        sys.exit(1)
    with open(infile, 'r') as f1:
        content = f1.read()
    with open(infile, 'w') as f2:
        f2.write(content.replace(old_word, new_word))

def replase_coletion(db_name_list, client_prod, client_stg):
    for db_name in db_name_list:
        db_prod = client_prod[db_name]
        os.makedirs(db_name, exist_ok=True)
        tables_prod = db_prod.list_collection_names()
        print(tables_prod)

        # Step 1: dump every collection of the production db to a JSON file
        for table in tables_prod:
            print("exporting data for table", table)
            data = list(db_prod[table].find())
            json_data = dumps(data, indent=4)
            # write data in json file
            with open(f"{db_prod.name}/{table}.json", 'w') as file:
                file.write(json_data)
            replace_word(db_prod.name + "/" + table + ".json", "$oid", "oid")

        # Step 2: drop the staging collections and re-import the JSON files
        db_stg = client_stg[db_name]
        tables_stg = db_stg.list_collection_names()
        print(tables_stg)
        for table in tables_stg:
            db_stg.drop_collection(table)

        for table in tables_prod:
            try:
                with open(f"{db_prod.name}/{table}.json") as read_file:
                    data = json.load(read_file)
                print(data)
                client_stg[db_name][table].insert_many(data)
            except:
                print("empty list")

        db_stg = client_stg[db_name]
        tables_stg = db_stg.list_collection_names()
        print(tables_stg)

if __name__ == "__main__":
    db_name_list = Consts.db_lists
    client_prod = pymongo.MongoClient(Consts.from_bd)
    client_stg = pymongo.MongoClient(Consts.to_db)
    replase_coletion(db_name_list, client_prod, client_stg)

Creating JSON with python cgi returns only one row

I am trying to create JSON from my SQL query using Python CGI, but it only returns one row from my table. I am not sure what I am doing wrong; it probably has to do with my list declaration, but I am not sure how to fix it.
#!/usr/bin/env python3
import cgi
import cgitb
import sys
import json
import cx_Oracle

cgitb.enable()
fs = cgi.FieldStorage()

with open("../../password.txt", 'r') as pwf:
    pw = pwf.read().strip()

imagequery = """
SELECT PLACENAME_ID, COMMENT FROM PLACENAMES
"""

conn = cx_Oracle.connect(dsn="gen", user="7", password=pw)
c = conn.cursor()
c.execute(imagequery)

place_id = []
comment = []
for row in c:
    place_id = row[0]
    comment = row[1]

result = {}
result['place_id'] = place_id
result['comment'] = comment

sys.stdout.write("Content-Type: application/json")
sys.stdout.write("\n")
sys.stdout.write("\n")
sys.stdout.write(json.dumps(result, indent=1))
sys.stdout.write("\n")
sys.stdout.close()
Result
{
"place_id": 5326,
"comment": "Graveyard"
}
Create a dictionary for each row and append it to a list. Your JSON will then contain an array with all of the database entries:
places = []
for row in c:
    place = {}
    place['place_id'] = row[0]
    place['comment'] = row[1]
    places.append(place)

result = {}
result['places'] = places
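The output part of the script from the question stays the same; the assembled result is serialized once at the end:

sys.stdout.write(json.dumps(result, indent=1))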

Python: when exporting a CSV file from an SQLite database, values containing a capital "L" are truncated

import sqlite3
import pandas as pd

f = open('output.csv', 'w')
connection = sqlite3.connect('storage.sqlite')
cursor = connection.cursor()
cursor.execute('select * from product')

while True:
    df = pd.DataFrame(cursor.fetchmany(1000))
    if len(df) == 0:
        break
    else:
        df.to_csv(f, header=False)

f.close()
cursor.close()
connection.close()
Here the data was "Long Lad as", but after importing I got "ong ad as" spread across different cells of the CSV.
Lowercase "l" is not affected, but capital "L" is removed while exporting.
Please help me fix this bug.
See your image: you have set "L" as the "Other" separator, so the "L" characters are removed when you import the file.
A CSV is a plain text file, so you can open it in a normal editor and confirm that the "L" is still present in the exported text.
