MongoDB / Python: Upload JSON files to a MongoDB database with authentication - python

I have to overcome a problem that is probably trivial for most advanced users of MongoDB. I have to upload a few JSON files to a MongoDB database with authentication, but it is not working as easily as I thought it would. I am an amateur at MongoDB and still at Python, so please be kind to me :)
# PART FOR CREATING JSON FILES (WORKING)
import json
from pathlib import Path

pliki = "/users/user/CSVtoGD/"
files = Path(pliki).glob('*.csv')
for f in files:
    print(json.dumps(csv_rows, indent=19))
    read_CSV(f, str(f.with_suffix('.json')))
#PART FOR UPLOADING EXAMPLE FILE TO MONGODB
import json
import pymongo
from pymongo import MongoClient

#conn = pymongo.MongoClient('mongodb://user:password@1.1.1.1/')
#db = conn['TTF-Files']
#coll = db['JSON files of TTF from game assets']
uri = "mongodb://user:password@1.1.1.1/default_db?authSource=admin"
client = MongoClient(uri)

with open('/Users/user/CSVtoGD/FILE.json', 'r') as data_file:
    data = json.loads(data_file)
    # if pymongo >= 3.0 use insert_many() for inserting many documents
    collection_currency.insert_one(data_file)

client.close()
I receive this error:
TypeError: the JSON object must be str, bytes or bytearray, not TextIOWrapper
which, honestly, I cannot understand at my level of knowledge.
The JSON files look like this:
[
    {
        "Name": "path/to/file/font2.ttf",
        "Vendor": "Comfortaa",
        "Other1": "Regular",
        "Other2": "Comfortaa",
        "Other3": "Comfortaa",
        "Other4": "Comfortaa",
        "Other5": "Version 2.004 2013",
        "Other6": "JohanAakerlund: Comfortaa Regular: 2011",
        "Other7": "Johan Aakerlund - aajohan",
        "Other8": "Johan Aakerlund",
        "Other9": "Copyright (c) 26.12.2011, Johan Aakerlund (aajohan@gmail.com), with Reserved Font Name \"Comfortaa\". This Font Software is licensed under the SIL Open Font License, Version 1.1. http://scripts.sil.org/OFL",
        "Other10": "http://scripts.sil.org/OFL",
        "Other11": "",
        "Other12": "Comfortaa"
    },
[
    {
        "Name": "path/to/file/font2.ttf",
        "Vendor": "Comfortaa",
        "Other1": "Regular",
        "Other2": "Comfortaa",
        "Other3": "Comfortaa",
        "Other4": "Comfortaa",
        "Other5": "Version 2.004 2013",
        "Other6": "JohanAakerlund: Comfortaa Regular: 2011",
        "Other7": "Johan Aakerlund - aajohan",
        "Other8": "Johan Aakerlund",
        "Other9": "Copyright (c) 26.12.2011, Johan Aakerlund (aajohan@gmail.com), with Reserved Font Name \"Comfortaa\". This Font Software is licensed under the SIL Open Font License, Version 1.1. http://scripts.sil.org/OFL",
        "Other10": "http://scripts.sil.org/OFL",
        "Other11": "",
        "Other12": "Comfortaa"
    },
EDIT:
I have modified the script:
# saving to the database
import pymongo
import json
#from pymongo import MongoClient, InsertOne

myclient = pymongo.MongoClient("mongodb://user:password@10.1.1.205:27017/default_db?authSource=admin")
db = myclient.TTF
collection = myclient.TTF
with open("/users/user/CSVtoGD/file.json") as file:
    file_data = json.load(file)
collection.insert_many(file_data)
But now I get this error:
TypeError: 'Collection' object is not callable. If you meant to call the 'insert_many' method on a 'Database' object it is failing because no such method exists.
Does this mean that I am not even connected to the database?
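Both errors point at the same two lines: json.loads() expects a string (json.load() is the variant that reads from a file object), and myclient.TTF is a Database, so insert_many() has to be called on a collection obtained from it. A minimal sketch of the upload step, assuming the database is TTF and a hypothetical collection name of fonts:
import json
import pymongo

# host and credentials as in the question; "fonts" is a placeholder collection name
client = pymongo.MongoClient("mongodb://user:password@10.1.1.205:27017/default_db?authSource=admin")
db = client["TTF"]            # Database object
collection = db["fonts"]      # Collection object, taken from the database

with open("/users/user/CSVtoGD/file.json") as f:
    file_data = json.load(f)  # json.load parses a file object directly

# the files hold a list of documents, so insert_many fits
collection.insert_many(file_data)
client.close()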

Related

Get a JSON file that represents a device (object) from a specific folder and generate a Kafka event

I need to make a tool in Python that takes a JSON file (a device) from a folder (devices) and generates Kafka events for the device.events topic. Before generating an event, it has to check whether the device exists in the folder.
I wrote this, but I still don't know how to open a file inside the folder.
import json

test_devices = r'''
{
    "data": ["{\"mac_address\": null, \"serial_number\": \"BRUCE03\", \"device_type\": \"STORAGE\", \"part_number\": \"STCHPRTffOM01\", \"extra_attributes\": [], \"platform_customer_id\": \"a44a5c7c65ae11eba916dc31c0b885\", \"application_customer_id\": \"4b232de87dcf11ebbe01ca32d32b6b77\"}"],
    "id": "b139937e-5107-4125-b9b0-d05d17ad2bea",
    "source": "CCS",
    "specversion": "1.0",
    "time": "2021-01-13T14:44:18.972181+00:00",
    "type": "` `",
    "topic": "` `"
}
'''
data = json.loads(test_devices)
print(type(test_devices))
print(data)
Kafka is an implementation detail and doesn't relate to your question.
how to open file inside the folder
import json

with open("/path/to/device.json") as f:
    content = json.load(f)
print(content)
has to check if device exists in folder
import os.path
print("Exists? :", os.path.exists("/path/to/device.json"))

INFO: Could not find files for the given pattern(s) - VSC and python

I've been following a tutorial to learn Python and smart contracts (I'm totally new to coding), and while following every step to the letter, VS Code keeps returning the following message: INFO: Could not find files for the given pattern(s).
It still performs whatever action I ask of it, though:
from solcx import compile_standard, install_solc
import json
from web3 import Web3
import os
from dotenv import load_dotenv

load_dotenv()
install_solc("0.6.0")

with open("./SimpleStorage.sol", "r") as file:
    simple_storage_file = file.read()

compiled_sol = compile_standard(
    {
        "language": "Solidity",
        "sources": {"SimpleStorage.sol": {"content": simple_storage_file}},
        "settings": {
            "outputSelection": {
                "*": {"*": ["abi", "metadata", "evm.bytecode", "evm.sourceMap"]}
            }
        },
    },
    solc_version="0.6.0",
)

with open("compiled_code.json", "w") as file:
    json.dump(compiled_sol, file)

# get bytecode
bytecode = compiled_sol["contracts"]["SimpleStorage.sol"]["SimpleStorage"]["evm"][
    "bytecode"
]["object"]

# get ABI
abi = compiled_sol["contracts"]["SimpleStorage.sol"]["SimpleStorage"]["abi"]

w3 = Web3(Web3.HTTPProvider("HTTP://127.0.0.1:7545"))
chain_id = 1337
my_address = "0x237d38135A752544a4980438c3dd9dFDe409Fb49"
private_key = os.getenv("PRIVATE_KEY")

# create the contract in python
SimpleStorage = w3.eth.contract(abi=abi, bytecode=bytecode)

# get the latest transaction
nonce = w3.eth.getTransactionCount(my_address)

# 1. Build a transaction
# 2. Sign a transaction
# 3. Send a transaction
transaction = SimpleStorage.constructor().buildTransaction(
    {"chainId": chain_id, "from": my_address, "nonce": nonce})
signed_txn = w3.eth.account.sign_transaction(
    transaction, private_key=private_key)

# Send the signed transaction
print("Deploying contract...")
tx_hash = w3.eth.send_raw_transaction(signed_txn.rawTransaction)
tx_receipt = w3.eth.wait_for_transaction_receipt(tx_hash)
print("Deployed!")

# working with the contract
# contract address
# Contract ABI
simple_storage = w3.eth.contract(address=tx_receipt.contractAddress, abi=abi)

# Call > simulate making the call and getting the return value, doesn't make a change on the blockchain
# Transact > actually makes a state change

# Initial value of favorite number
print(simple_storage.functions.retrieve().call())
print("Updating contract...")
store_transaction = simple_storage.functions.store(15).buildTransaction(
    {"chainId": chain_id, "from": my_address, "nonce": nonce + 1}
)
signed_store_txn = w3.eth.account.sign_transaction(
    store_transaction, private_key=private_key)
send_store_tx = w3.eth.send_raw_transaction(signed_store_txn.rawTransaction)
tx_receipt = w3.eth.wait_for_transaction_receipt(send_store_tx)
print("Updated!")
print(simple_storage.functions.retrieve().call())
And the result in the terminal is :
PS C:\Users\chret\Documents\demo\web3_py_simple_storage> python deploy.py
INFO: Could not find files for the given pattern(s).
Deploying contract...
Deployed!
0
Updating contract...
Updated!
15
So I'm fairly confused: should I just ignore the warning "Could not find files for the given pattern(s)"? Or is there anything I can do to fix it, and is it going to create issues as I keep coding in these files? I've tried relocating the folders and including the path in the Environment Variables/PATH, but that doesn't stop the message from showing up.
It's been doing this from the beginning, and it never shows up in the video I'm following (freeCodeCamp's 16-hour YouTube tutorial about blockchain).
Thank you!
You're importing solcx. During the import it runs solcx\install.py, and near the end of that file it has this code:
try:
    # try to set the result of `which`/`where` as the default
    _default_solc_binary = _get_which_solc()
except Exception:
    # if not available, use the most recent solcx installed version
    if get_installed_solc_versions():
        set_solc_version(get_installed_solc_versions()[0], silent=True)
The _get_which_solc() function is defined earlier in the file, and (on Windows) runs this line:
response = subprocess.check_output(["where.exe", "solc"], encoding="utf8").strip()
which errors and sends the message you are worried about to the console
INFO: Could not find files for the given pattern(s).
This error is expected, and handled in the except Exception: clause (see above)
So nothing to worry about, you can ignore the warning :)
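If you want to double-check that a compiler is still available despite the message, a small sketch using the same solcx helpers the install code above relies on (version number taken from the tutorial):
from solcx import get_installed_solc_versions, install_solc, set_solc_version

# the INFO line only means `where.exe solc` found no system-wide binary;
# solcx can still use the compilers it installed itself
if not get_installed_solc_versions():
    install_solc("0.6.0")
set_solc_version("0.6.0")
print("Installed solc versions:", get_installed_solc_versions())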
I too had this problem. I wasn't able to fix it in my Windows environment, but the GitHub repo links to a tutorial for setting up the Brownie stack in an Ubuntu environment on Windows, and this has been working flawlessly for me. It's also easy to set up.
https://medium.com/@cromewar/how-to-setup-windows-10-11-for-smart-contract-development-and-brownie-e7d8d13555b3
It's not mentioned in the article, but currently (26/11/2021) you will want to install Node v16 and ganache v7.0.0-alpha.2 instead, due to compatibility issues. Refer to this link for NVM and Node versions:
https://learn.microsoft.com/en-us/windows/dev-environment/javascript/nodejs-on-wsl
Suppose a smart contract main.sol contains two (or one) contracts in one file, like this:
pragma solidity ^0.8.0;
import "./SafeERC20.sol";

contract mainContract {
    ... (Any code can be here ...)
}

contract childContract {
    ... (Other code here)
}
and our Python file, a.py:
import json
import os
import web3.eth
from web3 import Web3, HTTPProvider
from solcx import install_solc, set_solc_version, compile_standard
from dotenv import load_dotenv

# here install solidity version
install_solc('v0.8.0')
set_solc_version('v0.8.0')

file_path = "."
name = "main.sol"
input = {
    'language': 'Solidity',
    'sources': {
        name: {'urls': [file_path + "/" + name]}},
    'settings': {
        'outputSelection': {
            '*': {
                '*': ["abi", "metadata", "evm.bytecode", "evm.bytecode.sourceMap"],
            },
            'def': {name: ["abi", "evm.bytecode.opcodes"]},
        }
    }
}
output = compile_standard(input, allow_paths=file_path)
contracts = output["contracts"]
with open('compiled_code.json', "w") as file:
    json.dump(output, file)
bytecode = contracts["main.sol"]["mainContract"]["evm"]["bytecode"]["object"]
abi = contracts["main.sol"]["mainContract"]["abi"]
# Deploy on local ganache
# w3 = Web3(Web3.HTTPProvider("HTTP://127.0.0.1:7545"))
# chainId = 1337
# myAddress = "0x6235207DE426B0E3739529F1c53c14aaA271D..."
# privateKey = "0xdbe7f5a9c95ea2df023ad9......."

# Deploy on rinkeby via infura
w3 = Web3(Web3.HTTPProvider("https://rinkeby.infura.io/v3/......"))
chainId = 4
myAddress = "0xBa842323C4747609CeCEd164d61896d2Cf4..."
privateKey = "0x99de2de028a52668d3e94a00d47c4500db0afed3fe8e40..."

SCOnline = w3.eth.contract(abi=abi, bytecode=bytecode)
nonce = w3.eth.getTransactionCount(myAddress)
transaction = SCOnline.constructor().buildTransaction({
    "gasPrice": w3.eth.gas_price, "chainId": chainId, "from": myAddress, "nonce": nonce
})
signedTrx = w3.eth.account.sign_transaction(transaction, private_key=privateKey)
txHash = w3.eth.send_raw_transaction(signedTrx.rawTransaction)
txReceipt = w3.eth.wait_for_transaction_receipt(txHash)

Indexing avro file to elasticsearch in bulk

I wrote this short, simple script:
from elasticsearch import Elasticsearch
from fastavro import reader

es = Elasticsearch(['someIP:somePort'])

with open('data.avro', 'rb') as fo:
    avro_reader = reader(fo)
    for record in avro_reader:
        es.index(index="my_index", body=record)
It works absolutely fine. Each record is a JSON document, and Elasticsearch can index JSON documents. But rather than going one by one in a for loop, is there a way to do this in bulk? Because this is very slow.
There are two ways to do this:
1. Use the Elasticsearch Bulk API directly, with the requests Python library.
2. Use the Elasticsearch Python library, which internally calls the same Bulk API:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from fastavro import reader

es = Elasticsearch(['someIP:somePort'])

with open('data.avro', 'rb') as fo:
    avro_reader = reader(fo)
    records = [
        {
            "_index": "my_index",
            "_type": "record",
            "_id": j,
            "_source": record
        }
        for j, record in enumerate(avro_reader)
    ]
    helpers.bulk(es, records)
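If the Avro file is too large to hold every action in memory at once, helpers.bulk also accepts a generator, so a sketch like this streams the same actions instead of building the list first:
from elasticsearch import Elasticsearch, helpers
from fastavro import reader

es = Elasticsearch(['someIP:somePort'])

def generate_actions(path):
    # yield one bulk action per Avro record without materializing a list
    with open(path, 'rb') as fo:
        for j, record in enumerate(reader(fo)):
            yield {
                "_index": "my_index",
                "_type": "record",
                "_id": j,
                "_source": record,
            }

helpers.bulk(es, generate_actions('data.avro'))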

How to convert JSON data to PDF using python script

I want to convert JSON data, which I am getting from an API, to PDF.
Example JSON data:
{
    "data": [
        {
            "state": "Manchester",
            "quantity": 20
        },
        {
            "state": "Surrey",
            "quantity": 46
        },
        {
            "state": "Scotland",
            "quantity": 36
        },
        {
            "state": "Kent",
            "quantity": 23
        },
        {
            "state": "Devon",
            "quantity": 43
        },
        {
            "state": "Glamorgan",
            "quantity": 43
        }
    ]
}
I found this script:
http://code.activestate.com/recipes/578979-convert-json-to-pdf-with-python-and-xtopdf/
but I am getting the error
no module PDFWriter
Is there any other way to convert JSON data to PDF?
PLEASE HELP.
The module PDFWriter is in xtopdf:
PDFWriter - a core class of the xtopdf toolkit - can now be used with a Python context manager, a.k.a. the Python with statement.
(http://code.activestate.com/recipes/578790-use-pdfwriter-with-context-manager-support/)
How to install xtopdf is described at https://bitbucket.org/vasudevram/xtopdf :
Installation and usage:
To install the files, first make sure that you have downloaded and installed all the prerequisites mentioned above, including setup steps such as adding needed directories to your PYTHONPATH. Then, copy all the files in xtopdf.zip into a directory which is on your PYTHONPATH.
To use any of the Python programs, run the .py file as:
python filename.py
This will give a usage message about the correct usage and arguments
expected.
To run the shell script(s), do the same as above.
Developers can look at the source code for further information.
An alternative is to use pdfdocument to create the PDF; it can be installed using pip (https://pypi.python.org/pypi/pdfdocument). Parse the data from the JSON (How can I parse GeoJSON with Python, Parse JSON in Python) and print it as a PDF using pdfdocument:
import json
data = json.loads(datastring)

from io import BytesIO
from pdfdocument.document import PDFDocument

def say_hello():
    f = BytesIO()
    pdf = PDFDocument(f)
    pdf.init_report()
    pdf.h1('Hello World')
    pdf.p('Creating PDFs made easy.')
    pdf.generate()
    return f.getvalue()
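To render the question's JSON rather than a hello-world page, a sketch along these lines should work with the same PDFDocument calls (the report title and line format are my own choices):
import json
from io import BytesIO
from pdfdocument.document import PDFDocument

def json_to_pdf(datastring):
    # render each state/quantity pair from the JSON as one paragraph
    data = json.loads(datastring)
    f = BytesIO()
    pdf = PDFDocument(f)
    pdf.init_report()
    pdf.h1('Quantities by state')
    for row in data["data"]:
        pdf.p("%s: %s" % (row["state"], row["quantity"]))
    pdf.generate()
    return f.getvalue()

with open("report.pdf", "wb") as out:
    out.write(json_to_pdf(datastring))  # datastring holds the API response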
from json2html import *
import json
import pdfkit  # needed for to_pdf below; missing from the original snippet

class PdfConverter(object):
    def __init__(self):
        pass

    def to_html(self, json_doc):
        return json2html.convert(json=json_doc)

    def to_pdf(self, html_str):
        return pdfkit.from_string(html_str, None)

def main():
    stowflw = {
        "data": [
            {
                "state": "Manchester",
                "quantity": 20
            },
            {
                "state": "Surrey",
                "quantity": 46
            },
            {
                "state": "Scotland",
                "quantity": 36
            },
            {
                "state": "Kent",
                "quantity": 23
            },
            {
                "state": "Devon",
                "quantity": 43
            },
            {
                "state": "Glamorgan",
                "quantity": 43
            }
        ]
    }
    pdfc = PdfConverter()
    with open("sample.pdf", "wb") as pdf_fl:
        pdf_fl.write(pdfc.to_pdf(pdfc.to_html(json.dumps(stowflw))))

main()
Install json2html and pdfkit (pdfkit requires wkhtmltopdf).
Running the code below will generate a PDF for this URL (an API):
import pdfkit
pdfkit.from_url('https://api.covid19api.com/summary', 'india.pdf')
You can also generate a PDF from other sources: a file, an HTML string, plain text, or multiple URLs:
import json
import pdfkit
import requests

response = requests.get('https://api.covid19api.com/summary').text

# loads converts a string to a JSON object
json_object = json.loads(response)

# json.dumps converts a JSON object to a string
print(json.dumps(json_object, indent=1))

# different formats
pdfkit.from_url('http://aj7t.me', 'output.pdf')
pdfkit.from_file('test.html', 'output.pdf')
pdfkit.from_string('Hello!', 'output.pdf')
👍 For more information, please check the documentation!

Bulk Index data in Elasticsearch with sequential IDs

I am using this code to bulk index all data in Elasticsearch using python:
from elasticsearch import Elasticsearch, helpers
import json
import os
import sys

es = Elasticsearch()

def load_json(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            # os.listdir returns bare names, so join them with the directory
            with open(os.path.join(directory, filename), 'r') as open_file:
                yield json.load(open_file)

helpers.bulk(es, load_json(sys.argv[1]), index='v1_resume', doc_type='candidate')
I know that if an ID is not specified, ES generates a 20-character ID by itself, but I want the documents indexed with IDs starting from 1 up to the number of documents.
How can I achieve this?
In Elasticsearch, if you don't pick an ID for your document, an ID is automatically created for you; check the Elastic docs:
Autogenerated IDs are 20 character long, URL-safe, Base64-encoded GUID strings. These GUIDs are generated from a modified FlakeID scheme which allows multiple nodes to be generating unique IDs in parallel with essentially zero chance of collision.
If you would like custom IDs, you need to build them yourself, using syntax similar to this:
[
    {
        '_id': 1,
        '_index': 'index-name',
        '_type': 'document',
        '_source': {
            "title": "Hello World!",
            "body": "..."
        }
    },
    {
        '_id': 2,
        '_index': 'index-name',
        '_type': 'document',
        '_source': {
            "title": "Hello World!",
            "body": "..."
        }
    }
]
helpers.bulk(es, load_json(sys.argv[1]))
Since you are declaring the type and index inside each action, you don't have to pass them to the helpers.bulk() method. You need to change the output of load_json to produce dicts like the ones above (see the Python Elastic client docs), as in the sketch below.
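For example, a sketch of load_json reworked to emit complete bulk actions with sequential IDs starting at 1 (index and type names taken from the question):
from elasticsearch import Elasticsearch, helpers
import json
import os
import sys

es = Elasticsearch()

def load_json(directory):
    # yield one bulk action per JSON file, with IDs 1, 2, 3, ...
    doc_id = 1
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            with open(os.path.join(directory, filename), 'r') as open_file:
                yield {
                    '_id': doc_id,
                    '_index': 'v1_resume',
                    '_type': 'candidate',
                    '_source': json.load(open_file),
                }
                doc_id += 1

helpers.bulk(es, load_json(sys.argv[1]))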
