I created a class of functions that provision some cloud infrastructure.
response = self.ecs_client.register_task_definition(
    containerDefinitions=[
        {
            "name": "redis-283C462837EF23AA",
            "image": "redis:3.2.7",
            "cpu": 1,
            "memory": 512,
            "essential": True,
        },
        ...
This is a very long JSON; I'm showing just the beginning.
Then I refactored the code to use parameters instead of the hard-coded hash, memory, and CPU.
response = self.ecs_client.register_task_definition(
    containerDefinitions=[
        {
            "name": f"redis-{git_hash}",
            "image": "redis:3.2.7",
            "cpu": {num_cpu},
            "memory": {memory_size},
            "essential": True,
        },
        ...
I read the values of git_hash, num_cpu and memory_size from a config file prior to this code.
Now, I also want to read the entire JSON from a file.
The problem is that if I save {num_cpu} etc. in a file, the string interpolation won't work.
How can I extract the json from my logic and still use string interpolation or variables?
You can use Template from the string module. Save the template below as test.json:
{
    "name": "redis-${git_hash}",
    "image": "redis:3.2.7",
    "cpu": ${num_cpu},
    "memory": ${memory_size},
    "essential": true
}
from string import Template
import json

if __name__ == '__main__':
    data = dict(
        num_cpu=1,
        memory_size=1,
        git_hash=1
    )
    with open('test.json', 'r') as json_file:
        content = json_file.read()
    template = Template(content)
    configuration = json.loads(template.substitute(data))
    print(configuration)
    # {'name': 'redis-1', 'image': 'redis:3.2.7', 'cpu': 1, 'memory': 1, 'essential': True}
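As a side note, Template.substitute raises a KeyError if any placeholder in the template has no matching key; string.Template also provides safe_substitute, which leaves unknown placeholders untouched instead of raising. A minimal sketch (the inline template string here is only for illustration):

from string import Template

# safe_substitute leaves ${memory_size} in place instead of raising KeyError
template = Template('{"cpu": ${num_cpu}, "memory": ${memory_size}}')
print(template.safe_substitute(num_cpu=1))
# prints: {"cpu": 1, "memory": ${memory_size}}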
Opinion: I think the overall approach is wrong. There is a reason why this method is not as popular as others. You can separate your configuration into two files: (1) a static list of options and (2) your compact, changeable configuration, and compose them in your code.
EDIT: You can create an object that reads the configuration from a standard (static or changeable) JSON file, FileConfig, and then compose instances using another object, something like ComposedConfig.
This will allow you to extend the behaviour, and add, for example, a run-time configuration in the mix. This way the configuration from your JSON file no longer depends on the run-time params, and you can separate what is changeable from what is static in your system.
PS: The get method is just an example for explaining the composed behaviour; you can use other methods/designs.
import json
from abc import ABC, abstractmethod


class Configuration(ABC):
    @abstractmethod
    def get(self, key: str, default: str) -> str:
        pass


class FileConfig(Configuration):
    def __init__(self, file_path):
        self.__content = {}
        with open(file_path, 'r') as json_file:
            self.__content = json.load(json_file)

    def get(self, key: str, default: str) -> str:
        return self.__content.get(key, default)


class RunTimeConfig(Configuration):
    def __init__(self, option: str):
        self.__content = {'option': option}

    def get(self, key: str, default: str) -> str:
        return self.__content.get(key, default)


class ComposedConfig:
    def __init__(self, first: Configuration, second: Configuration):
        self.__first = first
        self.__second = second

    def get(self, key: str, default: str) -> str:
        return self.__first.get(key, self.__second.get(key, default))


if __name__ == '__main__':
    static = FileConfig("static.json")
    changeable = FileConfig("changeable.json")
    runTime = RunTimeConfig(option="a")
    config = ComposedConfig(static, changeable)
    alternative = ComposedConfig(static, runTime)
    print(config.get("image", "test"))        # redis:3.2.7
    print(alternative.get("option", "test"))  # a
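For reference, hypothetical contents of the two JSON files that would produce the output above (these values are assumptions for illustration; they are not part of the original post):

static.json:
{
    "image": "redis:3.2.7",
    "cpu": 1
}

changeable.json:
{
    "memory": 512
}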
I'm trying to create and run a Great Expectations checkpoint; for this I created this Python script:
import sys
from datetime import datetime

from great_expectations.data_context import DataContext
from great_expectations.validation_operators.types.validation_operator_result import (
    ValidationOperatorResult,
)
import great_expectations as gx
from airflow import AirflowException


def execute_new_checkpoint(
    ge_root_dir: str,
    datasource_name: str,
    data_connector_name: str,
    data_asset_name: str,
    checkpoint_name: str,
) -> None:
    """Execute a new checkpoint"""
    context: DataContext = gx.data_context.DataContext(ge_root_dir)
    context.add_checkpoint(
        name=checkpoint_name,
        batch_request={
            "datasource_name": datasource_name,
            "data_connector_name": data_connector_name,
            "data_asset_name": data_asset_name,
        },
    )
    results: ValidationOperatorResult = context.run_checkpoint(
        checkpoint_name=checkpoint_name,
        run_id=f"airflow: {checkpoint_name}:{datetime.now():%Y%m%dT%H%M%S}",
    )
    if not results["success"]:
        raise AirflowException("Validation of the data is not successful")


if __name__ == "__main__":
    ...
    execute_new_checkpoint(*sys.argv[1:])
And when I run it I obtain a warning and an error:
$ python ge_run_checkpoint.py /path/to/great_expectations/ my_datasource my_data_connector my_data_asset my_checkpoint
...
{util.py:56} WARNING - Instantiating class from config without an explicit class_name is dangerous. Consider adding an explicit class_name for tests1
...
KeyError: "Neither config : {'name': 'tests1', 'batch_request': {'datasource_name': 'my_datasource', 'data_connector_name': 'my_data_connector', 'data_asset_name': 'my_data_asset'}} nor config_defaults : {} contains a class_name key."
I've tried to add a class_name of Asset and ConfiguredAssetSqlDataConnector, but I also obtain an error:
- Please verify that the class named `ConfiguredAssetSqlDataConnector` exists.
I want to create and run a Great Expectations checkpoint to validate the expectations.
I improved the script with:
import re  # needed for the run_name_template below


def execute_new_checkpoint(
    ge_root_dir: str,
    datasource_name: str,
    data_connector_name: str,
    data_asset_name: str,
    checkpoint_name: str,
    expectation_name: str = "default",
) -> None:
    context: DataContext = gx.data_context.DataContext(ge_root_dir)
    context.add_checkpoint(
        name=checkpoint_name,
        config_version=1.0,
        class_name="SimpleCheckpoint",
        run_name_template=f"%Y%m%d-%H%M%S-{re.sub('[_ ]', '-', checkpoint_name.lower())}",
        validations=[
            {
                "batch_request": {
                    "datasource_name": datasource_name,
                    "data_connector_name": data_connector_name,
                    "data_asset_name": data_asset_name,
                    "data_connector_query": {"index": -1},
                },
                "expectation_suite_name": expectation_name,
            }
        ],
    )
...
It was necessary to modify the built checkpoint and add the expectation suite name.
This is based on the Jupyter notebooks created by the command:
$ great_expectations --v3-api suite new
JSON file
{
    "payloadFormatVersion": "9.0",
    "payload": {
        "ServiceConfiguration": {
            "LoggingSettings": {
                "NumberOfLogFilesToKeep": 7,
                "LogFileSizeBytes": 0,
                "LogFolderPath": "C:\\demo\\logs\\feature\\",
                "EnvironmentType": "testingenv",
                "DataRelayLogSink": {
                    "PeriodInSeconds": 60,
                    "TargetAddress": "http://localhost:portNumber/dumm1",
                    "TargetTokenAddress": "http://localhost:portnumber/token",
                    "PayloadType": "somedata",
                    "TokenCredentials": {
                        "ClientId": "testclientid",
                        "ClientSecret": "testclientsecret",
                        "GrantType": "testgranttype"
                    }
                }
            }
        }
    }
}
Python code
def repalcejsonForSpecificKey(keyPath, fileName):
    filePath = "C:\\rajesh\\Configurations\\" + fileName + "\\" + fileName + ".json"
    print(filePath)
    Dict = {}
    with open(filePath) as f:
        superHeroSquad = json.load(f)
    duplicatedict = {}
    duplicatedict = superHeroSquad
    testDict = getDictonaryItems(keyPath[0], **superHeroSquad)
    print(testDict)


def getDictonaryItems(searchKey, duplicatedict):
    if searchKey in duplicatedict.keys():
        testDict = duplicatedict[searchKey]
        return testDict


keyPath = ["payload", "ServiceConfiguration", "TokenSettings", "ClientId"]
fileName = "vestas.sdh.dr.gateway"
repalcejsonForSpecificKey(keyPath, fileName)
Below is my plan:
1. Method1 accepts two arguments: the JSON key path whose value should be replaced, and the file name.
2. Load the JSON file into a dictionary.
3. Call method2 recursively; it accepts two arguments, a search key and a dictionary, and returns all the keys and values under the specific key passed in the call.
4. Recursively call this method until you reach the downstream key, and update the value if found.
I'm trying to update a nested value in a JSON file using Python.
Note: I was able to update the value in the JSON file directly with the line below:
superHeroSquad['payload']['ServiceConfiguration']['TokenSettings']['ClientId'] = "text"
But not like below:
superHeroSquad[keyPath[0]][keyPath[1]][keyPath[2]][keyPath[3]] = "text"
You could traverse your json as a map and replace the specific values like this:
import json
from typing import Any


def replace_json_for_specific_key(file: str, key_pairs: dict[str, Any]):
    with open(file) as f:
        content = json.load(f)

    for k, v in key_pairs.items():
        keys = k.split(".")
        element = content
        # walk down to the parent of the last key, creating missing levels
        for key in keys[:-1]:
            element = element.setdefault(key, {})
        element[keys[-1]] = v

    with open(file, "w") as f:
        json.dump(content, f)


if __name__ == '__main__':
    replace_json_for_specific_key(
        "input.json",
        {
            "payload.ServiceConfiguration.LoggingSettings.NumberOfLogFilesToKeep": 90,
            "payload.ServiceConfiguration.LoggingSettings.DataRelayLogSink.TokenCredentials.ClientId": "anothervalue"
        }
    )
Notice that it allows you to replace several values at once. You'll need to pass the dot (.) separated path to the specific key.
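If you also want to read a value back along the same dot-separated path (complementing the getDictonaryItems idea from the question), here is a minimal sketch; the function name get_json_value is just illustrative:

import json
from functools import reduce


def get_json_value(file: str, key_path: str):
    with open(file) as f:
        content = json.load(f)
    # walk the nested dictionaries along the dot-separated path
    return reduce(lambda d, key: d[key], key_path.split("."), content)


print(get_json_value(
    "input.json",
    "payload.ServiceConfiguration.LoggingSettings.DataRelayLogSink.TokenCredentials.ClientId"
))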
I am trying to patch out the Azure Digital Twin API in my code. I currently have an approach that works, but nesting with patch statements is probably not the most Pythonic.
What is the best way to rewrite this such that I can use it in multiple test functions and change the return values if needed?
def test_create_digital_twin(self):
    with patch("endpoints.digital_twin.ClientSecretCredential"):
        with patch("azure_digital_twin.create_digital_twin.DigitalTwinsClient.query_twins") as mock_query:
            with patch("azure_digital_twin.create_digital_twin.DigitalTwinsClient.upsert_digital_twin") as mock_upsert_twin:
                with patch("azure_digital_twin.create_digital_twin.DigitalTwinsClient.upsert_relationship") as mock_upsert_relationship:
                    mock_query.return_value = []
                    mock_upsert_twin.return_value = {
                        "$dtId": "spc-1",
                        "$etag": "random",
                        "$model": "dtmi:digitaltwins:rec_3_3:core:Asset;1"
                    }
                    mock_upsert_relationship.return_value = {
                        "$relationshipId": "spc-1-hasPart-spc-2",
                        "$sourceId": "spc-1",
                        "$targetId": "spc-2",
                        "$relationshipName": "hasPart"
                    }
                    response = self.client.post(
                        endpoint,
                        params={"node": "spc-1"},
                    )
                    assert response.status_code == status.HTTP_201_CREATED
You might use an ExitStack from the contextlib module.
from contextlib import ExitStack


def test_create_digital_twin(self):
    with ExitStack() as es:
        def make_azure_patch(x):
            # patch a method on DigitalTwinsClient and register it with the stack
            return es.enter_context(patch(f'azure_digital_twin.create_digital_twin.DigitalTwinsClient.{x}'))

        es.enter_context(patch("endpoints.digital_twin.ClientSecretCredential"))
        mock_query = make_azure_patch("query_twins")
        mock_upsert_twin = make_azure_patch("upsert_digital_twin")
        mock_upsert_relationship = make_azure_patch("upsert_relationship")

        mock_query.return_value = []
        mock_upsert_twin.return_value = {
            "$dtId": "spc-1",
            "$etag": "random",
            "$model": "dtmi:digitaltwins:rec_3_3:core:Asset;1"
        }
        mock_upsert_relationship.return_value = {
            "$relationshipId": "spc-1-hasPart-spc-2",
            "$sourceId": "spc-1",
            "$targetId": "spc-2",
            "$relationshipName": "hasPart"
        }

        response = self.client.post(
            endpoint,
            params={"node": "spc-1"},
        )
        assert response.status_code == status.HTTP_201_CREATED
make_azure_patch is just a helper function to reduce the length of the lines creating three of the individual patches.
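Since you also want to reuse the patches across multiple test functions and change the return values per test, another option is to start the patches in setUp and register the stack for cleanup. A minimal sketch, assuming a unittest-style test case (the class name and attribute names are illustrative, not from the original code):

import unittest
from contextlib import ExitStack
from unittest.mock import patch


class DigitalTwinApiTests(unittest.TestCase):  # hypothetical test-case class
    def setUp(self):
        stack = ExitStack()
        self.addCleanup(stack.close)  # stops every patch after each test
        stack.enter_context(patch("endpoints.digital_twin.ClientSecretCredential"))
        base = "azure_digital_twin.create_digital_twin.DigitalTwinsClient"
        self.mock_query = stack.enter_context(patch(f"{base}.query_twins"))
        self.mock_upsert_twin = stack.enter_context(patch(f"{base}.upsert_digital_twin"))
        self.mock_upsert_relationship = stack.enter_context(patch(f"{base}.upsert_relationship"))

    def test_create_digital_twin(self):
        # each test can now set the return values it needs on the self.mock_* attributes
        self.mock_query.return_value = []
        ...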
I'm creating an application that stores its config in a dictionary. I know I can write this to a JSON file and read it every time the app starts. But the problem is that this dictionary also contains objects, like so (LED is an imported module with the classes APALedstrip and Arduino):
rooms['livingroom'] = {
    "room": data.room(name='livingroom', dataKeys=dataKeys),
    "lights": {
        "LedStrip": LED.APALedstrip(name='livingroom',
                                    room='livingroom')
    }
}
rooms['bed'] = {
    "room": data.room(name='bed', dataKeys=dataKeys),
    "lights": {
        "LedStrip": LED.Arduino(name='bed',
                                serialPort='ttyUSB0',
                                room='livingroom',
                                master={'room': 'livingroom', 'light': 'LedStrip'},
                                roomSensors='livingroom')
    }
}
I'm curious: is it also possible to store this in a JSON file like so, and have the objects still be created when it's imported back into a dictionary?
You need to serialize your objects. One way is to use pickle.
Pickle converts an object to bytes, so the next step is to convert those bytes to a string using base64. I chose base64 because it's safe for non-ASCII characters.
To automatically save and retrieve the rooms, use save_rooms() and retrieve_rooms():
import codecs
import json
import pickle


def save_rooms(rooms):
    for room in rooms:
        # find all LedStrip objects
        if 'lights' in rooms[room] and 'LedStrip' in rooms[room]['lights']:
            lights = rooms[room]['lights']['LedStrip']
            # encode object to bytes with pickle and then to string with base64
            rooms[room]['lights']['LedStrip'] = codecs.encode(pickle.dumps(lights), "base64").decode()
    with open("rooms.json", "w") as f:
        json.dump(rooms, f)


def retrieve_rooms():
    with open("rooms.json") as f:
        rooms = json.load(f)
    for room in rooms:
        # find all LedStrip objects
        if 'lights' in rooms[room] and 'LedStrip' in rooms[room]['lights']:
            lights = rooms[room]['lights']['LedStrip']
            # decode from string to bytes with base64 and then from bytes to object with pickle
            rooms[room]['lights']['LedStrip'] = pickle.loads(codecs.decode(lights.encode(), "base64"))
    return rooms


rooms = {}
rooms['livingroom'] = {
    "room": data.room(name='livingroom', dataKeys=dataKeys),
    "lights": {
        "LedStrip": LED.APALedstrip(name='livingroom',
                                    room='livingroom')
    }
}
rooms['bed'] = {
    "room": data.room(name='bed', dataKeys=dataKeys),
    "lights": {
        "LedStrip": LED.Arduino(name='bed',
                                serialPort='ttyUSB0',
                                room='livingroom',
                                master={'room': 'livingroom', 'light': 'LedStrip'},
                                roomSensors='livingroom')
    }
}

save_rooms(rooms)
loaded_rooms = retrieve_rooms()
In addition, I implemented the logic so you can save any variation of rooms as long as you keep the structure the same, e.g.:
rooms['kitchen'] = {
    "room": data.room(name='kitchen', dataKeys=dataKeys),
    "lights": {
        "LedStrip": LED.APALedstrip(name='kitchen',
                                    room='kitchen')
    }
}
I receive dicts such as the following (without knowing the exact structure in advance):
{
    'a': 1,
    'id': UUID('6b3acb30-08bf-400c-bc64-bf70489e388c'),
}
This dict is not directly serializable, but when casting the value of id to a str, it is:
import json
import uuid

print(json.dumps({
    'a': 1,
    'id': str(uuid.UUID('6b3acb30-08bf-400c-bc64-bf70489e388c')),
}))
# outputs {"a": 1, "id": "6b3acb30-08bf-400c-bc64-bf70489e388c"}
In the general case where I have elements which need to be cast to a str before being serializable, is there a generic (Pythonic) way to make the transformation automatically?
The best option is to override the JSONEncoder.default method:
class MyJSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, uuid.UUID):
            return str(o)
        return super().default(o)

print(MyJSONEncoder().encode(data))
If you want to stringify everything that the default encoder cannot handle, you may use the following trick, although I would recommend taking control of the types you want to support.
class MyJSONEncoder(json.JSONEncoder):
    def default(self, o):
        try:
            return super().default(o)
        except TypeError:
            return str(o)

print(json.dumps(data, cls=MyJSONEncoder))
DOCS: https://docs.python.org/3/library/json.html#json.JSONEncoder.default
No, there is not, but you can check the data items individually and convert them when needed. This way you do not need to know the data structure in advance. Consider the following:
import json
import uuid

data = {
    'a': 1,
    'id': uuid.UUID('6b3acb30-08bf-400c-bc64-bf70489e388c')
}

for k, v in data.items():
    try:
        json.dumps(v)
    except TypeError:
        data[k] = str(v)

print(json.dumps(data))
# outputs {"a": 1, "id": "6b3acb30-08bf-400c-bc64-bf70489e388c"}
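Note that this loop only inspects the top-level values; a non-serializable object nested deeper inside a list or dictionary would still raise a TypeError, which is where the encoder-based approach above is more robust.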