I'm migrating an application that formerly ran on IBM's DoCloud to their new Watson-based API. Since our application doesn't have its data formatted as CSV, nor a separation between the model and data layers, it seemed simpler to upload an LP file along with a model file that reads the LP file and solves it. The upload works and the job reports that it solved, but the solve status comes back empty. I've also output various model info (e.g. the number of variables) and everything is zeroed out. I've confirmed the LP isn't blank - it contains a trivial MILP.
Here is my model code (most of which is taken directly from the example at https://dataplatform.cloud.ibm.com/exchange/public/entry/view/50fa9246181026cd7ae2a5bc7e4ac7bd):
import os
import sys
from os.path import splitext

import pandas
from docplex.mp.model_reader import ModelReader
from docplex.util.environment import get_environment
from six import iteritems


def loadModelFiles():
    """Load the input CSVs and extract the model and param data from it
    """
    env = get_environment()
    inputModel = params = None
    modelReader = ModelReader()
    for inputName in [f for f in os.listdir('.') if splitext(f)[1] != '.py']:
        inputBaseName, ext = splitext(inputName)
        print(f'Info: loading {inputName}')
        try:
            if inputBaseName == 'model':
                inputModel = modelReader.read_model(inputName, model_name=inputBaseName)
            elif inputBaseName == 'params':
                params = modelReader.read_prm(inputName)
        except Exception as e:
            with env.get_input_stream(inputName) as inStream:
                inData = inStream.read()
                raise Exception(f'Error: {e} found while processing {inputName} with contents {inData}')
    if inputModel is None or params is None:
        print('Warning: error loading model or params, see earlier messages for details')
    return inputModel, params
def writeOutputs(outputs):
    """Write all dataframes in ``outputs`` as .csv.

    Args:
        outputs: The map of outputs 'outputname' -> 'output df'
    """
    for (name, df) in iteritems(outputs):
        csv_file = '%s.csv' % name
        print(csv_file)
        with get_environment().get_output_stream(csv_file) as fp:
            if sys.version_info[0] < 3:
                fp.write(df.to_csv(index=False, encoding='utf8'))
            else:
                fp.write(df.to_csv(index=False).encode(encoding='utf8'))
    if len(outputs) == 0:
        print("Warning: no outputs written")
# load and solve model
model, modelParams = loadModelFiles()
ok = model.solve(cplex_parameters=modelParams)

solution_df = pandas.DataFrame(columns=['name', 'value'])
for index, dvar in enumerate(model.solution.iter_variables()):
    solution_df.loc[index, 'name'] = dvar.to_string()
    solution_df.loc[index, 'value'] = dvar.solution_value

outputs = {}
outputs['solution'] = solution_df

# Generate output files
writeOutputs(outputs)

try:
    with get_environment().get_output_stream('test.txt') as fp:
        fp.write(f'{model.get_statistics()}'.encode('utf-8'))
except Exception as e:
    with get_environment().get_output_stream('excInfo') as fp:
        fp.write(f'Got exception {e}'.encode('utf-8'))
and a stub of the code that runs it (again, pulling heavily from the example):
prmFile = NamedTemporaryFile()
prmFile.write(self.ctx.cplex_parameters.export_prm_to_string().encode())
modelFile = NamedTemporaryFile()
modelFile.write(self.solver.export_as_lp_string(hide_user_names=True).encode())

modelMetadata = {
    self.client.repository.ModelMetaNames.NAME: self.name,
    self.client.repository.ModelMetaNames.TYPE: 'do-docplex_12.9',
    self.client.repository.ModelMetaNames.RUNTIME_UID: 'do_12.9'
}

baseDir = os.path.dirname(os.path.realpath(__file__))

def reset(tarinfo):
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = 'root'
    return tarinfo

with NamedTemporaryFile() as tmp:
    tar = tarfile.open(tmp.name, 'w:gz')
    tar.add(f'{baseDir}/ibm_model.py', arcname='main.py', filter=reset)
    tar.add(prmFile.name, arcname='params.prm', filter=reset)
    tar.add(modelFile.name, arcname='model.lp', filter=reset)
    tar.close()

    modelDetails = self.client.repository.store_model(
        model=tmp.name,
        meta_props=modelMetadata
    )

modelUid = self.client.repository.get_model_uid(modelDetails)

metaProps = {
    self.client.deployments.ConfigurationMetaNames.NAME: self.name,
    self.client.deployments.ConfigurationMetaNames.BATCH: {},
    self.client.deployments.ConfigurationMetaNames.COMPUTE: {'name': 'S', 'nodes': 1}
}
deployDetails = self.client.deployments.create(modelUid, meta_props=metaProps)
deployUid = self.client.deployments.get_uid(deployDetails)

solvePayload = {
    # we upload input data as part of model since only CSV data is supported in this interface
    self.client.deployments.DecisionOptimizationMetaNames.INPUT_DATA: [],
    self.client.deployments.DecisionOptimizationMetaNames.OUTPUT_DATA: [
        {
            "id": ".*"
        }
    ]
}
jobDetails = self.client.deployments.create_job(deployUid, solvePayload)
jobUid = self.client.deployments.get_job_uid(jobDetails)

while jobDetails['entity']['decision_optimization']['status']['state'] not in ['completed', 'failed', 'canceled']:
    logger.debug(jobDetails['entity']['decision_optimization']['status']['state'] + '...')
    time.sleep(5)
    jobDetails = self.client.deployments.get_job_details(jobUid)
logger.debug(jobDetails['entity']['decision_optimization']['status']['state'])

# cleanup
self.client.repository.delete(modelUid)
prmFile.close()
modelFile.close()
Any ideas what could be causing this, or what a good debugging avenue would be? It seems there's no way to view the output of the model for debugging; am I missing something in Watson Studio?
I tried something very similar based on your code, and the solution is included in the payload when the job is completed.
See this shared notebook: https://dataplatform.cloud.ibm.com/analytics/notebooks/v2/cfbe34a0-52a8-436c-99bf-8df6979c11da/view?access_token=220636400ecdf537fb5ea1b47d41cb10f1b252199d1814d8f96a0280ec4a4e1e
In the last cells, after the job is completed, I print the status:
print(jobDetails['entity']['decision_optimization'])
and get
{'output_data_references': [], 'input_data': [], 'solve_state': {'details': {'PROGRESS_GAP': '0.0', 'MODEL_DETAIL_NONZEROS': '3', 'MODEL_DETAIL_TYPE': 'MILP', 'MODEL_DETAIL_CONTINUOUS_VARS': '0', 'MODEL_DETAIL_CONSTRAINTS': '2', 'PROGRESS_CURRENT_OBJECTIVE': '100.0', 'MODEL_DETAIL_INTEGER_VARS': '2', 'MODEL_DETAIL_KPIS': '[]', 'MODEL_DETAIL_BOOLEAN_VARS': '0', 'PROGRESS_BEST_OBJECTIVE': '100.0'}, 'solve_status': 'optimal_solution'}, 'output_data': [{'id': 'test.txt', 'fields': ['___TEXT___'], 'values': [['IC0gbnVtYmVyIG9mIHZhcmlhYmxlczogMgogICAtIGJpbmFyeT0wLCBpbnRlZ2VyPTIsIGNvbnRpbnVvdXM9MAogLSBudW1iZXIgb2YgY29uc3RyYWludHM6IDIKICAgLSBsaW5lYXI9Mg==']]}, {'id': 'solution.json', 'fields': ['___TEXT___'], 'values': [['eyJDUExFWFNvbHV0aW9uIjogeyJ2ZXJzaW9uIjogIjEuMCIsICJoZWFkZXIiOiB7InByb2JsZW1OYW1lIjogIm1vZGVsIiwgIm9iamVjdGl2ZVZhbHVlIjogIjEwMC4wIiwgInNvbHZlZF9ieSI6ICJjcGxleF9sb2NhbCJ9LCAidmFyaWFibGVzIjogW3siaW5kZXgiOiAiMCIsICJuYW1lIjogIngiLCAidmFsdWUiOiAiNS4wIn0sIHsiaW5kZXgiOiAiMSIsICJuYW1lIjogInkiLCAidmFsdWUiOiAiOTUuMCJ9XSwgImxpbmVhckNvbnN0cmFpbnRzIjogW3sibmFtZSI6ICJjMSIsICJpbmRleCI6IDB9LCB7Im5hbWUiOiAiYzIiLCAiaW5kZXgiOiAxfV19fQ==']]}, {'id': 'solution.csv', 'fields': ['name', 'value'], 'values': [['x', 5], ['y', 95]]}], 'status': {'state': 'completed', 'running_at': '2020-03-09T06:45:29.759Z', 'completed_at': '2020-03-09T06:45:30.470Z'}}
which contains, in its output:
'output_data': [{
    'id': 'test.txt',
    'fields': ['___TEXT___'],
    'values': [['IC0gbnVtYmVyIG9mIHZhcmlhYmxlczogMgogICAtIGJpbmFyeT0wLCBpbnRlZ2VyPTIsIGNvbnRpbnVvdXM9MAogLSBudW1iZXIgb2YgY29uc3RyYWludHM6IDIKICAgLSBsaW5lYXI9Mg==']]
}, {
    'id': 'solution.json',
    'fields': ['___TEXT___'],
    'values': [['eyJDUExFWFNvbHV0aW9uIjogeyJ2ZXJzaW9uIjogIjEuMCIsICJoZWFkZXIiOiB7InByb2JsZW1OYW1lIjogIm1vZGVsIiwgIm9iamVjdGl2ZVZhbHVlIjogIjEwMC4wIiwgInNvbHZlZF9ieSI6ICJjcGxleF9sb2NhbCJ9LCAidmFyaWFibGVzIjogW3siaW5kZXgiOiAiMCIsICJuYW1lIjogIngiLCAidmFsdWUiOiAiNS4wIn0sIHsiaW5kZXgiOiAiMSIsICJuYW1lIjogInkiLCAidmFsdWUiOiAiOTUuMCJ9XSwgImxpbmVhckNvbnN0cmFpbnRzIjogW3sibmFtZSI6ICJjMSIsICJpbmRleCI6IDB9LCB7Im5hbWUiOiAiYzIiLCAiaW5kZXgiOiAxfV19fQ==']]
}, {
    'id': 'solution.csv',
    'fields': ['name', 'value'],
    'values': [['x', 5], ['y', 95]]
}
],
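Note that the `___TEXT___` values in `output_data` are base64-encoded, so they can be decoded directly from the job details. A minimal sketch (using the `jobDetails` structure above) might look like:

import base64

# decode every text-style output attached to the completed job
for output in jobDetails['entity']['decision_optimization']['output_data']:
    if output.get('fields') == ['___TEXT___']:
        text = base64.b64decode(output['values'][0][0]).decode('utf-8')
        print(output['id'], '->', text)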
Hope this helps.
Alain
Thanks to Alain for verifying the overall approach, but the main issue was simply a bug in my code:
After calling modelFile.write(...) it's necessary to call modelFile.seek(0) to reset the file pointer - otherwise an empty file gets written into the tar archive.
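For reference, a minimal sketch of the corrected temp-file handling (the same NamedTemporaryFile objects as in the stub above, with the added seek calls):

prmFile = NamedTemporaryFile()
prmFile.write(self.ctx.cplex_parameters.export_prm_to_string().encode())
prmFile.seek(0)  # rewinding also flushes the buffered data, so tar.add() (which reopens the file by name) sees the full contents

modelFile = NamedTemporaryFile()
modelFile.write(self.solver.export_as_lp_string(hide_user_names=True).encode())
modelFile.seek(0)  # same here - without this, an empty model.lp ends up in the archive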
Related
How do I extract the data in this CSV as a Python dictionary without importing packages?
A sample of the data:
User-ID;"ISBN";"Book-Rating"
276725;"034545104X";"0"
276726;"0155061224";"5"
276727;"0446520802";"0"
276729;"052165615X";"3"
def loadRatings():
    # Get bookratings
    try:
        bookR = {}
        for line in open('booktext.csv'):
            (id, title) = line.split(';')[0:2]
            bookR[id] = title
        return bookR
    except IOError as ioerr:
        print('File error: ' + str(ioerr))

print(loadRatings())
but I need my result to be like:
bookR = {User-ID: 276725, ISBN: 034545104X, Rating: 0}
The following code will produce the output shown below it:
with open("booktext.csv") as f:
for i, line in enumerate(f):
# skip header
if i == 0:
continue
row_lst = line.replace("\n","").replace('"','').split(";")
if len(row_lst) == 3:
bookR = {
"User-ID": row_lst[0],
"ISBN": row_lst[1],
"Rating": row_lst[2]
}
print(bookR)
{'User-ID': '276725', 'ISBN': '034545104X', 'Rating': '0'}
{'User-ID': '276726', 'ISBN': '0155061224', 'Rating': '5'}
{'User-ID': '276727', 'ISBN': '0446520802', 'Rating': '0'}
{'User-ID': '276729', 'ISBN': '052165615X', 'Rating': '3'}
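If you want to keep every row rather than just print it, a small variation (same parsing as above, with a hypothetical books list) would be:

books = []
with open("booktext.csv") as f:
    for i, line in enumerate(f):
        if i == 0:  # skip header
            continue
        row_lst = line.replace("\n", "").replace('"', '').split(";")
        if len(row_lst) == 3:
            books.append({
                "User-ID": row_lst[0],
                "ISBN": row_lst[1],
                "Rating": row_lst[2]
            })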
You should always use the with context manager when working with files unless you really know what you are doing and have a good reason not to. Read more on that at https://stackoverflow.com/a/3012921/20646982
The description is vague about what you are looking for; it's not clear whether the result should be a single dict of all items or a separate dict per line. In case you need a single dict, you can use this simple approach, with a bit of formatting afterwards depending on the data types you require.
I managed to recreate results like this:
with open('ex.csv', newline="") as f:
    d = list(f.read().split('\n'))  # split the file contents into lines

keys = d[0].split(';')
values = d[1:]

book = {}
for idx, key in enumerate(keys):
    book[key] = []
    for i in range(len(values)):
        book[key].append(values[i].split(';')[idx])
Which produces results:
{'User-ID': ['276725', '276726', '276727', '276729'],
'"ISBN"': ['"034545104X"', '"0155061224"', '"0446520802"', '"052165615X"'],
'"Book-Rating"': ['"0"', '"5"', '"0"', '"3"']}
import csv

filename = "Geeks.csv"

# opening the file using the "with" statement
with open(filename, 'r') as data:
    for line in csv.DictReader(data):
        print(line)
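Note that the sample file above is semicolon-separated, so (assuming you are willing to import csv despite the no-imports constraint) you would want to pass the delimiter explicitly; a quick sketch:

import csv

with open("booktext.csv", newline="") as data:
    for row in csv.DictReader(data, delimiter=';'):
        print(row)  # e.g. {'User-ID': '276725', 'ISBN': '034545104X', 'Book-Rating': '0'}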
I'm a Python beginner. I would like to ask for help with retrieving the response data. Here's my script:
import pandas as pd
import re
import time
import requests
import json

response = requests.get(url, headers=headers, auth=auth)
data = response.json()
Here's part of the JSON response:
{'result': [{'display': '',
             'closure_code': '',
             'service_offer': 'Integration Platforms',
             'updated_on': '2022-04-23 09:05:53',
             'urgency': '2',
             'business_service': 'Operations',
             'updated_by': 'serviceaccount45',
             'description': 'ALERT returned 400 but expected 200',
             'sys_created_on': '2022-04-23 09:05:53',
             'sys_created_by': 'serviceaccount45',
             'subcategory': 'Integration',
             'contact_type': 'Email',
             'problem_type': 'Design: Availability',
             'caller_id': '',
             'action': 'create',
             'company': 'aaaa',
             'priority': '3',
             'status': '1',
             'opened': 'smith.j',
             'assigned_to': 'doe.j',
             'number': '123456',
             'group': 'blabla',
             'impact': '2',
             'category': 'Business Application & Databases',
             'caused_by_change': '',
             'location': 'All Locations',
             'configuration_item': 'Monitor',
             },
I would like to extract the data only for one group = 'blabla'. Then I would like to extract fields such as:
number = data['number']
group = data['group']
service_offer = data['service_offer']
updated = data['updated_on']
urgency = data['urgency']
username = data['created_by']
short_desc = data['description']
How should this be done?
I know that to check the first value I should use:
service_offer = data['result'][0]['service_offer']
I've tried to create a dictionary, but I'm getting an error:
data_result = response.json()['result']

payload = {
    number = data_result['number']
    group = data_result['group']
    service_offer = data_result['service_offer']
    updated = data_result['updated_on']
    urgency = data_result['urgency']
    username = data_result['created_by']
    short_desc = data_result['description']
}
TypeError: list indices must be integers or slices, not str:
So I've started to create something like the code below, but I'm stuck:
get_data = []
if len(data) > 0:
    for item in range(len(data)):
        get_data.append(data[item])
May I ask for help?
If data is your decoded JSON response from the question, then you can do:
# find group `blabla` in result:
g = next(d for d in data["result"] if d["group"] == "blabla")
# get data from the `blabla` group:
number = g["number"]
group = g["group"]
service_offer = g["service_offer"]
updated = g["updated_on"]
urgency = g["urgency"]
username = g["sys_created_by"]
short_desc = g["description"]
print(number, group, service_offer, updated, urgency, username, short_desc)
Prints:
123456 blabla Integration Platforms 2022-04-23 09:05:53 2 serviceaccount45 ALERT returned 400 but expected 200
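One caveat worth noting: next() raises StopIteration if no entry matches, so if the group might be absent you can pass a default (a small sketch on top of the answer above):

g = next((d for d in data["result"] if d["group"] == "blabla"), None)
if g is None:
    print("no record found for that group")
else:
    print(g["number"], g["group"])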
I have a rather basic bit of code. Basically, it sends an API request to a locally hosted server and returns a JSON string. I take that string and crack it apart, then take what I need from it, build a dictionary, and export it as an XML file with an .nfo extension.
The issue is that sometimes bits of the source data are missing; season is missing fairly frequently, for example, and that breaks the data mapping. I need a way to handle that. For some things I may want to exclude the data, and for others I need a sane default value.
#!/bin/env python
import os
import requests
import re
import json
import dicttoxml
import xml.dom.minidom
from xml.dom.minidom import parseString

# Grab Shoko Auth Key
apiheaders = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
}
apidata = '{"user": "Default", "pass": "", "device": "CLI"}'
r = requests.post('http://192.168.254.100:8111/api/auth',
                  headers=apiheaders, data=apidata)
key = json.loads(r.text)['apikey']

# Grabbing Episode Data
EpisodeHeaders = {
    'accept': 'text/plain',
    'apikey': key
}
EpisodeParams = (
    ('filename',
     "FILE HERE"),
    ('pic', '1'),
)
fileinfo = requests.get(
    'http://192.168.254.100:8111/api/ep/getbyfilename', headers=EpisodeHeaders, params=EpisodeParams)

# Mapping Data from Shoko to Jellyfin NFO
string = json.loads(fileinfo.text)
print(string)
eplot = json.loads(fileinfo.text)['summary']
etitle = json.loads(fileinfo.text)['name']
eyear = json.loads(fileinfo.text)['year']
episode = json.loads(fileinfo.text)['epnumber']
season = json.loads(fileinfo.text)['season']
aid = json.loads(fileinfo.text)['aid']
seasonnum = season.split('x')

# Create Dictionary From Mapped Data
show = {
    "plot": eplot,
    "title": etitle,
    "year": eyear,
    "episode": episode,
    "season": seasonnum[0],
}
Here is some example output from when the code crashes:
{'type': 'ep', 'eptype': 'Credits', 'epnumber': 1, 'aid': 10713, 'eid': 167848,
 'id': 95272, 'name': 'Opening', 'summary': 'Episode Overview not Available',
 'year': '2014', 'air': '2014-11-23', 'rating': '10.00', 'votes': '1',
 'art': {'fanart': [{'url': '/api/v2/image/support/plex_404.png'}],
         'thumb': [{'url': '/api/v2/image/support/plex_404.png'}]}}

Traceback (most recent call last):
  File "/home/fletcher/Documents/Shoko-Jellyfin-NFO/Xml3.py", line 48, in <module>
    season = json.loads(fileinfo.text)['season']
KeyError: 'season'
The solution, based on what Mahori suggested, worked perfectly:
eplot = json.loads(fileinfo.text).get('summary', None)
etitle = json.loads(fileinfo.text).get('name', None)
eyear = json.loads(fileinfo.text).get('year', None)
episode = json.loads(fileinfo.text).get('epnumber', None)
season = json.loads(fileinfo.text).get('season', '1x1')
aid = json.loads(fileinfo.text).get('aid', None)
This is a fairly common scenario in web development, where you cannot always assume the other party will send all keys.
The standard way to get around this is to use .get() instead of indexing by key.
season = json.loads(fileinfo.text).get('season', None)
#you can change None to any default value here
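As a side note, the response only needs to be parsed once; here is a small sketch of the same mapping that decodes the JSON a single time and uses .get() defaults throughout (the '1x1' season fallback mirrors the accepted fix above; the other defaults are None):

info = json.loads(fileinfo.text)  # parse the response once

eplot = info.get('summary')
etitle = info.get('name')
eyear = info.get('year')
episode = info.get('epnumber')
season = info.get('season', '1x1')  # assumed default so season.split('x') still works
aid = info.get('aid')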
I have a function that verifies if a given input string is a proper GCP zone:
def validate_zone(compute, project_id, zone):
    try:
        zone_response = compute.zones().get(project=project_id, zone=zone).execute()
        print(zone_response)
        print(zone_response.return_value)
        if 'status' in zone_response:
            zone_details = {
                'status': zone_response['status'],
                'region': zone_response['region'],
                'name': zone_response['name']
            }
            return zone_details
        else:
            return "Zone {} not found for project {}".format(zone, project_id)
    except HttpError as error:
        print("Error calling zone {}: \n {}".format(zone, error))
I am trying to write a test to verify that but I can't mock the output of the compute method correctly.
@patch('googleapiclient.discovery')
def test_validate_zone(self, mock_response):
    compute = mock_response.build(serviceName='compute', version='v1')
    compute.zones().get(project_id=self.project_id, zone=self.zone).execute().return_value = {
        'status': 'status',
        'region': 'region',
        'name': 'name'
    }
    zone_response = inventory.validate_zone(compute, self.project_id, self.zone)
    print(zone_response)
This results in the zone_response output being a MagicMock object with its return_value being correct as developed in the test.
zone_response = MagicMock name='discovery.build().zones().get().execute()' id='139870134525456'
zone_response.return_value = {'status': 'status', 'region': 'region', 'name': 'name'}
Any ideas on what I'm doing wrong? I've been trying to write tests for this for quite a while so maybe my approach is just off.
Turns out the issue was the () on the execute method in the test. So the correct test should be:
@patch('inventory.discovery.build', serviceName='compute', version='v1')
def test_validate_zone(self, compute):
    print(compute)
    compute.zones().get(project_id=self.project_id, zone=self.zone).execute.return_value = {
        'status': 'status',
        'region': 'region',
        'name': 'name'
    }
    zone_response = inventory.validate_zone(compute, self.project_id, self.zone)
    print(zone_response)
Source can be found at: https://realpython.com/python-mock-library/#managing-a-mocks-return-value
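To spell out why the parentheses matter (a short illustration, not from the original answer): setting return_value on the execute attribute configures what the mocked execute() call returns, whereas adding () configures the return value of yet another call made on whatever execute() already returned.

from unittest.mock import MagicMock

# incorrect: this configures the return value of execute()(), i.e. a call
# on the object that execute() already returned
wrong = MagicMock()
wrong.zones().get().execute().return_value = {'status': 'ok'}
print(wrong.zones().get().execute())    # -> still a MagicMock, not the dict

# correct: the dict is what execute() itself returns
right = MagicMock()
right.zones().get().execute.return_value = {'status': 'ok'}
print(right.zones().get().execute())    # -> {'status': 'ok'}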
I have a custom data file formatted like this:
{
    data = {
        friends = {
            max = 0 0,
            min = 0 0,
        },
        family = {
            cars = {
                van = "honda",
                car = "ford",
                bike = "trek",
            },
            presets = {
                location = "italy",
                size = 10,
                travelers = False,
            },
            version = 1,
        },
    },
}
I want to collect the blocks of data, meaning the strings between each set of {}, while maintaining a hierarchy. This data is not in a typical JSON format, so that is not a possible solution.
My idea was to create a class object like so:
class Block:
    def __init__(self, header, children):
        self.header = header
        self.children = children
I would then loop through the data line by line, 'somehow' collecting the necessary data, so my resulting output would look something like this:
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short, I'm looking for help on ways I can serialize this into useful data that I can then easily manipulate. My approach is to break it into objects using the {} as dividers.
If anyone has suggestions on ways to better approach this, I'm open to ideas. Thank you again.
So far I've just implemented these basic snippets of code:
class Block:
    def __init__(self, content, children):
        self.content = content
        self.children = children

def GetBlock(strArr=[]):
    print(len(strArr))
    # blocks = []
    blockStart = "{"
    blockEnd = "}"

with open(filepath, 'r') as file:
    data = file.readlines()
    blocks = GetBlock(strArr=data)
You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
    block = {}
    for line in lines:
        if line.strip().endswith(("}", "},")):
            break
        key, value = map(str.strip, line.split(" = "))
        if value.endswith("{"):
            value = to_block(lines)
        block[key] = value
    return block
When calling it, you have to strip the first line, though. Also, evaluating the "leaves" to e.g. numbers or strings is left as an exercise to the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
next(f) # skip first line
res = to_block(f)
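Since evaluating the leaves is left as an exercise above, one possible sketch of a converter for the leaf strings (a hypothetical parse_value helper, applied to the dict that to_block returns):

def parse_value(raw):
    """Convert a raw leaf string like '10,' or '"italy",' into a Python value."""
    raw = raw.rstrip(",").strip()
    if raw.startswith('"') and raw.endswith('"'):
        return raw[1:-1]
    if raw in ("True", "False"):
        return raw == "True"
    try:
        return int(raw)
    except ValueError:
        return raw  # e.g. '0 0' stays a string

def convert(block):
    return {k: convert(v) if isinstance(v, dict) else parse_value(v)
            for k, v in block.items()}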
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values in "" (and replace the original " with ' before that), otherwise there is too much risk of accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}
You can also do it with ast or json, with the help of regex substitutions.
import re

a = """{
    data = {
        friends = {
            max = 0 0,
            min = 0 0,
        },
        family = {
            cars = {
                van = "honda",
                car = "ford",
                bike = "trek",
            },
            presets = {
                location = "italy",
                size = 10,
                travelers = False,
            },
            version = 1,
        },
    },
}"""

# with ast
a = re.sub("(\w+)\s*=\s*", '"\\1":', a)
a = re.sub(":\s*((?:\d+)(?: \d+)+)", lambda x: ':[' + x.group(1).replace(" ", ",") + "]", a)
import ast
print ast.literal_eval(a)
# {'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'car': 'ford', 'bike': 'trek', 'van': 'honda'}, 'presets': {'travelers': False, 'location': 'italy', 'size': 10}, 'version': 1}}}

# with json
import json
a = re.sub(",(\s*\})", "\\1", a)
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print json.loads(a)
# {u'data': {u'friends': {u'max': [0, 0], u'min': [0, 0]}, u'family': {u'cars': {u'car': u'ford', u'bike': u'trek', u'van': u'honda'}, u'presets': {u'travelers': False, u'location': u'italy', u'size': 10}, u'version': 1}}}