How to convert a Python CPU script to run on GPU - python

How can I edit a Python script that runs on the CPU so that it runs on a GPU that supports CUDA?
Here is the code:
import numpy as np
import pandas as pd
from datetime import datetime as dt
import scipy.signal
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import os
from multiprocessing import Pool
import tqdm
def get_start(l):
for i,line in enumerate(l):
if 'Zeit' in line or 'Time' in line:
return i+1
def get_meas2(l):
index = {
0 :'fl',
1 :'fr',
2 :'rl',
3 :'rr',
4 :'rfl',
5 :'rfr',
6 :'rrl',
7 :'rrr',
}
res = {
'fl' : None ,
'fr' : None ,
'rl' : None ,
'rr' : None ,
'rfl' : None ,
'rfr' : None ,
'rrl' : None ,
'rrr' : None ,
}
res_c =res.copy()
meas = []
i=0
s = get_start(l)
for line in l[s:]:
sp = line.split(';')
sensor = index[int(sp[1])]
tt = sp[0].replace(' PM','').replace(' AM','')
t = dt.strptime (tt,'%m/%d/%Y %H:%M:%S')
val = float(sp[2].replace(',','.'))
if res_c[sensor]==None:
res_c[sensor] = val
else:
res_c['sample'] = i
res_c['time'] = t
i+=1
for k in res_c.keys():
if res_c[k]==None:
res_c[k]=0
meas.append(res_c)
res_c = res.copy()
res_c[sensor] = val
return meas
def read_csv1(filepath):
l = open(filepath,'r',errors='backslashreplace').readlines()
js ={
'createdBy': None ,
'csvFileName': None ,
'eftFileName': None ,
'hasCoDriver': None ,
'hasDriver': None ,
'measurement': None ,
'plateHeight': None ,
'plateLength': 600 ,
'rating': None ,
'recordedOn': None ,
'remarks': None ,
'sampleRate': 50 ,
'vehicleChassis': None ,
'vehicleManufacturer': None ,
'vehicleModel': None ,
'vehicleType': None ,
'vehicleWheelBase': 2650 ,
'vehicleYearOfConstruction': None ,
'version': None ,
}
js['measurement'] = get_meas2(l)
# return js
return parse_data(js)
def parse_data(data):
measurments = {}
for key in data["measurement"][0].keys():
measurments[key] = [d[key] for d in data["measurement"]]
return measurments
def AVG2(a,n):
windows = []
for k,v in n.items():
windows += [k] * v
i=0
res = []
cont = True
while cont:
for w in windows:
if i+w <len(a):
res.append(np.mean(a[i:i+w]))
i +=w
else:
cont = False
return res
def findFirstRise(a):
n = 5
m = 20
for i in range(len(a)-11):
if a[i+m] >1 and a[i]!=0:
if a[i+m]/a[i] > n:
return i
def sync(a, window={4:10,5:1}, k=0.0065, Wn=90, fs=5000):
# Step 1 --> under sampling
a = [float(i) for i in a if i!='#VALUE!' and i== i]
a = AVG2(a,window)
#Setp 2 --> scaling
a = np.vectorize(lambda x: np.abs(x-np.mean(a)))(a)
# k = np.mean(GT)/np.mean(a)
a = np.vectorize(lambda x: x*k)(a)
#step 3 --> iir filter
bb, aa = scipy.signal.iirfilter(1, Wn=Wn, fs=fs, btype="low", ftype="butter")
y_lfilter = scipy.signal.lfilter(bb, aa, a)
# y_lfilter = scipy.signal.lfilter((0.0661221,0.0661221), (1.,-0.8677558), a)
return y_lfilter
def syncP(a,param):
ik,iwn,ifs,i4,i5 = param
window={4:i4,5:i5}
k=ik
Wn=iwn
fs=ifs
# Step 1 --> under sampling
a = [float(i) for i in a if i!='#VALUE!' and i== i]
a = AVG2(a,window)
#Setp 2 --> scaling
a = np.vectorize(lambda x: np.abs(x-np.mean(a)))(a)
# k = np.mean(GT)/np.mean(a)
a = np.vectorize(lambda x: x*k)(a)
#step 3 --> iir filter
bb, aa = scipy.signal.iirfilter(1, Wn=Wn, fs=fs, btype="low", ftype="butter")
# print(bb, aa, sep="\n")
y_lfilter = scipy.signal.lfilter(bb, aa, a)
# y_lfilter = scipy.signal.lfilter((0.0661221,0.0661221), (1.,-0.8677558), a)
return y_lfilter
def syncOfsset(a,sb,offset=None):
if not offset:
offset = findFirstRise(a) - findFirstRise(sb)
if offset >0:
a = a[offset:]
else:
sb = sb[np.abs(offset):]
l = min(len(a),len(sb))
return a[:l],sb[:l],(offset,l)
def findOffset(a,sb):
return findFirstRise(a) - findFirstRise(sb)
def mae(a,sb):
if len(a) != len(sb): raise ValueError(f'len(a)= {len(a)} while len(sb) = {len(sb)}')
return np.abs(np.subtract(a,sb)).mean()
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
def getMoreThan3(a1,sb1):
aa = []
last=False
for i,v in enumerate(a1):
if v>3:
aa.append(True)
last=True
else:
if last:
if True in [j>3 for j in a1[i:i+1]]:
aa.append(True)
last = True
else:
aa.append(False)
last = False
else:
aa.append(False)
last = False
ca=[]
cb=[]
index = []
for i,(ai,sbi,t) in enumerate(zip(a1,sb1,aa)):
if t:
ca.append(ai)
cb.append(sbi)
index.append(i)
return ca,cb
def check2(a,b,param):
ik,iwn,ifs,i4,i5 = param
sb = sync(b, window={4:i4,5:i5}, k=ik ,Wn=iwn, fs=ifs)
a1,sb1,data = syncOfsset(a,sb)
offset,l = data
r2 = r2_score(a1,sb1)
for i in range(-5,5):
a2,sb2,data2 = syncOfsset(a,sb,offset=offset+i)
r22 = r2_score(a2,sb2)
if r22>r2:
a1,sb1,data = a2,sb2,data2
ca,cb = getMoreThan3(a1,sb1)
return mae(ca,cb),rmse(ca,cb),r2_score(ca,cb),pearsonr(ca,cb)[0],(a,sb),data
def check(a,b,param):
ik,iwn,ifs,i4,i5 = param
sb = sync(b, window={4:i4,5:i5}, k=ik ,Wn=iwn, fs=ifs)
a,sb,data = syncOfsset(a,sb)
ca,cb = getMoreThan3(a,sb)
return mae(ca,cb),rmse(ca,cb),r2_score(ca,cb),pearsonr(ca,cb)[0],(a,sb),data
def rmse(a,sb):
if len(a) != len(sb): raise ValueError(f'len(a)= {len(a)} while len(sb) = {len(sb)}')
return np.sqrt(np.square(np.subtract(a,sb)).mean())
def analyse(i,param):
f = f"test{i}.csv"
d1 = read_csv1(os.path.join('sensor1',f))
selSen = selectSensor(d1)
d1 = d1[selSen]
d3 = pd.read_csv(os.path.join('sensor3',f))['Unnamed: 2']
return check2(d1,d3,param),selSen
def selectSensor(a):
s = {}
for sensor in a:
if len(sensor)==2:
s[sensor] = max(a[sensor])
return max(s,key=s.get)
def handle_process(param):
i = 4
(res1,res2,r2,pr, _,data),selSen = analyse(i,param)
with open('results.csv','a') as f:
line = str(param) + ',' + str(res1) + ',' + str(res2) + ',' +str(r2) + ',' +str(pr) + ',' +str(data) + ',' +str(selSen) + ',' + '\n'
line = line.replace('(','').replace(')','')
f.write(line)
if __name__=='__main__':
base_dir1 = 'sensor1'
base_dir3 = 'sensor3'
rWn = range(80,100,5)
rK = range(65,80,5)
r4W = range(9,12,1)
r5W = range(0,3)
rFs = range(4000,6000,200)
params = []
for iWn in rWn:
for iFs in rFs:
for i4W in r4W:
for i5W in r5W:
for ik in rK:
params.append((ik/10000,iWn,iFs,i4W,i5W))
with open('results.csv','a') as f:
line = ['k','wn','fs','window4','window5',\
'mae(>3)','rmse(>3)','r2(>3)','pearsonr(>3)',\
'OffsetStart','length','SelectedSensor','\n']
f.write(','.join(line))
f.close()
pool = Pool(processes=8)
for _ in tqdm.tqdm(pool.imap_unordered(handle_process, params), total=len(params)):
pass
To make sure that I have the ability to run code on the GPU, I successfully tested the sample code on my laptop, and based on it I modified my code as follows. But I am facing the following error after execution.
from numba import jit, cuda
import numpy as np
import pandas as pd
from datetime import datetime as dt
import scipy.signal
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import os
from multiprocessing import Pool
import tqdm
def get_start(l):
for i,line in enumerate(l):
if 'Zeit' in line or 'Time' in line:
return i+1
def get_meas2(l):
index = {
0 :'fl',
1 :'fr',
2 :'rl',
3 :'rr',
4 :'rfl',
5 :'rfr',
6 :'rrl',
7 :'rrr',
}
res = {
'fl' : None ,
'fr' : None ,
'rl' : None ,
'rr' : None ,
'rfl' : None ,
'rfr' : None ,
'rrl' : None ,
'rrr' : None ,
}
res_c =res.copy()
meas = []
i=0
s = get_start(l)
for line in l[s:]:
sp = line.split(';')
sensor = index[int(sp[1])]
tt = sp[0].replace(' PM','').replace(' AM','')
t = dt.strptime (tt,'%m/%d/%Y %H:%M:%S')
val = float(sp[2].replace(',','.'))
if res_c[sensor]==None:
res_c[sensor] = val
else:
res_c['sample'] = i
res_c['time'] = t
i+=1
for k in res_c.keys():
if res_c[k]==None:
res_c[k]=0
meas.append(res_c)
res_c = res.copy()
res_c[sensor] = val
return meas
def read_csv1(filepath):
l = open(filepath,'r',errors='backslashreplace').readlines()
js ={
'createdBy': None ,
'csvFileName': None ,
'eftFileName': None ,
'hasCoDriver': None ,
'hasDriver': None ,
'measurement': None ,
'plateHeight': None ,
'plateLength': 600 ,
'rating': None ,
'recordedOn': None ,
'remarks': None ,
'sampleRate': 50 ,
'vehicleChassis': None ,
'vehicleManufacturer': None ,
'vehicleModel': None ,
'vehicleType': None ,
'vehicleWheelBase': 2650 ,
'vehicleYearOfConstruction': None ,
'version': None ,
}
js['measurement'] = get_meas2(l)
# return js
return parse_data(js)
def parse_data(data):
measurments = {}
for key in data["measurement"][0].keys():
measurments[key] = [d[key] for d in data["measurement"]]
return measurments
def AVG2(a,n):
windows = []
for k,v in n.items():
windows += [k] * v
i=0
res = []
cont = True
while cont:
for w in windows:
if i+w <len(a):
res.append(np.mean(a[i:i+w]))
i +=w
else:
cont = False
return res
def findFirstRise(a):
n = 5
m = 20
for i in range(len(a)-11):
if a[i+m] >1 and a[i]!=0:
if a[i+m]/a[i] > n:
return i
def sync(a, window={4:10,5:1}, k=0.0065, Wn=90, fs=5000):
# Step 1 --> under sampling
a = [float(i) for i in a if i!='#VALUE!' and i== i]
a = AVG2(a,window)
#Setp 2 --> scaling
a = np.vectorize(lambda x: np.abs(x-np.mean(a)))(a)
# k = np.mean(GT)/np.mean(a)
a = np.vectorize(lambda x: x*k)(a)
#step 3 --> iir filter
bb, aa = scipy.signal.iirfilter(1, Wn=Wn, fs=fs, btype="low", ftype="butter")
y_lfilter = scipy.signal.lfilter(bb, aa, a)
# y_lfilter = scipy.signal.lfilter((0.0661221,0.0661221), (1.,-0.8677558), a)
return y_lfilter
def syncP(a,param):
ik,iwn,ifs,i4,i5 = param
window={4:i4,5:i5}
k=ik
Wn=iwn
fs=ifs
# Step 1 --> under sampling
a = [float(i) for i in a if i!='#VALUE!' and i== i]
a = AVG2(a,window)
#Setp 2 --> scaling
a = np.vectorize(lambda x: np.abs(x-np.mean(a)))(a)
# k = np.mean(GT)/np.mean(a)
a = np.vectorize(lambda x: x*k)(a)
#step 3 --> iir filter
bb, aa = scipy.signal.iirfilter(1, Wn=Wn, fs=fs, btype="low", ftype="butter")
# print(bb, aa, sep="\n")
y_lfilter = scipy.signal.lfilter(bb, aa, a)
# y_lfilter = scipy.signal.lfilter((0.0661221,0.0661221), (1.,-0.8677558), a)
return y_lfilter
def syncOfsset(a,sb,offset=None):
if not offset:
offset = findFirstRise(a) - findFirstRise(sb)
if offset >0:
a = a[offset:]
else:
sb = sb[np.abs(offset):]
l = min(len(a),len(sb))
return a[:l],sb[:l],(offset,l)
def findOffset(a,sb):
return findFirstRise(a) - findFirstRise(sb)
def mae(a,sb):
if len(a) != len(sb): raise ValueError(f'len(a)= {len(a)} while len(sb) = {len(sb)}')
return np.abs(np.subtract(a,sb)).mean()
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
def getMoreThan3(a1,sb1):
aa = []
last=False
for i,v in enumerate(a1):
if v>3:
aa.append(True)
last=True
else:
if last:
if True in [j>3 for j in a1[i:i+1]]:
aa.append(True)
last = True
else:
aa.append(False)
last = False
else:
aa.append(False)
last = False
ca=[]
cb=[]
index = []
for i,(ai,sbi,t) in enumerate(zip(a1,sb1,aa)):
if t:
ca.append(ai)
cb.append(sbi)
index.append(i)
return ca,cb
@jit(target_backend='cuda')
def check2(a,b,param):
ik,iwn,ifs,i4,i5 = param
sb = sync(b, window={4:i4,5:i5}, k=ik ,Wn=iwn, fs=ifs)
a1,sb1,data = syncOfsset(a,sb)
offset,l = data
r2 = r2_score(a1,sb1)
for i in range(-5,5):
a2,sb2,data2 = syncOfsset(a,sb,offset=offset+i)
r22 = r2_score(a2,sb2)
if r22>r2:
a1,sb1,data = a2,sb2,data2
ca,cb = getMoreThan3(a1,sb1)
return mae(ca,cb),rmse(ca,cb),r2_score(ca,cb),pearsonr(ca,cb)[0],(a,sb),data
@jit(target_backend='cuda')
def check(a,b,param):
ik,iwn,ifs,i4,i5 = param
sb = sync(b, window={4:i4,5:i5}, k=ik ,Wn=iwn, fs=ifs)
a,sb,data = syncOfsset(a,sb)
ca,cb = getMoreThan3(a,sb)
return mae(ca,cb),rmse(ca,cb),r2_score(ca,cb),pearsonr(ca,cb)[0],(a,sb),data
@jit(target_backend='cuda')
def rmse(a,sb):
if len(a) != len(sb): raise ValueError(f'len(a)= {len(a)} while len(sb) = {len(sb)}')
return np.sqrt(np.square(np.subtract(a,sb)).mean())
@jit(target_backend='cuda')
def analyse(i,param):
f = f"test{i}.csv"
d1 = read_csv1(os.path.join('sensor1',f))
selSen = selectSensor(d1)
d1 = d1[selSen]
d3 = pd.read_csv(os.path.join('sensor3',f))['Unnamed: 2']
return check2(d1,d3,param),selSen
@jit(target_backend='cuda')
def selectSensor(a):
s = {}
for sensor in a:
if len(sensor)==2:
s[sensor] = max(a[sensor])
return max(s,key=s.get)
@jit(target_backend='cuda')
def handle_process(param):
i = 4
(res1,res2,r2,pr, _,data),selSen = analyse(i,param)
with open('results.csv','a') as f:
line = str(param) + ',' + str(res1) + ',' + str(res2) + ',' +str(r2) + ',' +str(pr) + ',' +str(data) + ',' +str(selSen) + ',' + '\n'
line = line.replace('(','').replace(')','')
f.write(line)
if __name__=='__main__':
base_dir1 = 'sensor1'
base_dir3 = 'sensor3'
rWn = range(80,100,5)
rK = range(65,80,5)
r4W = range(9,12,1)
r5W = range(0,3)
rFs = range(4000,6000,200)
params = []
for iWn in rWn:
for iFs in rFs:
for i4W in r4W:
for i5W in r5W:
for ik in rK:
params.append((ik/10000,iWn,iFs,i4W,i5W))
with open('results.csv','a') as f:
line = ['k','wn','fs','window4','window5',\
'mae(>3)','rmse(>3)','r2(>3)','pearsonr(>3)',\
'OffsetStart','length','SelectedSensor','\n']
f.write(','.join(line))
f.close()
pool = Pool(processes=1)
for _ in tqdm.tqdm(pool.imap_unordered(handle_process, params), total=len(params)):
pass
But I receive this error:
0%| | 0/1080 [00:05<?, ?it/s]
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\ProgramData\miniconda3\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "C:\ProgramData\miniconda3\lib\site-packages\numba\core\dispatcher.py", line 471, in _compile_for_args
error_rewrite(e, 'unsupported_error')
File "C:\ProgramData\miniconda3\lib\site-packages\numba\core\dispatcher.py", line 409, in error_rewrite
raise e.with_traceback(None)
numba.core.errors.UnsupportedError: Failed in object mode pipeline (step: analyzing bytecode)
The 'with (context manager) as (variable):' construct is not supported.
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Amin\Downloads\AnalyseSelectSensor\analyseoneg.py", line 299, in <module>
for _ in tqdm.tqdm(pool.imap_unordered(handle_process, params), total=len(params)):
File "C:\ProgramData\miniconda3\lib\site-packages\tqdm\std.py", line 1195, in __iter__
for obj in iterable:
File "C:\ProgramData\miniconda3\lib\multiprocessing\pool.py", line 873, in next
raise value
numba.core.errors.UnsupportedError: Failed in object mode pipeline (step: analyzing bytecode)
The 'with (context manager) as (variable):' construct is not supported.
Do you have any idea how I can solve this issue?
Kind regards.
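For what it's worth, the error message itself names the unsupported construct: Numba's bytecode analysis cannot compile the "with open(...) as f:" block inside the jit-decorated handle_process. A minimal sketch (not the original script; rmse_kernel is a hypothetical helper) that keeps file I/O in plain Python and JIT-compiles only a small numeric kernel would look like this:
import numpy as np
from numba import njit

# Hypothetical helper: a pure numeric kernel that Numba can compile in nopython mode.
@njit
def rmse_kernel(a, b):
    diff = a - b
    return np.sqrt(np.mean(diff * diff))

def handle_process(param):
    # pandas/scipy/file handling stay as ordinary (un-jitted) Python
    a = np.asarray([1.0, 2.0, 3.0])
    b = np.asarray([1.0, 2.5, 2.0])
    res = rmse_kernel(a, b)
    with open('results.csv', 'a') as f:  # allowed here: this function is not decorated
        f.write(str(param) + ',' + str(res) + '\n')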

Related

Tensorflow: "ValueError: setting an array element with a sequence."

I am implementing a model in TF 1.15, and while trying to run the session I get the error below.
My dataset:
import numpy as np
import scipy.sparse as sp
from collections import OrderedDict
import tensorflow as tf
from .data_container import index_keys
index_keys = ["batch_seg", "idnb_i", "idnb_j", "id_expand_kj",
"id_reduce_ji", "id3dnb_i", "id3dnb_j", "id3dnb_k"]
class Qm9Data():
""" Qm9 dataset"""
def __init__(self, filename, cutoff, target_keys):
data_dict = np.load(filename, allow_pickle=True)
self.cutoff = cutoff
self.target_keys = target_keys
for key in ['id', 'N', 'Z', 'R']:
if key in data_dict:
setattr(self, key, data_dict[key])
else:
setattr(self, key, None)
self.targets = np.stack([data_dict[key] for key in self.target_keys], axis=1)
if self.N is None:
self.N = np.zeros(len(self.targets), dtype=np.int32)
self.N_cumsum = np.concatenate([[0], np.cumsum(self.N)])
assert self.R is not None
self.batch_seg = []
self.idnb_i = []
self.idnb_j = []
self.id_expand_kj = []
self.id_reduce_ji = []
self.id3dnb_i = []
self.id3dnb_j = []
self.id3dnb_k = []
for idx in range(len(self.targets)):
if type(idx) is int or type(idx) is np.int64:
idx = [idx]
data = {}
data['targets'] = np.array(self.targets[idx])
data['id'] = self.id[idx]
data['N'] = self.N[idx]
# data['batch_seg'] = np.repeat(np.arange(len(idx), dtype=np.int32), data['N'])
self.batch_seg.append(np.repeat(np.arange(len(idx), dtype=np.int32), data['N']))
adj_matrices = []
data['Z'] = np.zeros(np.sum(data['N']), dtype=np.int32)
data['R'] = np.zeros([np.sum(data['N']), 3], dtype=np.float32)
nend = 0
for k, i in enumerate(idx):
n = data['N'][k] # number of atoms
nstart = nend
nend = nstart + n
if self.Z is not None:
data['Z'][nstart:nend] = self.Z[self.N_cumsum[i]:self.N_cumsum[i + 1]]
R = self.R[self.N_cumsum[i]:self.N_cumsum[i + 1]]
data['R'][nstart:nend] = R
Dij = np.linalg.norm(R[:, None, :] - R[None, :, :], axis=-1)
adj_matrices.append(sp.csr_matrix(Dij <= self.cutoff))
adj_matrices[-1] -= sp.eye(n, dtype=np.bool)
# Entry x,y is edge x<-y (!)
adj_matrix = self._bmat_fast(adj_matrices)
# Entry x,y is edgeid x<-y (!)
atomids_to_edgeid = sp.csr_matrix(
(np.arange(adj_matrix.nnz), adj_matrix.indices, adj_matrix.indptr),
shape=adj_matrix.shape)
edgeid_to_target, edgeid_to_source = adj_matrix.nonzero()
# Target (i) and source (j) nodes of edges
# data['idnb_i'] = edgeid_to_target
self.idnb_i.append(edgeid_to_target)
# data['idnb_j'] = edgeid_to_source
self.idnb_j.append(edgeid_to_source)
# Indices of triplets k->j->i
ntriplets = adj_matrix[edgeid_to_source].sum(1).A1
id3ynb_i = np.repeat(edgeid_to_target, ntriplets)
id3ynb_j = np.repeat(edgeid_to_source, ntriplets)
id3ynb_k = adj_matrix[edgeid_to_source].nonzero()[1]
# Indices of triplets that are not i->j->i
id3_y_to_d, = (id3ynb_i != id3ynb_k).nonzero()
# data['id3dnb_i'] = id3ynb_i[id3_y_to_d]
self.id3dnb_i.append(id3ynb_i[id3_y_to_d])
# data['id3dnb_j'] = id3ynb_j[id3_y_to_d]
self.id3dnb_j.append(id3ynb_j[id3_y_to_d])
# data['id3dnb_k'] = id3ynb_k[id3_y_to_d]
self.id3dnb_k.append(id3ynb_k[id3_y_to_d])
# Edge indices for interactions
# j->i => k->j
# data['id_expand_kj'] = atomids_to_edgeid[edgeid_to_source, :].data[id3_y_to_d]
self.id_expand_kj.append(atomids_to_edgeid[edgeid_to_source, :].data[id3_y_to_d])
# j->i => k->j => j->i
# data['id_reduce_ji'] = atomids_to_edgeid[edgeid_to_source, :].tocoo().row[id3_y_to_d]
self.id_reduce_ji.append(atomids_to_edgeid[edgeid_to_source, :].tocoo().row[id3_y_to_d])
# print(self.batch_seg[idx[0]], data['idnb_i'], data['id3dnb_i'], data['id_expand_kj'])
# exit(0)
def __len__(self):
return self.targets.shape[0]
def _bmat_fast(self, mats):
new_data = np.concatenate([mat.data for mat in mats])
ind_offset = np.zeros(1 + len(mats))
ind_offset[1:] = np.cumsum([mat.shape[0] for mat in mats])
new_indices = np.concatenate(
[mats[i].indices + ind_offset[i] for i in range(len(mats))])
indptr_offset = np.zeros(1 + len(mats))
indptr_offset[1:] = np.cumsum([mat.nnz for mat in mats])
new_indptr = np.concatenate(
[mats[i].indptr[i >= 1:] + indptr_offset[i] for i in range(len(mats))])
return sp.csr_matrix((new_data, new_indices, new_indptr))
Session:
loss_train, _ = sess.run([optimizer],
feed_dict={placeholders['Z']: DATA.Z[idx['train']],
placeholders['R']: DATA.R[idx['train']],
placeholders['idnb_i']: [DATA.idnb_i[i] for i in idx['train']],
placeholders['idnb_j']: [DATA.idnb_j[i] for i in idx['train']],
placeholders['id_expand_kj']: [DATA.id_expand_kj[i] for i in idx['train']],
placeholders['id_reduce_ji']: [DATA.id_reduce_ji[i] for i in idx['train']],
placeholders['id3dnb_i']: [DATA.id3dnb_i[i] for i in idx['train']],
placeholders['id3dnb_j']: [DATA.id3dnb_j[i] for i in idx['train']],
placeholders['id3dnb_k']: [DATA.id3dnb_k[i] for i in idx['train']],
placeholders['batch_seg']: [DATA.batch_seg[i] for i in idx['train']],
placeholders['targets']: DATA.targets[idx['train']]})
The error is for the placeholders['targets'].
Traceback (most recent call last):
File "train_new.py", line 260, in <module> placeholders['targets']: DATA.targets[idx['train']]})
File ".local/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 956, in run run_metadata_ptr)
File ".local/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1149, in _run\np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
File "/usr/local/python3.7.5/lib/python3.7/site-packages/numpy/core/_asarray.py", line 85, in asarray\return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
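For reference, the np.asarray call in that traceback fails with exactly this message whenever the fed value is a ragged nested sequence; a minimal, self-contained reproduction (made-up data, not the actual feed) is:
import numpy as np

# A ragged (uneven-length) nested list cannot be packed into a single float32 array.
rows = [[1.0, 2.0], [3.0, 4.0, 5.0]]
try:
    np.asarray(rows, dtype=np.float32)
except ValueError as err:
    print(err)  # setting an array element with a sequence.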

'tensorflow_federated' has no attribute 'NamedTupleType'

I am following this code https://github.com/BUAA-BDA/FedShapley/tree/master/TensorflowFL and trying to run the file same_OR.py.
I also placed the input file "initial_model_parameters.txt" and the data folder "MNIST_data" in the same folder.
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import os
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
def get_data_for_digit(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
def get_data_for_digit_test(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples)):
output_sequence.append({
'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
dtype=np.float32),
'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
return output_sequence
def get_data_for_federated_agents(source, num):
output_sequence = []
Samples = []
for digit in range(0, 10):
samples = [i for i, d in enumerate(source[1]) if d == digit]
samples = samples[0:5421]
Samples.append(samples)
all_samples = []
for sample in Samples:
for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
all_samples.append(sample[sample_index])
# all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
BATCH_TYPE = tff.NamedTupleType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.NamedTupleType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
return -tf.reduce_mean(tf.reduce_sum(
tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
# Define a group of model variables and set them to `initial_model`.
model_vars = tff.utils.create_variables('v', MODEL_TYPE)
init_model = tff.utils.assign(model_vars, initial_model)
# Perform one step of gradient descent using loss from `batch_loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
with tf.control_dependencies([init_model]):
train_model = optimizer.minimize(batch_loss(model_vars, batch))
# Return the model vars after performing this gradient descent step.
with tf.control_dependencies([train_model]):
return tff.utils.identity(model_vars)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
# Mapping function to apply to each batch.
@tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
def batch_fn(model, batch):
return batch_train(model, batch, learning_rate)
l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
return l
@tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
#
return tff.sequence_sum(
tff.sequence_map(
tff.federated_computation(lambda b: batch_loss(model, b), BATCH_TYPE),
all_batches))
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)
@tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
return tff.federated_mean(
tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))
SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)
@tff.federated_computation(SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
l = tff.federated_map(
local_train,
[tff.federated_broadcast(model),
tff.federated_broadcast(learning_rate),
data])
return l
# return tff.federated_mean()
def readTestImagesFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__), "test_images1_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__), "test_images1_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split("\t")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def readTestLabelsFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__), "test_labels_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__), "test_labels_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
for i in p:
if i!="":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
f = open(os.path.join(os.path.dirname(__file__), "weights_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split("***\n--------------------------------------------------")
parm_local = []
learning_rate_list = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0:784]
learning_rate_list.append(float(line[784].replace("*", "").replace("\n", "")))
else:
weights_line = line[1:785]
learning_rate_list.append(float(line[785].replace("*", "").replace("\n", "")))
valid_weights_line = []
for l in weights_line:
w_list = l.split("\t")
w_list = w_list[0:len(w_list) - 1]
w_list = [float(i) for i in w_list]
valid_weights_line.append(w_list)
parm_local.append(valid_weights_line)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split("***\n--------------------------------------------------")
bias_local = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0]
else:
weights_line = line[1]
b_list = weights_line.split("\t")
b_list = b_list[0:len(b_list) - 1]
b_list = [float(i) for i in b_list]
bias_local.append(b_list)
f.close()
ret = {
'weights': np.asarray(parm_local),
'bias': np.asarray(bias_local),
'learning_rate': np.asarray(learning_rate_list)
}
return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type):
f_ini_p = open(os.path.join(os.path.dirname(__file__), "initial_model_parameters.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial_g = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial_g = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
model_g = {
'weights': w_initial_g,
'bias': b_initial_g
}
for i in range(len(grad[0])):
# i->迭代轮数
gradient_w = np.zeros([784, 10], dtype=np.float32)
gradient_b = np.zeros([10], dtype=np.float32)
for j in agent_list:
gradient_w = np.add(np.multiply(grad[j][i], 1/len(agent_list)), gradient_w)
gradient_b = np.add(np.multiply(bi[j][i], 1/len(agent_list)), gradient_b)
model_g['weights'] = np.subtract(model_g['weights'], np.multiply(lr[0][i], gradient_w))
model_g['bias'] = np.subtract(model_g['bias'], np.multiply(lr[0][i], gradient_b))
test_images = readTestImagesFromFile(False)
test_labels_onehot = readTestLabelsFromFile(False)
m = np.dot(test_images, np.asarray(model_g['weights']))
test_result = m + np.asarray(model_g['bias'])
y = tf.nn.softmax(test_result)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.arg_max(test_labels_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
new_original_l = []
for i in original_l:
new_original_l.append(i)
for i in new_original_l:
if i == removed_ele:
new_original_l.remove(i)
for i in range(len(ll)):
if set(ll[i]) == set(new_original_l):
return i
return -1
def shapley_list_indexed(original_l, ll):
for i in range(len(ll)):
if set(ll[i]) == set(original_l):
return i
return -1
def PowerSetsBinary(items):
N = len(items)
set_all = []
for i in range(2 ** N):
combo = []
for j in range(N):
if (i >> j) % 2 == 1:
combo.append(items[j])
set_all.append(combo)
return set_all
if __name__ == "__main__":
start_time = time.time()
#data_num = np.asarray([5923,6742,5958,6131,5842])
#agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__), "weights_"+str(index)+".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(index) + ".txt"), "w")
f.close()
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
DISTRIBUTION_TYPE = "SAME"
federated_train_data_divide = None
federated_train_data = None
if DISTRIBUTION_TYPE == "SAME":
federated_train_data_divide = [get_data_for_federated_agents(mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__), "initial_model_parameters.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
initial_model = {
'weights': w_initial,
'bias': b_initial
}
model = initial_model
learning_rate = 0.1
for round_num in range(50):
local_models = federated_train(model, learning_rate, federated_train_data)
print("learning rate: ", learning_rate)
#print(local_models[0][0])#第0个agent的weights矩阵
#print(local_models[0][1])#第0个agent的bias矩阵
#print(len(local_models))
for local_index in range(len(local_models)):
f = open(os.path.join(os.path.dirname(__file__), "weights_"+str(local_index)+".txt"),"a",encoding="utf-8")
for i in local_models[local_index][0]:
line = ""
arr = list(i)
for j in arr:
line += (str(j)+"\t")
print(line, file=f)
print("***"+str(learning_rate)+"***",file=f)
print("-"*50,file=f)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(local_index) + ".txt"), "a", encoding="utf-8")
line = ""
for i in local_models[local_index][1]:
line += (str(i) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***",file=f)
print("-"*50,file=f)
f.close()
m_w = np.zeros([784, 10], dtype=np.float32)
m_b = np.zeros([10], dtype=np.float32)
for local_model_index in range(len(local_models)):
m_w = np.add(np.multiply(local_models[local_model_index][0], 1/NUM_AGENT), m_w)
m_b = np.add(np.multiply(local_models[local_model_index][1], 1/NUM_AGENT), m_b)
model = {
'weights': m_w,
'bias': m_b
}
learning_rate = learning_rate * 0.9
loss = federated_eval(model, federated_train_data)
print('round {}, loss={}'.format(round_num, loss))
print(time.time()-start_time)
gradient_weights = []
gradient_biases = []
gradient_lrs = []
for ij in range(NUM_AGENT):
model_ = getParmsAndLearningRate(ij)
gradient_weights_local = []
gradient_biases_local = []
learning_rate_local = []
for i in range(len(model_['learning_rate'])):
if i == 0:
gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
model_['learning_rate'][i])
else:
gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
model_['learning_rate'][i])
gradient_weights_local.append(gradient_weight)
gradient_biases_local.append(gradient_bias)
learning_rate_local.append(model_['learning_rate'][i])
gradient_weights.append(gradient_weights_local)
gradient_biases.append(gradient_biases_local)
gradient_lrs.append(learning_rate_local)
all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
group_shapley_value = []
for s in all_sets:
group_shapley_value.append(
train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE))
print(str(s)+"\t"+str(group_shapley_value[len(group_shapley_value)-1]))
agent_shapley = []
for index in range(NUM_AGENT):
shapley = 0.0
for j in all_sets:
if index in j:
remove_list_index = remove_list_indexed(index, j, all_sets)
if remove_list_index != -1:
shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
agent_shapley.append(shapley)
for ag_s in agent_shapley:
print(ag_s)
print("end_time", time.time()-start_time)
I installed TensorFlow Federated with this command:
pip install --upgrade tensorflow_federated
and this line is also underlined in red:
import tensorflow.compat.v1 as tf
When I try to execute it, I get this error:
File "same_OR.py", line 94, in
BATCH_TYPE = tff.NamedTupleType([ AttributeError: module 'tensorflow_federated' has no attribute 'NamedTupleType'
Where is the problem? Can anyone help?
tff.NamedTupleType was renamed to tff.StructType in TFF version 0.16.0 (release notes).
Two options:
Install a pre-0.16.0 version of TFF: this should be doable with pip install tensorflow_federated==0.15.0.
Update the code: the error should go away after replacing the tff.NamedTupleType with tff.StructType in the snippet:
BATCH_TYPE = tff.NamedTupleType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.NamedTupleType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
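With the rename applied (assuming TFF 0.16.0 or later), the same snippet would read:
BATCH_TYPE = tff.StructType([
    ('x', tff.TensorType(tf.float32, [None, 784])),
    ('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.StructType([
    ('weights', tff.TensorType(tf.float32, [784, 10])),
    ('bias', tff.TensorType(tf.float32, [10]))])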

Confidence score of answer extracted using ELMo BiDAF model and AllenNLP

I'm working on a deep learning project where I use a bidirectional attention flow model (an AllenNLP pretrained model) to make a question answering system. It uses the SQuAD dataset. The BiDAF model extracts the answer span from the paragraph. Is there any way to determine the confidence score (accuracy) or any other metric of the answer extracted by the model?
I have used the evaluate subcommand from the allennlp package, but it only determines the score of the model after testing. I was hoping there is an easier way to solve this using some other command.
Attaching the code and the terminal output below.
from rake_nltk import Rake
from string import punctuation
from nltk.corpus import stopwords
from allennlp.predictors.predictor import Predictor
import spacy
import wikipedia
import re
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import traceback
from nltk.stem import SnowballStemmer
from nltk.util import ngrams
from math import log10
from flask import Flask, request, jsonify, render_template
from gevent.pywsgi import WSGIServer
import time
import multiprocessing as mp
from gtts import gTTS
import os
NLP = spacy.load('en_core_web_md')
stop = stopwords.words('english')
symbol = r"""!#$%^&*();:\n\t\\\"!\{\}\[\]<>-\?"""
stemmer = SnowballStemmer('english')
wikipedia.set_rate_limiting(True)
session = HTMLSession()
results = 5
try:
predictor = Predictor.from_path("bidaf-model-2017.09.15-charpad.tar.gz")
except:
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz")
try:
srl = Predictor.from_path('srl-model-2018.05.25.tar.gz')
except:
srl = Predictor.from_path('https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz')
key = Rake(min_length=1, stopwords=stop, punctuations=punctuation, max_length=6)
wh_words = "who|what|how|where|when|why|which|whom|whose|explain".split('|')
stop.extend(wh_words)
session = HTMLSession()
output = mp.Queue()
def termFrequency(term, doc):
normalizeTermFreq = re.sub('[\[\]\{\}\(\)]', '', doc.lower()).split()
normalizeTermFreq = [stemmer.stem(i) for i in normalizeTermFreq]
dl = len(normalizeTermFreq)
normalizeTermFreq = ' '.join(normalizeTermFreq)
term_in_document = normalizeTermFreq.count(term)
#len_of_document = len(normalizeTermFreq )
#normalized_tf = term_in_document / len_of_document
normalized_tf = term_in_document
return normalized_tf, normalizeTermFreq, dl#, n_unique_term
def inverseDocumentFrequency(term, allDocs):
num_docs_with_given_term = 0
for doc in allDocs:
if term in doc:
num_docs_with_given_term += 1
if num_docs_with_given_term > 0:
total_num_docs = len(allDocs)
idf_val = log10(((total_num_docs+1) / num_docs_with_given_term))
term_split = term.split()
if len(term_split) == 3:
if len([term_split[i] for i in [0, 2] if term_split[i] not in stop]) == 2:
return idf_val*1.5
return idf_val
return idf_val
else:
return 0
def sent_formation(question, answer):
tags_doc = NLP(question)
tags_doc_cased = NLP(question.title())
tags_dict_cased = {i.lower_:i.pos_ for i in tags_doc_cased}
tags_dict = {i.lower_:i.pos_ for i in tags_doc}
question_cased = []
for i in question[:-1].split():
if tags_dict[i] == 'PROPN' or tags_dict[i] == 'NOUN':
question_cased.append(i.title())
else:
question_cased.append(i.lower())
question_cased.append('?')
question_cased = ' '.join(question_cased)
#del tags_dict,tags_doc, tags_doc_cased
pre = srl.predict(question_cased)
verbs = []
arg1 = []
for i in pre['verbs']:
verbs.append(i['verb'])
if 'B-ARG1' in i['tags']:
arg1.append((i['tags'].index('B-ARG1'), i['tags'].count('I-ARG1'))\
if not pre['words'][i['tags'].index('B-ARG1')].lower() in wh_words else \
(i['tags'].index('B-ARG2'), i['tags'].count('I-ARG2')))
arg1 = arg1[0] if arg1 else []
if not arg1:
verb_idx = pre['verbs'][0]['tags'].index('B-V')
verb = pre['words'][verb_idx] if pre['words'][verb_idx] != answer.split()[0].lower() else ''
subj_uncased = pre['words'][verb_idx+1:] if pre['words'][-1] not in symbol else \
pre['words'][verb_idx+1:-1]
else:
verb = ' '.join(verbs)
subj_uncased = pre['words'][arg1[0]:arg1[0]+arg1[1]+1]
conj = ''
if question.split()[0].lower() == 'when':
conj = ' on' if len(answer.split()) > 1 else ' in'
subj = []
for n, i in enumerate(subj_uncased):
if tags_dict_cased[i.lower()] == 'PROPN' and tags_dict[i.lower()] != 'VERB' or n == 0:
subj.append(i.title())
else:
subj.append(i.lower())
subj[0] = subj[0].title()
print(subj)
print(pre)
subj = ' '.join(subj)
sent = "{} {}{} {}.".format(subj, verb, conj, answer if answer[-1] != '.' else answer[:-1])
return sent
class extractAnswer:
def __init__(self):
self.wiki_error = (wikipedia.exceptions.DisambiguationError,
wikipedia.exceptions.HTTPTimeoutError,
wikipedia.exceptions.WikipediaException)
self.article_title = None
# symbol = """!#$%^&*();:\n\t\\\"!\{\}\[\]<>-\?"""
def extractAnswer_model(self, passage, question, s=0.4, e=0.3, wiki=False):
if type(passage) == list:
passage = " ".join(passage)
if not question[-1] == '?':
question = question+'?'
pre = predictor.predict(passage=passage, question=question)
if wiki:
if max(pre['span_end_probs']) > 0.5:
s = 0.12
elif max(pre['span_end_probs']) > 0.4:
s = 0.13
elif max(pre['span_end_probs']) > 0.35:
s = 0.14
if max(pre['span_start_probs']) > 0.5:
e = 0.12
elif max(pre['span_start_probs']) > 0.4:
e = 0.14
elif max(pre['span_start_probs']) > 0.3:
e = 0.15
if max(pre['span_start_probs']) > s and max(pre['span_end_probs']) > e:
key.extract_keywords_from_text(question)
ques_key = [stemmer.stem(i) for i in ' '.join(key.get_ranked_phrases())]
key.extract_keywords_from_text(passage)
pass_key = [stemmer.stem(i) for i in ' '.join(key.get_ranked_phrases())]
l = len(ques_key)
c = 0
for i in ques_key:
if i in pass_key:
c += 1
if c >= l/2:
print(max(pre['span_start_probs']),
max(pre['span_end_probs']))
if wiki:
return pre['best_span_str'], max(pre['span_start_probs']) + max(pre['span_end_probs'])
try:
ans = sent_formation(question, pre['best_span_str'])
except:
ans = pre['best_span_str']
print(traceback.format_exc())
return ans
print(ques_key, c, l)
print(max(pre['span_start_probs']), max(pre['span_end_probs']))
return 0, 0
else:
print(max(pre['span_start_probs']), max(pre['span_end_probs']), pre['best_span_str'])
return 0, 0
def wiki_search_api(self, query):
article_list = []
try:
article_list.extend(wikipedia.search(query, results=results))
print(article_list)
return article_list
except self.wiki_error:
params = {'search': query, 'profile': 'engine_autoselect',
'format': 'json', 'limit': results}
article_list.extend(requests.get('https://en.wikipedia.org/w/api.php?action=opensearch',
params=params).json()[1])
return article_list
except:
print('Wikipedia search error!')
print(traceback.format_exc())
return 0
def wiki_passage_api(self, article_title, article_list, output):
# Disambiguation_title = {}
try:
passage = wikipedia.summary(article_title)
output.put((article_title, self.passage_pre(passage)))
except wikipedia.exceptions.DisambiguationError as e:
print(e.options[0], e.options)
Disambiguation_pass = {}
for p in range(2 if len(e.options) > 1 else len(e.options)):
params = {'search':e.options[p], 'profile':'engine_autoselect', 'format':'json'}
article_url = requests.get('https://en.wikipedia.org/w/api.php?action=opensearch',
params=params).json()
if not article_url[3]:
continue
article_url = article_url[3][0]
r = session.get(article_url)
soup = BeautifulSoup(r.html.raw_html)
print(soup.title.string)
article_title_dis = soup.title.string.rsplit('-')[0].strip()
if article_title_dis in article_list:
print('continue')
continue
try:
url = "https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={}".format(article_title_dis)
passage = requests.get(url).json()['query']['pages']
for i in passage.keys():
if 'extract' in passage[i]:
Disambiguation_pass[article_title_dis] = self.passage_pre(passage[i]['extract'])
except wikipedia.exceptions.HTTPTimeoutError:
passage = wikipedia.summary(article_title_dis)
Disambiguation_pass[article_title_dis] = self.passage_pre(passage)
except:
Disambiguation_pass[article_title_dis] = ''
continue
output.put((article_title, Disambiguation_pass))
except:
output.put((article_title, ''))
print(traceback.format_exc())
def sorting(self, article, question, topic):
processes = [mp.Process(target=self.wiki_passage_api, args=(article[x], article, output))\
for x in range(len(article))]
for p in processes:
p.start()
for p in processes:
p.join(timeout=3)
results_p = [output.get() for p in processes]
article_list = []
passage_list = []
for i, j in results_p:
if type(j) != dict and j:
article_list.append(i)
passage_list.append(j)
elif type(j) == dict and j:
for k, l in j.items():
if l:
article_list.append(k)
passage_list.append(l)
normalize_passage_list = []
start = time.time()
keywords = " ".join(self.noun+self.ques_key+[topic.lower()])
keywords = re.sub('[{0}]'.format(symbol), ' ', keywords).split()
question = question+' '+topic
ques_tokens = [stemmer.stem(i.lower()) for i in question.split() \
if i.lower() not in wh_words]
print(ques_tokens)
keywords_bigram = [' '.join(i) for i in list(ngrams(ques_tokens, 2)) \
if i[0] not in stop and i[1] not in stop]
if len(ques_tokens) > 3:
keywords_trigram = [' '.join(i) for i in list(ngrams(ques_tokens, 3)) \
if (i[0] in stop) + (i[2] in stop) + (i[1] in stop) < 3]
else:
keywords_trigram = []
if len(ques_tokens) > 5:
keywords_4gram = [' '.join(i) for i in list(ngrams(ques_tokens, 4)) \
if (i[0] in stop) + (i[2] in stop) +(i[1] in stop)+(i[3] in stop) < 4]
else:
keywords_4gram = []
keywords_unigram = list(set([stemmer.stem(i.lower()) for i in keywords \
if i.lower() not in stop]))
keywords = keywords_unigram+list(set(keywords_bigram))+keywords_trigram+keywords_4gram
tf = []
if not passage_list:
return 0
pass_len = []
#n_u_t=[]
#key_dict = {i: keywords.count(i) for i in keywords}
print('Extraction complete')
#remove_pass={}
#for n,i in enumerate(passage_list):
#if len(i)<200 or not i:
#remove_pass[article_list[n]]=i
#print(n, article_list[n])
#passage_list=[i for i in passage_list if i not in remove_pass.values()]
#article_list=[i for i in article_list if i not in remove_pass.keys()]
passage_list_copy = passage_list.copy()
article_list_copy = article_list.copy()
for i in range(len(passage_list_copy)):
if passage_list.count(passage_list_copy[i]) > 1:
passage_list.remove(passage_list_copy[i])
article_list.remove(article_list_copy[i])
print('Copy:', article_list_copy[i])
del passage_list_copy
del article_list_copy
for n, i in enumerate(passage_list):
temp_tf = {}
c = 0
for j in keywords:
temp_tf[j], temp_pass, temp_len = termFrequency(j, i + ' ' + article_list[n])
if temp_tf[j]:
c += 1
normalize_passage_list.append(temp_pass)
pass_len.append(temp_len)
temp_tf['key_match'] = c
tf.append(temp_tf)
print(pass_len)
print(keywords)
idf = {}
for i in keywords:
idf[i] = inverseDocumentFrequency(i, normalize_passage_list)
#print(tf, idf)
tfidf = []
#b=0.333 #for PLN
b, k = 0.75, 1.2 #for BM25
avg_pass_len = sum(pass_len)/len(pass_len)
#pivot=sum(n_u_t)/len(n_u_t)
for n, i in enumerate(tf):
tf_idf = 0
#avg_tf=sum(i.values())/len(i)
key_match_ratio = i['key_match']/len(keywords)
for j in keywords:
#tf_idf+=idf[j]*((log(1+log(1+i[j])))/(1-b+(b*pass_len[n]/avg_pass_len))) #PLN
tf_idf += idf[j]*(((k+1)*i[j])/(i[j]+k*(1-b+(b*pass_len[n]/avg_pass_len)))) #BM25
tfidf.append(tf_idf*key_match_ratio)
tfidf = [i/sum(tfidf)*100 for i in tfidf if any(tfidf)]
if not tfidf:
return 0, 0, 0, 0, 0
print(tfidf)
print(article_list, len(passage_list))
if len(passage_list) > 1:
sorted_tfidf = sorted(tfidf, reverse=1)
idx1 = tfidf.index(sorted_tfidf[0])
passage1 = passage_list[idx1]
#article_title=
tfidf1 = sorted_tfidf[0]
idx2 = tfidf.index(sorted_tfidf[1])
passage2 = passage_list[idx2]
article_title = (article_list[idx1], article_list[idx2])
tfidf2 = sorted_tfidf[1]
else:
article_title = 0
tfidf2 = 0
if passage_list:
passage1 = passage_list[0]
tfidf1 = tfidf[0]
passage2 = 0
else:
passage1 = 0
passage2 = 0
tfidf1, tfidf2 = 0, 0
end = time.time()
print('TFIDF time:', end-start)
return passage1, passage2, article_title, tfidf1, tfidf2
def passage_pre(self, passage):
#passage=re.findall("[\da-zA-z\.\,\'\-\/\–\(\)]*", passage)
passage = re.sub('\n', ' ', passage)
passage = re.sub('\[[^\]]+\]', '', passage)
passage = re.sub('pronunciation', '', passage)
passage = re.sub('\\\\.+\\\\', '', passage)
passage = re.sub('{.+}', '', passage)
passage = re.sub(' +', ' ', passage)
return passage
def wiki(self, question, topic=''):
if not question:
return 0
question = re.sub(' +', ' ', question)
question = question.title()
key.extract_keywords_from_text(question)
self.ques_key = key.get_ranked_phrases()
doc = NLP(question)
self.noun = [str(i).lower() for i in doc.noun_chunks if str(i).lower() not in wh_words]
print(self.ques_key, self.noun)
question = re.sub('[{0}]'.format(symbol), ' ', question)
if not self.noun + self.ques_key:
return 0
article_list = None
question = question.lower()
if self.noun:
if len(self.noun) == 2 and len(" ".join(self.noun).split()) < 6:
#question1=question
self.noun = " ".join(self.noun).split()
if self.noun[0] in stop:
self.noun.pop(0)
self.noun = question[question.index(self.noun[0]):question.index(self.noun[-1]) \
+len(self.noun[-1])+1].split()
#del question1
print(self.noun)
article_list = self.wiki_search_api(' '.join(self.noun))
if self.ques_key and not article_list:
article_list = self.wiki_search_api(self.ques_key[0])
if not article_list:
article_list = self.wiki_search_api(' '.join(self.ques_key))
if not article_list:
print('Article not found on wikipedia.')
return 0, 0
article_list = list(set(article_list))
passage1, passage2, article_title, tfidf1, tfidf2 = self.sorting(article_list,
question, topic)
if passage1:
ans1, conf1 = self.extractAnswer_model(passage1, question, s=0.20, e=0.20, wiki=True)
else:
ans1, conf1 = 0, 0
if ans1:
conf2 = 0
if len(ans1) > 600:
print(ans1)
print('Repeat')
ans1, conf1 = self.extractAnswer_model(ans1, question, s=0.20, e=0.20, wiki=True)
threshhold = 0.3 if not ((tfidf1- tfidf2) <= 10) else 0.2
if round(tfidf1- tfidf2) < 5:
threshhold = 0
if (tfidf1- tfidf2) > 20:
threshhold = 0.35
if (tfidf1- tfidf2) > 50:
threshhold = 1
if (passage2 and conf1 < 1.5) or (tfidf1 - tfidf2) < 10:
ans2, conf2 = self.extractAnswer_model(passage2, question, s=0.20, e=0.20,
wiki=True) if passage2 else (0, 0)
title = 0
if round(conf1, 2) > round(conf2, 2) - threshhold:
print('ans1')
ans = ans1
title = article_title[0] if article_title else 0
else:
print('ans2')
title = article_title[1] if article_title else 0
ans = ans2
if not question[-1] == '?':
question = question+'?'
try:
ans = sent_formation(question, ans)
except:
print(traceback.format_exc())
print(ans, '\n', '\n', article_title)
return ans, title
extractor = extractAnswer()
app = Flask(__name__)
#app.route("/", methods=["POST", "get"])
#app.route("/ans")
def ans():
start = time.time()
question = request.args.get('question')
topic = request.args.get('topic')
passage = request.args.get('passage')
if not question:
return render_template('p.html')
if not topic:
topic = ''
if passage:
answer = extractor.extractAnswer_model(passage, question)
else:
answer, title = extractor.wiki(question, topic)
end = time.time()
if answer:
mytext = str(answer)
language = 'en'
myobj = gTTS(text=mytext, lang=language, slow=False)
myobj.save("welcome.mp3")
# prevName = 'welcome.mp3'
#newName = 'static/welcome.mp3'
#os.rename(prevName,newName)
return render_template('pro.html', answer=answer)
else:
return jsonify(Status='E', Answer=answer, Time=end-start)
#app.route("/audio_del/", methods=["POST", "get"])
def audio_del():
return render_template('p.html');
#app.route("/audio_play/", methods=["POST", "get"])
def audio_play():
os.system("mpg321 welcome.mp3")
return render_template('white.html')
if __name__ == "__main__":
PORT = 7091
HTTP_SERVER = WSGIServer(('0.0.0.0', PORT), app)
print('Running on',PORT, '...')
HTTP_SERVER.serve_forever()
![Output in the terminal for a question I've asked](https://i.stack.imgur.com/6pyv5.jpg)
I came across a possible solution to this after looking closely at the output returned by the model. Although this is probably not something you can rely on for accuracy, it did the job in my case:
Note that the text answer which is "best_span_str" is always a subarray of the passage. It spans the range which is stored in "best_span".
i.e., "best_span" contains the start and end index of the answer.
Now, the output data contains a property named "span_end_probs".
"span_end_probs" contains a list of values that correspond to all the words present in the text input.
If you look closely for various inputs, the value is always maximum at one of the indexes within the starting and ending range that "best_span" contains. This value seemed to be very similar to the confidence levels that we need. Let's call this value score. All you need to do now is to try some inputs and find a suitable method to use this score as a metric.
e.g.: if you need a threshold value for some application, you can try a number of test inputs and find a value that is most accurate. In my case, this was around 0.35.
i.e., if the score is less than 0.35, it prints "Answer not found", and if it is greater than or equal to 0.35, it prints the string in "best_span_str".
Here's my code snippet:
from allennlp.predictors.predictor import Predictor
passage = '--INPUT PASSAGE--'
question = '--INPUT QUESTION--'
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo.2021-02-11.tar.gz")
output = predictor.predict(
passage = passage,
question = question
)
score = max(output["span_end_probs"])
if score < 0.35:
print('Answer not found')
else:
print(output["best_span_str"])
You can readily see the example input and output here.

How to standardize address type properly

I'm trying to standardize street addresses by converting abbreviations to the full word (e.g. RD -> Road). I created many lines to account for different spellings and ran into an issue where one replace call overrode another:
import pandas as pd
mydata = {'Street_type': ['PL', 'pl', 'Pl', 'PLACE', 'place']}
mydata = pd.DataFrame(mydata)
mydata['Street_type'] = mydata['Street_type'].replace('PL','Place',regex=True)
mydata['Street_type'] = mydata['Street_type'].replace('pl','Place',regex=True)
mydata['Street_type'] = mydata['Street_type'].replace('Pl','Place',regex=True)
mydata['Street_type'] = mydata['Street_type'].replace('PLACE','Place',regex=True)
mydata['Street_type'] = mydata['Street_type'].replace('place','Place',regex=True)
Instead of Place, I got Placeace. What is the best way to avoid this error? Do I write an if-else statement or some function? Thanks in advance!
Among other problems, you have overlapping logic: you fail to check that the target ("old") string is a full word before you replace it. For instance, with the input type of "PLACE", you trigger both the first and third replacements, generating PlaceACE and then PlaceaceACE before you get to the condition you wanted.
You need to work through your tracking and exclusion logic carefully, and then apply only one of the replacements. You can check the length of the street_type and apply the unique transition you need for that length.
If you're trying to convert a case statement, then you need to follow that logic pattern, rather than the successive applications you coded. You can easily look up how to simulate a "case" statement in Python.
Also consider using a translation dictionary, such as
type_trans = {
"pl": "Place",
"Pl": "Place",
"PLACE": "Place",
...
}
Then your change is simply a lookup applied to the whole column:
mydata['Street_type'] = mydata['Street_type'].map(type_trans)
Also, you might list all of the variants in a tuple and select the matching rows with isin, such as:
type_place = ("PL", "Pl", "pl", "PLACE", "place")
mydata.loc[mydata['Street_type'].isin(type_place), 'Street_type'] = "Place"
... but be sure to generalize this properly for your entire list of street types.
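Putting the dictionary suggestion together on the example frame from the question (a sketch; extend type_trans to cover the rest of your street types), Series.replace with a dict does exact whole-value matching, so nothing overlaps:
import pandas as pd

mydata = pd.DataFrame({'Street_type': ['PL', 'pl', 'Pl', 'PLACE', 'place']})
type_trans = {'PL': 'Place', 'pl': 'Place', 'Pl': 'Place',
              'PLACE': 'Place', 'place': 'Place'}
# .replace leaves values not in the dict untouched; .map would turn them into NaN.
mydata['Street_type'] = mydata['Street_type'].replace(type_trans)
print(mydata['Street_type'].tolist())  # ['Place', 'Place', 'Place', 'Place', 'Place']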
You can do this correctly with a single pass if you use a proper regex here, e.g. use word boundaries (\b):
In [11]: places = ["PL", "pl", "Pl", "PLACE", "Place", "place"]
In [12]: mydata.Street_type
Out[12]:
0 PL
1 pl
2 Pl
3 PLACE
4 place
Name: Street_type, dtype: object
In [13]: mydata.Street_type.replace("(^|\b)({})(\b|$)".format("|".join(places)), "Place", regex=True)
Out[13]:
0 Place
1 Place
2 Place
3 Place
4 Place
Name: Street_type, dtype: object
# Needleman-Wunsch
def zeros(shape):
retval = []
for x in range(shape[0]):
retval.append([])
for y in range(shape[1]):
retval[-1].append(0)
return retval
match_award = 10
mismatch_penalty = -3
gap_penalty = -4 # both for opening and extanding
def match_score(alpha, beta):
if alpha == beta:
return match_award
elif alpha == '-' or beta == '-':
return gap_penalty
else:
return mismatch_penalty
def finalize(align1, align2):
align1 = align1[::-1] #reverse sequence 1
align2 = align2[::-1] #reverse sequence 2
i,j = 0,0
#calcuate identity, score and aligned sequeces
symbol = ''
found = 0
score = 0
identity = 0
for i in range(0,len(align1)):
# if two AAs are the same, then output the letter
if align1[i] == align2[i]:
symbol = symbol + align1[i]
identity = identity + 1
score += match_score(align1[i], align2[i])
# if they are not identical and none of them is gap
elif align1[i] != align2[i] and align1[i] != '-' and align2[i] != '-':
score += match_score(align1[i], align2[i])
symbol += ' '
found = 0
#if one of them is a gap, output a space
elif align1[i] == '-' or align2[i] == '-':
symbol += ' '
score += gap_penalty
identity = float(identity) / len(align1) * 100
print('Similarity =', "%3.3f" % identity, 'percent')
print('Score =', score)
# print(align1)
# print(symbol)
# print(align2)
def needle(seq1, seq2):
m, n = len(seq1), len(seq2) # length of two sequences
# Generate DP table and traceback path pointer matrix
score = zeros((m+1, n+1)) # the DP table
# Calculate DP table
for i in range(0, m + 1):
score[i][0] = gap_penalty * i
for j in range(0, n + 1):
score[0][j] = gap_penalty * j
for i in range(1, m + 1):
for j in range(1, n + 1):
match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1])
delete = score[i - 1][j] + gap_penalty
insert = score[i][j - 1] + gap_penalty
score[i][j] = max(match, delete, insert)
# Traceback and compute the alignment
align1, align2 = '', ''
i,j = m,n # start from the bottom right cell
while i > 0 and j > 0: # end toching the top or the left edge
score_current = score[i][j]
score_diagonal = score[i-1][j-1]
score_up = score[i][j-1]
score_left = score[i-1][j]
if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]):
align1 += seq1[i-1]
align2 += seq2[j-1]
i -= 1
j -= 1
elif score_current == score_left + gap_penalty:
align1 += seq1[i-1]
align2 += '-'
i -= 1
elif score_current == score_up + gap_penalty:
align1 += '-'
align2 += seq2[j-1]
j -= 1
# Finish tracing up to the top left cell
while i > 0:
align1 += seq1[i-1]
align2 += '-'
i -= 1
while j > 0:
align1 += '-'
align2 += seq2[j-1]
j -= 1
finalize(align1, align2)
needle('kizlerlo','killerpo' )
***********************************************************************************************************************
import textdistance as txd
import numpy
txd.overlap('kizlerlo','kilerpo' )
txd.jaro('kizlerlo','killerpo' )
txd.cosine('kizlerlo','killerpo' )
#txd.needleman_wunsch('kizlerlo','killerpo' )
txd.jaro_winkler('kizlerlo','killerpo' )
#txd.smith_waterman('Loans and Accounts','Loans Accounts' )
#txd.levenshtein.normalized_similarity('Loans and Accounts','Loans Accounts' )
from scipy.spatial import distance
a = 'kizlerlo'
b = 'kilerpoo'
#txd.gotoh('Loans and Accounts','Loans Accounts' )
print(txd.needleman_wunsch.normalized_similarity('Loans and Accounts','Loans Accounts' ))
***************************************************************************************************************************
#Euclidean
import math
import numpy as np
def euclid(str1,str2):
dist=0.0
x=str1
y=str2
set1=set()
for a in range(0,len(x)):
set1.add(x[a])
for a in range(0,len(y)):
set1.add(y[a])
vec1=[None]*len(set1)
vec2=[None]*len(set1)
for counter,each_char in enumerate(set1):
vec1[counter]=x.count(each_char)
vec2[counter]=y.count(each_char)
dist=1/(1+math.sqrt(sum([(a - b) ** 2 for a, b in zip(vec1, vec2)])))
print(dist)
euclid('kizlerlo','killerpo')
***************************************************************************************************************************
from similarity.qgram import QGram
import affinegap
qgram = QGram(2)
#print(qgram.distance('kizlerlo', 'killerpo'))
affinegap.affineGapDistance('kizlerlokill' ,'erpozlerlzler')
***************************************************************************************************************************
#manhattan
def manhattan(str1,str2):
dist=0.0
x=str1
y=str2
set1=set()
for a in range(0,len(x)):
set1.add(x[a])
for a in range(0,len(y)):
set1.add(y[a])
vec1=[None]*len(set1)
vec2=[None]*len(set1)
for counter,each_char in enumerate(set1):
vec1[counter]=x.count(each_char)
vec2[counter]=y.count(each_char)
#dist= sum([np.abs(a - b) for a, b in zip(vec1, vec2)])
dist=1/(1+sum([np.abs(a - b) for a, b in zip(vec1, vec2)]))
print(dist)
manhattan('kizlerlo','killerpo')
import jellyfish
import json
from Levenshtein import distance,jaro_winkler,jaro,ratio,seqratio
def comp(a,b):
return jellyfish.jaro_winkler(a,b)*100 + distance(a,b) + jaro(a,b)*100
ip = {"CED":"WALMART INC_10958553"}
ala = {}
for index,row in df_ala.iterrows():
a = ip.get("CED")
b = row['NN_UID']
c = comp(a,b)
ala.update({row['N_UID'] : c})
ala_max = max(ala, key=ala.get)
ala_f = {"ALACRA" : ala_max}
ces_f = {"CESIUM" : "WALMART_10958553_CESIUM"}
dun_f = {"DUNS" : "WALMART_10958053_DUNS"}
ref_f = {"REF" : "WALMART INC_10958553_REF"}
cax_f = {"CAX" : "WALMART LTD_10958553_CAX"}
final_op = {**ala_f,**ces_f,**dun_f,**ref_f,**cax_f }
final_json = json.dumps(final_op)
print(final_json)
from flask import Flask,request, jsonify
app = Flask(__name__)
@app.route('/test', methods=['GET', 'POST'])
def test():
if request.method == "GET":
return jsonify({"response":"Get request called"})
elif request.method == "POST":
req_Json = request.json
name = req_Json['name']
return jsonify({"response": "Hi" + name})
if __name__ == '__main__':
app.run(debug = True,port = 9090)
{
"name": "Mike"
}
import usaddress
import pandas as pd
import statistics
#sa = dict(usaddress.parse('123 Main St. Suite Chicago, IL' ))
adr = pd.read_excel('C:\\VINAYAK\\Address.xlsx')
adr.columns = ['Address']
strlen = []
scr = []
loop = adr['Address'].tolist()
for i in loop:
strlen.append(len(i))
x = statistics.median(strlen)
for i in loop:
sa = dict(usaddress.parse(i))
sa = list(sa.values())
a = 0
if len(i) > x :
a+= 5
if 'AddressNumber' in sa :
a+= 23
if 'StreetName' in sa :
#a = a + 20
a+= 17
if 'OccupancyType' in sa :
a+= 6
if 'OccupancyIdentifier' in sa :
a+= 12
if 'PlaceName' in sa :
a+= 12
if 'StateName' in sa :
a+= 13
if 'ZipCode' in sa :
a+= 12
scr.append(a)
adr['Adr_Score'] = scr
adr.head()
#(pd.DataFrame([(key) for key in sa.items()])).transpose()
#pd.DataFrame(dict([(value, key) for key, value in sa.items()]))
#pd.DataFrame(dict([(value, key) for key, value in sa.items()]))
# df_ts = pd.DataFrame(columns = ['AddressNumber' , 'Age', 'City' , 'Country'])
# df_ts.append(sa, ignore_index=False, verify_integrity=False, sort=None)
# df_ts.head()
import pandas as pd
from zipfile import ZipFile
# core = []
# f = open('C:/Users/s.natarajakarayalar/1.txt','r')
# core.append(str(f.readlines()))
# print(core)
import os
import zipfile
import re
import nltk
import os
core = []
with zipfile.ZipFile('C:/Users/s.natarajakarayalar/TF.zip') as z:
a = 0
for filename in z.namelist():
#if a < 1:
#if not os.path.isdir(filename):
# read the file
with z.open(filename) as f:
#a = 2
x = f.readlines()
core = core + x
with open('C:/Users/s.natarajakarayalar/fins.txt', 'w') as f:
for item in core:
f.write("%s\n" % item)
# for i in core:
# if k < 5:
# tkt = re.sub(r'.*CONTENT', '', i)
# new_core.append(tkt)
# k = k+1
# for item in core:
# new_core.append(len(item.split()))
# print(sum(new_core))
# from nltk.tokenize import word_tokenize
# new_core = []
# stp = ['URL:https://','TITLE:b','META-KEYWORDS:','None','DOC ID:','CONTENT:b','URL:','TITLE:','META-CONTENT:']
# #new_core = [word for word in core if word not in stopwords]
# for i in core:
# wk = word_tokenize(i)
# for w in wk:
# if w not in stp:
# new_core.append(w)

Plotting results from Gurobi in Python

import os
import sys
import math
import cvxopt as cvx
import picos as pic
import pandas as pd
import matplotlib.pyplot as plt
from gurobipy import *
from statsmodels.tsa.arima_model import ARIMA
import numpy as np
from scipy import *
#import DeferableLoad
OPTmodel = Model('OPTIMIZER')
#general parameters
Tamb =22
N = 1440 # maximum iteration
i = range(1, N)
COP= 3.4 # Coefficient of performance
'''
Predictions need to be added here
'''
# Datacenter room definition
R = 10 #length of room
B = 7
H = 9 #Height of room
L = 10
dT = 60
A = 2*((L*B)+(B*H)+(H*L))
Thick = 0.33 # thickness of wall
k = 0.7 # thermal conductivity of wall
mAir = 1.2 * (L * B * H)
C = 718
landa = k * A / Thick
a0 = 0.05 / dT
a1 = 1
ki = math.exp(-(landa * 60) / (mAir * C)) # constant determined by the thermal properties of the room
kc = (1 - ki) * a0
ko = (1 - ki) * a1
kp = (1 - ki) * (COP / landa)
Tmin= 18
Tmax= 27
Tamb= 22
PcoolingRated = 100
Pbess_rated = 30.462
Pbess_ratedN = -30.462
Ebess_min = 0
Ebess_max = 300
with open ('Pcooling.csv','r') as f:
Pcooling = []
for line in f:
Pcooling.append(line)
f.close()
with open ('ITpower.csv','r') as f1:
ITload = []
for line1 in f1:
ITload.append(line1)
f1.close()
with open ('DR.csv','r') as f2:
DR =[]
for line2 in f2:
DR.append(line2)
f2.close()
print(ITload)
print(Pcooling)
print(DR)
for i in range(1,200):
for it in range(1, 1440):
Tm = np.empty(1440)
Tm.fill(18)
TmA = np.empty(1440)
TmA.fill(27)
Phvac_flex = {}
Phvac_up = {}
Phvac_down_= {}
Phvac_up_ = {}
Pbess_out_ = {}
Pbess_in_ = {}
Phvac_down = {}
Pbess_flex_ = {}
Pbess_flex = {}
Phvac_flex_ = {}
Pbess_in = {}
Pdc = {}
Pdc_base = {}
Pflex_i = {}
Tdc_i = {}
Pbess_out ={}
Ebess_i = {}
Phvac_flex[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS,name="PHVAC_flex"+str(i))
Phvac_up[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PHVAC_up" + str(i))
Phvac_up_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PHVAC_up_" + str(i))
Phvac_down_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PHVAC_down_" + str(i))
Pbess_out_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_out_" + str(i))
Pbess_in_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_in_" + str(i))
Phvac_down[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PHVAC_down" + str(i))
Pbess_flex_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_flex_" + str(i))
Pbess_flex[i] = OPTmodel.addVar(lb=-GRB.INFINITY,ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_flex" + str(i))
Phvac_flex_[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PHVAC_flex_" + str(i))
Pbess_in[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_in" + str(i))
Pdc[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PDC" + str(i))
Pdc_base[i] = OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PDC_base" + str(i))
Pflex_i[i]= OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="Pflex_i" + str(i))
Tdc_i[i]= OPTmodel.addVar(ub=GRB.INFINITY,vtype = GRB.CONTINUOUS, name = "Tdc_i" + str(i))
Pbess_out[i] = OPTmodel.addVar(lb=-GRB.INFINITY,ub=GRB.INFINITY,vtype=GRB.CONTINUOUS, name="PBESS_out" + str(i))
Ebess_i[i]= OPTmodel.addVar(ub=GRB.INFINITY,vtype=GRB.CONTINUOUS,name="Ebess_i" + str(i))
Pflex_i[1] = 0
Pflex_i[1] = 0
Tdc_i[0] = 18
Phvac_flex[1] = 0
# Phvac_flex_[1] = 0
Phvac_down[1] = 0
Phvac_up[1] = 0
Phvac_down_[1] = 0
Phvac_up_[1] = 0
# Phvac_down_pos[1] = 0
# Phvac_up_pos(1) = 0;
Pbess_flex[1] = 0
# Pbess_flex_[1] = 0
Pbess_out[1] = 0
Pbess_in[1] = 0
# Pbess_out_[1] = 0
Pbess_in_[1] = 0
# Pbess_out_pos[1] = -250
# Pbess_in_pos(1) = 250;
Ebess_i[1] = 150
OPTmodel.update()
'''
if float(DR[i]) > 0:
Phvac_down_[i] = 0
Phvac_up_[i] = float(DR[i])
Pbess_out_[i] = 0
Pbess_in_[i] = float(DR[i])
#Pbess_flex_[i] = Pbess_in_[i] + Pbess_out_[i]
#Phvac_flex_[i] = Phvac_down_[i] + Phvac_up_[i]
OPTmodel.update()
elif float(DR[i]) < 0:
Phvac_down_[i] = float(DR[i])
Phvac_up_[i] = 0
#Phvac_flex_[i] = Phvac_down_[i] + Phvac_up_[i]
Pbess_out_[i] = float(DR[i])
Pbess_in_[i] = 0
#Pbess_flex_[i] = Pbess_in_[i] + Pbess_out_[i]
OPTmodel.update()
else:
Phvac_down_[i] = 0
Phvac_up_[i] = 0
Phvac_flex_[i] = Phvac_down_[i] + Phvac_up_[i]
Pbess_out_[i] = 0
Pbess_in_[i] = 0
Pbess_flex_[i] = Pbess_in_[i] + Pbess_out_[i]
OPTmodel.update()
'''
#print Phvac_up.values()
#print Phvac_flex_[i]
print(OPTmodel)
OPTmodel.update()
ConHVAC1 = OPTmodel.addConstr(Phvac_flex[i] == Phvac_up[i] + Phvac_down[i], name='ConHVAC1')
ConHVAC2 = OPTmodel.addConstr(0 <= Phvac_flex[i] , name='ConHVAC2')
ConHVAC3 = OPTmodel.addConstr(Phvac_flex[i] <= PcoolingRated, name='ConHVAC3')
PH = pd.read_csv('Pcooling.csv')
PHVAC = PH.values
newList2 = list(map(lambda x: x / 1000, PHVAC))  # list() so it can be indexed below
p=[]
p=PcoolingRated-newList2[i]
#CONHVAC4 = OPTmodel.addConstr(Phvac_up[i]==np.minimum((Phvac_up_[i]),(float(newList2[i]))))
#Phvac_u(1:MaxIter) == min(Phvac_u_(1:MaxIter), (repelem(Phvac_max, MaxIter) - (Pcooling(1:MaxIter)'/1000)))
ConTemp1 = OPTmodel.addConstr(Tm[it] <= Tdc_i[i] <= TmA[it], name='ConTemp1')
ConBESS1 = OPTmodel.addConstr(Pbess_ratedN <= Pbess_flex[i] <= Pbess_rated, name='ConBESS1')
ConBESS2 = OPTmodel.addConstr(Pbess_flex[i] == Pbess_in[i] + Pbess_out[i], name='ConBESS2')
ConBESS3 = OPTmodel.addConstr(0 <= Pbess_in[i] <= min(Pbess_rated, Pbess_in_[i]), name='ConBESS3')
ConBESS4 = OPTmodel.addConstr(np.maximum(Pbess_ratedN,Pbess_out_[i]) <= Pbess_out[i]<=0 , name='ConBESS4') # need to modifty
ConEBESS1 = OPTmodel.addConstr(Ebess_min <= Ebess_i[i], name='ConEBESS1')
ConEBESS2 = OPTmodel.addConstr(Ebess_i[i] <= Ebess_max, name='ConEBESS2')
D = pd.read_csv('DR.csv').values
DRN = list(map(lambda x: x / 1000, D))
PDRN = list(map(lambda x: x / 4.8, DRN))
if float((PDRN[i])) > 0:
CON1 = OPTmodel.addConstr(Pbess_flex_[i] == Pbess_in_[i] + Pbess_out_[i],'CON1')
CON2 = OPTmodel.addConstr(Phvac_flex_[i] == Phvac_up_[i] + Phvac_down_[i],'CON2')
CON3=OPTmodel.addConstr(Phvac_down_[i] == 0, name='CON3')
CON4=OPTmodel.addConstr(Phvac_up_[i] == float((PDRN[i])),name='CON4')
CON5=OPTmodel.addConstr(Pbess_out_[i] == 0,name='CON5')
CON6=OPTmodel.addConstr(Pbess_in_[i] == float((PDRN[i])),name='CON6')
elif float(np.transpose(PDRN[i])) < 0:
CON7=OPTmodel.addConstr(Phvac_down_[i] == float(np.transpose(PDRN[i])),name='CON7')
CON8=OPTmodel.addConstr(Phvac_up_[i] == 0,name='CON8')
# Phvac_flex_[i] = Phvac_down_[i] + Phvac_up_[i]
CON9=OPTmodel.addConstr(Pbess_out_[i] == float((PDRN[i])),name='CON9')
CON10=OPTmodel.addConstr(Pbess_in_[i] == 0,name='CON10')
else:
CON11=OPTmodel.addConstr(Phvac_down_[i] == 0,name='CON11')
CON12=OPTmodel.addConstr(Phvac_up_[i] == 0,name='CON12')
CON13=OPTmodel.addConstr(Phvac_flex_[i] == Phvac_down_[i] + Phvac_up_[i],name='CON13')
CON14=OPTmodel.addConstr(Pbess_out_[i] == 0)
CON15=OPTmodel.addConstr(Pbess_in_[i] == 0,name='CON15')
CON16=OPTmodel.addConstr(Pbess_flex_[i] == Pbess_in_[i] + Pbess_out_[i],name='CON16')
OPTmodel.update()
ConPDC = OPTmodel.addConstr(Pdc[i] == Pflex_i[i] + float(ITload[i]), name='ConPDC')
# OPTmodel.addConstr(Tdc_i[i]==(ki*Tdc_i[i-1]+(ko*Tamb)))
#for x in Ebess_i:
#ConEBESS2 = OPTmodel.addConstr(Ebess_i[i] ==((Pbess_in[i] / 0.75) + (Pbess_out[i] * 0.75)))
cooling = np.array(pd.read_csv('Pcooling.csv'))
DRR = pd.read_csv('DR.csv')
DR = DRR.values
IT = pd.read_csv('ITpower.csv')
ITload = IT.values
newList = list(map(lambda x: x / 1000, ITload))
PH = pd.read_csv('Pcooling.csv')
PHVAC = PH.values
newList2 = list(map(lambda x: x / 1000, PHVAC))
#for y in Tdc_i:
T=pd.read_csv('TT.csv').values
OPTmodel.addConstr(Tdc_i[i]==((ki*float(T[i]))+(ko*Tamb)+(kc*float(newList[i]))-((kp*(float(newList2[i])))+(Phvac_flex[i]*3.14))))
print(Tdc_i.values())
OPTmodel.addConstr(Pbess_out_[i]<=Phvac_flex[i] + Pbess_flex[i]<=Pbess_in_[i])
# Tdc_i[1:len(i)]==(Ki*Tdc_i[1:1438])+(Kc*array2[1:1438])+(Ko*Tamb))
ConBESS5 = OPTmodel.addConstr(Pbess_flex[i] == Pbess_in[i] + Pbess_out[i], name='ConBESS5')
#OPTmodel.addConstr(defIT[i]==DeferableLoad.j2 + DeferableLoad.j3)
# OPTmodel.addConstr(Pdc_base[i]==predictions[i])
ConFLEX = OPTmodel.addConstr(Pflex_i[i] == Pbess_flex[i] + Phvac_flex[i], name='ConFLEX')
PcoolingPredicted = pd.read_csv('PcoolingPredictionResult.csv')
PcoolingPredictedValue = PcoolingPredicted.values
ITPredicted = pd.read_csv('ITpredictionResult.csv')
ITPredictedValue = ITPredicted.values
ConPDCbase = OPTmodel.addConstr(Pdc_base[i] == np.transpose(ITPredictedValue[i]) + np.transpose(PcoolingPredictedValue[i]))
OPTmodel.update()
# OPTmodel.addConstr(Pdc_base[i]==prediction[i])
OPTmodel.setObjective((np.transpose(Pdc_base[i])-float(DR[i]) - (Pdc[i]) ), GRB.MINIMIZE)
OPTmodel.update()
OPTmodel.optimize()
print(Pdc_base[i].X)
#print(Ebess_i[i].X)
#print(Phvac_flex[i].X)
print(Tdc_i[i])
print(Pdc[i])
print(Phvac_flex[i])
print(Pbess_flex[i])
print(Pbess_out[i])
print(Pbess_in[i])
print(Ebess_i[i])
print(Pbess_flex_[i])
print(Phvac_down[i])
print(Phvac_up[i])
'''
def get_results(self):
"""
This function gets the results of the current optimization model
Returns
-------
"""
HVACresult = np.zeros(1,N)
BatteryResult = np.zeros(1,N)
SOC = np.zeros(1,N)
#r_Q_dot = np.zeros((self.gp.N_H, self.N_S))
#r_P = np.zeros((self.gp.N_H, self.N_S))
#r_P_self = np.zeros((self.gp.N_H, self.N_S))
#r_P_ex = np.zeros((self.gp.N_H, self.N_S))
#r_Q_dot_gas = np.zeros((self.gp.N_H, self.N_S))
#Load = np.zeros((self.gp.N_H, self.N_S))
try:
for t in range(1,N):
HVACresult[t]= Phvac_flex[t].X
BatteryResult[t]=Pbess_flex[t].X
SOC[t] = Ebess_i[t].X / Ebess_max
except:
pass
return { 'SOC' : SOC , 'BatteryResult': BatteryResult }
print OPTmodel.getVars()
# get results
Temp = {}
Battery = {}
Ebess_result = {}
ITloadd = {}
for t in range(1,N):
Temp[t] = OPTmodel.getVarByName("Tdc_i" )
Battery[t] = OPTmodel.getVarByName("PBESS_flex" )
Ebess_result[t] = OPTmodel.getVarByName("Ebess_i" )
#r_P_e[t] = model.getVarByName("P_export_%s_0" % t).X
fig, axes = plt.subplots(4, 1)
# plot elctricity
ax5 = axes[2]
ax6 = ax5.twinx()
ax5.plot( [Temp[t] for t in range(1,N)], 'g-')
ax6.plot([Ebess_result[t] for t in range(1,N)], 'b-')
ax5.set_xlabel('Time index')
ax5.set_ylabel('Power Import [W]', color='g')
ax6.set_ylabel('Power CHP [W]', color='b')
ax7 = axes[3]
ax7.plot([Battery[t] for t in range(1,N)], 'g-')
ax7.set_ylabel('Power Export [W]', color='g')
'''
print(Pflex_i.values())
# print(OPTmodel.getVars())
print(OPTmodel.feasibility())
print(OPTmodel.getObjective())
print(Pdc_base.values())
'''
b = map(float, Phvac_flex)
plt.plot(b)
plt.show()
'''
#c = map(float, Pbess_flex_)
#plt.plot(c)
#plt.show()
print(OPTmodel)
print(Tdc_i.values())
# get results
print(OPTmodel.getVars())
# print(OPTmodel.getAttr('EBESS_i'))
status = OPTmodel.status
print(status)
# print(Con10, Con12)
print(Phvac_flex.values())
print(Pbess_flex.values())
print(Ebess_i.values())
print(OPTmodel.objval)
print(Tdc_i)
print(Pbess_in)
print(Pbess_out.values())
# print(Pbess_flex)
# print(Phvac_flex)
# print(Ebess_i)
print(Pflex_i.values())
print(Pbess_flex_.values())
#print(OPTmodel.getVars())
print(OPTmodel.feasibility())
print(OPTmodel.getObjective())
print(Ebess_i.values())
if OPTmodel.status == GRB.Status.INF_OR_UNBD:
# Turn presolve off to determine whether model is infeasible
# or unbounded
OPTmodel.setParam(GRB.Param.Presolve, 0)
OPTmodel.optimize()
OPTmodel.write("mymodel.lp")
if OPTmodel.status == GRB.Status.OPTIMAL:
print('Optimal objective: %g' % OPTmodel.objVal)
OPTmodel.write('model.sol')
exit(0)
elif OPTmodel.status != GRB.Status.INFEASIBLE:
print('Optimization was stopped with status %d' % OPTmodel.status)
exit(0)
# Model is infeasible - compute an Irreducible Inconsistent Subsystem (IIS)
print('')
print('Model is infeasible')
OPTmodel.computeIIS()
OPTmodel.write("model.ilp")
print("IIS written to file 'model.ilp'")
I want to plot the computed values from Gurobi, but when I try to read the X attribute of a Gurobi variable it fails with AttributeError: it has no attribute 'X', and when I cast the value from float to int it just shows an empty plot, even though in the LP file I can see the result of each iteration.
I am anxiously waiting for your response.
Cheers
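For reference, a minimal sketch (not the code from the post) of the usual pattern for reading solution values: .X only exists after optimize() has returned with a solution, and only on actual Gurobi Var objects, so dictionary entries that were overwritten with plain numbers (e.g. Tdc_i[0] = 18) have to be handled separately before plotting:
OPTmodel.optimize()
if OPTmodel.status == GRB.OPTIMAL:
    # read .X from real variables, keep plain numbers as they are
    values = [v.X if hasattr(v, 'X') else float(v) for t, v in sorted(Tdc_i.items())]
    plt.plot(values)
    plt.xlabel('time step')
    plt.ylabel('Tdc_i')
    plt.show()
else:
    print('No solution available, status =', OPTmodel.status)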
