Related
When I use splitter_freq.py to split my .dat file, I meet the following problem.
line 5: name = str(sys.argv[position+1])
IndexError: list index out of range
I use Ubuntu, and the command is:
python3.6 splitter_freq.py abc.dat
This is splitter_freq.py:
import sys

arguments = len(sys.argv) - 1
position = 1
name = str(sys.argv[position+1])
while (arguments > position): # changed >= to > to avoid error
    bench_freq_names = []
    path0 = "./%s/" % name + sys.argv[position]
    position = position + 1
    print("the input file is ", path0)
    c_0 = open(path0, 'r')
    header = c_0.readline()
    l_0 = c_0.readline()
    w_0 = l_0.split()
    bench_name = w_0[1]
    freq_value = w_0[3]
    path_out = path0.split("/")
    path_out = path_out[-1].split(".")
    print("the benchmark name is ", bench_name)
    print("the freq value is ", freq_value)
    bench_freq_names.append([bench_name, freq_value])
    output_file = "./%s/" % name + path_out[0] + "_" + bench_name + "_" + freq_value + ".txt"
    m = open(output_file, 'a')
    print(header.rstrip('\n'), file=m)
    print(l_0.rstrip('\n'), file=m)
    for l_0 in c_0: # read lines one by one
        w_0 = l_0.split()
        if (w_0[1] == bench_name and w_0[3] == freq_value):
            print(l_0.rstrip('\n'), file=m)
        else: # new bench_name or new freq_value
            m.close() # close file
            bench_name = w_0[1] # update bench
            freq_value = w_0[3] # update freq
            print("the benchmark name is ", bench_name)
            print("the freq value is ", freq_value)
            output_file = "./%s/" % name + path_out[0] + "_" + bench_name + "_" + freq_value + ".txt"
            m = open(output_file, 'a')
            if [bench_name, freq_value] not in bench_freq_names:
                bench_freq_names.append([bench_name, freq_value])
                print(header.rstrip('\n'), file=m)
            print(l_0.rstrip('\n'), file=m)
    c_0.close()
    m.close()
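For reference, with the command above sys.argv is ['splitter_freq.py', 'abc.dat'], so sys.argv[2] (position+1 with position = 1) does not exist, which is what raises the IndexError. A minimal guard (a hypothetical addition, not part of the original script) would be:

import sys

# With `python3.6 splitter_freq.py abc.dat`, sys.argv == ['splitter_freq.py', 'abc.dat'],
# so any index >= 2 is out of range. Check the argument count before indexing (hypothetical guard).
if len(sys.argv) < 3:
    sys.exit("usage: python3.6 splitter_freq.py <input.dat> <name>")
name = str(sys.argv[2])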
I import technicals.py into bot.py and want to reuse the variables sl and tp calculated in the process_candles method of the Technicals class.
If a constant number is given to sl and tp in bot.py, the script works. However, the desired result is to get the sl and tp values that process_candles in technicals.py calculates.
Snippet from technicals.py:
df['PAIR'] = self.pair
decision = NONE
tp = 0
sl = 0
if c[-2] > o[-2]:
    if ca[-1] > h[-2] + 0.0010:
        decision = BUY
        tp = ca[-1] + 0.010
        sl = l[-2] - 0.010
elif o[-2] > c[-2]:
    if cb[-1] < l[-2] - 0.0010:
        decision = SELL
        tp = cb[-1] - 0.010
        sl = h[-2] + 0.010
else:
    decision = NONE
Snippet from bot.py:
def process_pairs(self):
    trades_to_make = []
    for pair in self.trade_pairs:
        if self.timings[pair].ready == True:
            self.log_message(f"Ready to trade {pair}")
            techs = Technicals(self.settings[pair], self.api, pair, GRANULARITY, log=self.tech_log)
            decision = techs.get_trade_decision(self.timings[pair].last_candle)
            print("process decision")
            print(decision)
            units = decision * self.settings[pair].units
            #tp = "154"
            #sl = "153"
            if units != 0:
                trades_to_make.append({'pair': pair, 'units': units, 'take_profit': tp, 'stop_loss': sl})
The full scripts are below:
technicals.py
import pandas as pd
import numpy as np
from defs import BUY, SELL, NONE

class Technicals():
    def __init__(self, settings, api, pair, granularity, log=None):
        self.settings = settings
        self.log = log
        self.api = api
        self.pair = pair
        self.granularity = granularity

    def log_message(self, msg):
        if self.log is not None:
            self.log.logger.debug(msg)

    def fetch_candles(self, row_count, candle_time):
        status_code, df = self.api.fetch_candles(self.pair, count=row_count, granularity=self.granularity)
        if df is None:
            self.log_message(f"Error fetching candles for pair:{self.pair} {candle_time}, df None")
            return None
        elif df.iloc[-1].time != candle_time:
            self.log_message(f"Error fetching candles for pair:{self.pair} {candle_time} vs {df.iloc[-1].time}")
            return None
        else:
            return df

    def process_candles(self, df):
        open = df.mid_o
        o = np.array(open, dtype='float')
        #print(o)
        high = df.mid_h
        h = np.array(high, dtype='float')
        #print(h)
        low = df.mid_l
        l = np.array(low, dtype='float')
        #print(l)
        close = df.mid_c
        c = np.array(close, dtype='float')
        print(c)
        close_ask = df.ask_c
        ca = np.array(close_ask, dtype='float')
        print(ca)
        close_bid = df.bid_c
        cb = np.array(close_bid, dtype='float')
        print(cb)
        df['PAIR'] = self.pair
        decision = NONE
        tp = 0
        sl = 0
        if c[-2] > o[-2]:
            if ca[-1] > h[-2] + 0.0010:
                decision = BUY
                tp = ca[-1] + 0.010
                sl = l[-2] - 0.010
        elif o[-2] > c[-2]:
            if cb[-1] < l[-2] - 0.0010:
                decision = SELL
                tp = cb[-1] - 0.010
                sl = h[-2] + 0.010
        else:
            decision = NONE
        log_cols = ['time', 'volume', 'PAIR', 'bid_c', 'ask_c', 'mid_o', 'mid_h', 'mid_l', 'mid_c']
        self.log_message(f"Processed_df\n{df[log_cols].tail(3)}")
        self.log_message(f"Trade_decision:{decision}")
        self.log_message("")
        return decision

    def get_trade_decision(self, candle_time):
        max_rows = self.settings.long_ma + 2
        self.log_message("")
        self.log_message(f"get_trade_decision() pair:{self.pair} max_rows:{max_rows}")
        df = self.fetch_candles(max_rows, candle_time)
        print("xxxx")
        print(df)
        if df is not None:
            return self.process_candles(df)
        print("get trade decision")
        print(self.process_candles(df))
        return NONE
bot.py
import pprint
import time
from settings import Settings
from log_wrapper import LogWrapper
from timing import Timing
from oanda_api import OandaAPI
from technicals import Technicals
from defs import NONE, BUY, SELL
from trade_manager import TradeManager

GRANULARITY = "M1"
SLEEP = 10.0

class TradingBot():
    def __init__(self):
        self.log = LogWrapper("Bot")
        self.tech_log = LogWrapper("Technicals")
        self.trade_log = LogWrapper("Trade")
        self.trade_pairs = Settings.get_pairs()
        self.settings = Settings.load_settings()
        self.api = OandaAPI()
        self.trade_manager = TradeManager(self.api, self.settings, self.trade_log)
        self.timings = { p: Timing(self.api.last_complete_candle(p, GRANULARITY)) for p in self.trade_pairs }
        self.log_message(f"Bot started with\n{pprint.pformat(self.settings)}")
        self.log_message(f"Bot Timings\n{pprint.pformat(self.timings)}")
        print(self.api)

    def log_message(self, msg):
        self.log.logger.debug(msg)

    def update_timings(self):
        for pair in self.trade_pairs:
            current = self.api.last_complete_candle(pair, GRANULARITY)
            self.timings[pair].ready = False
            if current > self.timings[pair].last_candle:
                self.timings[pair].ready = True
                self.timings[pair].last_candle = current
                self.log_message(f"{pair} new candle {current}")

    def process_pairs(self):
        trades_to_make = []
        for pair in self.trade_pairs:
            if self.timings[pair].ready == True:
                self.log_message(f"Ready to trade {pair}")
                techs = Technicals(self.settings[pair], self.api, pair, GRANULARITY, log=self.tech_log)
                decision = techs.get_trade_decision(self.timings[pair].last_candle)
                print("process decision")
                print(decision)
                units = decision * self.settings[pair].units
                #tp = "154"
                #sl = "153"
                if units != 0:
                    trades_to_make.append({'pair': pair, 'units': units, 'take_profit': tp, 'stop_loss': sl})
        if len(trades_to_make) > 0:
            print("bot")
            print(trades_to_make)
            self.trade_manager.place_trades(trades_to_make)

    def run(self):
        while True:
            self.update_timings()
            self.process_pairs()
            time.sleep(SLEEP)

if __name__ == "__main__":
    b = TradingBot()
    b.run()
defs.py
API_KEY = "xxxx"
ACCOUNT_ID = "xyz"
OANDA_URL = 'https://api-fxpractice.oanda.com/v3'
SECURE_HEADER = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json'
}
BUY = 1
SELL = -1
NONE = 0
Instead of just returning the decision, also return the take profit and stop loss values:
return decision, tp, sl
Then you can unpack the tuple in process_pairs:
decision, tp, sl = techs.get_trade_decision(self.timings[pair].last_candle)
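Note that get_trade_decision also has a path that returns NONE when no candles could be fetched; for the unpacking to work everywhere, that path has to return a three-element tuple as well. A minimal sketch of the changed return paths, keeping the rest of the posted code as-is:

# technicals.py, end of process_candles (sketch): return all three values
        return decision, tp, sl

# technicals.py, end of get_trade_decision (sketch): propagate the tuple,
# and keep the no-data path unpackable too
        if df is not None:
            return self.process_candles(df)
        return NONE, 0, 0

# bot.py, inside process_pairs (sketch)
decision, tp, sl = techs.get_trade_decision(self.timings[pair].last_candle)
units = decision * self.settings[pair].units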
You can define your tp and sl as class variables of Technicals.
class Technicals(object):
    tp: int = 0
    sl: int = 0
and use them within Technicals as:
cls.tp = ...   # if you are inside a classmethod
self.tp = ...  # if you are inside an instance method
And in the TradingBot you can then simply import Technicals and use the class variables like:
tp = Technicals.tp  # you can use the class
tp = techs.tp       # or the instance you already have
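Applied to the posted code, that would mean assigning to the attributes in process_candles and reading them back in process_pairs, e.g. (a sketch using the instance-attribute variant):

# technicals.py, inside process_candles (sketch)
self.tp = ca[-1] + 0.010
self.sl = l[-2] - 0.010

# bot.py, inside process_pairs (sketch)
decision = techs.get_trade_decision(self.timings[pair].last_candle)
tp = techs.tp
sl = techs.sl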
I'm working on a deep learning project where I use a Bidirectional Attention Flow model (an AllenNLP pretrained model) to build a question-answering system. It uses the SQuAD dataset. The BiDAF model extracts the answer span from a paragraph. Is there any way to determine the confidence score (accuracy) or any other metric for the answer extracted by the model?
I have used the evaluate subcommand from the allennlp package, but it only determines the score of the model after testing. I was hoping there is an easier way to solve the issue using some other such command.
Attaching the code and the terminal output below.
from rake_nltk import Rake
from string import punctuation
from nltk.corpus import stopwords
from allennlp.predictors.predictor import Predictor
import spacy
import wikipedia
import re
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import traceback
from nltk.stem import SnowballStemmer
from nltk.util import ngrams
from math import log10
from flask import Flask, request, jsonify, render_template
from gevent.pywsgi import WSGIServer
import time
import multiprocessing as mp
from gtts import gTTS
import os

NLP = spacy.load('en_core_web_md')
stop = stopwords.words('english')
symbol = r"""!#$%^&*();:\n\t\\\"!\{\}\[\]<>-\?"""
stemmer = SnowballStemmer('english')
wikipedia.set_rate_limiting(True)
session = HTMLSession()
results = 5
try:
    predictor = Predictor.from_path("bidaf-model-2017.09.15-charpad.tar.gz")
except:
    predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz")
try:
    srl = Predictor.from_path('srl-model-2018.05.25.tar.gz')
except:
    srl = Predictor.from_path('https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz')
key = Rake(min_length=1, stopwords=stop, punctuations=punctuation, max_length=6)
wh_words = "who|what|how|where|when|why|which|whom|whose|explain".split('|')
stop.extend(wh_words)
session = HTMLSession()
output = mp.Queue()
def termFrequency(term, doc):
    normalizeTermFreq = re.sub('[\[\]\{\}\(\)]', '', doc.lower()).split()
    normalizeTermFreq = [stemmer.stem(i) for i in normalizeTermFreq]
    dl = len(normalizeTermFreq)
    normalizeTermFreq = ' '.join(normalizeTermFreq)
    term_in_document = normalizeTermFreq.count(term)
    #len_of_document = len(normalizeTermFreq)
    #normalized_tf = term_in_document / len_of_document
    normalized_tf = term_in_document
    return normalized_tf, normalizeTermFreq, dl  #, n_unique_term

def inverseDocumentFrequency(term, allDocs):
    num_docs_with_given_term = 0
    for doc in allDocs:
        if term in doc:
            num_docs_with_given_term += 1
    if num_docs_with_given_term > 0:
        total_num_docs = len(allDocs)
        idf_val = log10(((total_num_docs + 1) / num_docs_with_given_term))
        term_split = term.split()
        if len(term_split) == 3:
            if len([term_split[i] for i in [0, 2] if term_split[i] not in stop]) == 2:
                return idf_val * 1.5
            return idf_val
        return idf_val
    else:
        return 0
def sent_formation(question, answer):
    tags_doc = NLP(question)
    tags_doc_cased = NLP(question.title())
    tags_dict_cased = {i.lower_: i.pos_ for i in tags_doc_cased}
    tags_dict = {i.lower_: i.pos_ for i in tags_doc}
    question_cased = []
    for i in question[:-1].split():
        if tags_dict[i] == 'PROPN' or tags_dict[i] == 'NOUN':
            question_cased.append(i.title())
        else:
            question_cased.append(i.lower())
    question_cased.append('?')
    question_cased = ' '.join(question_cased)
    #del tags_dict, tags_doc, tags_doc_cased
    pre = srl.predict(question_cased)
    verbs = []
    arg1 = []
    for i in pre['verbs']:
        verbs.append(i['verb'])
        if 'B-ARG1' in i['tags']:
            arg1.append((i['tags'].index('B-ARG1'), i['tags'].count('I-ARG1'))\
                if not pre['words'][i['tags'].index('B-ARG1')].lower() in wh_words else \
                (i['tags'].index('B-ARG2'), i['tags'].count('I-ARG2')))
    arg1 = arg1[0] if arg1 else []
    if not arg1:
        verb_idx = pre['verbs'][0]['tags'].index('B-V')
        verb = pre['words'][verb_idx] if pre['words'][verb_idx] != answer.split()[0].lower() else ''
        subj_uncased = pre['words'][verb_idx+1:] if pre['words'][-1] not in symbol else \
            pre['words'][verb_idx+1:-1]
    else:
        verb = ' '.join(verbs)
        subj_uncased = pre['words'][arg1[0]:arg1[0]+arg1[1]+1]
    conj = ''
    if question.split()[0].lower() == 'when':
        conj = ' on' if len(answer.split()) > 1 else ' in'
    subj = []
    for n, i in enumerate(subj_uncased):
        if tags_dict_cased[i.lower()] == 'PROPN' and tags_dict[i.lower()] != 'VERB' or n == 0:
            subj.append(i.title())
        else:
            subj.append(i.lower())
    subj[0] = subj[0].title()
    print(subj)
    print(pre)
    subj = ' '.join(subj)
    sent = "{} {}{} {}.".format(subj, verb, conj, answer if answer[-1] != '.' else answer[:-1])
    return sent
class extractAnswer:
    def __init__(self):
        self.wiki_error = (wikipedia.exceptions.DisambiguationError,
                           wikipedia.exceptions.HTTPTimeoutError,
                           wikipedia.exceptions.WikipediaException)
        self.article_title = None
        # symbol = """!#$%^&*();:\n\t\\\"!\{\}\[\]<>-\?"""

    def extractAnswer_model(self, passage, question, s=0.4, e=0.3, wiki=False):
        if type(passage) == list:
            passage = " ".join(passage)
        if not question[-1] == '?':
            question = question + '?'
        pre = predictor.predict(passage=passage, question=question)
        if wiki:
            if max(pre['span_end_probs']) > 0.5:
                s = 0.12
            elif max(pre['span_end_probs']) > 0.4:
                s = 0.13
            elif max(pre['span_end_probs']) > 0.35:
                s = 0.14
            if max(pre['span_start_probs']) > 0.5:
                e = 0.12
            elif max(pre['span_start_probs']) > 0.4:
                e = 0.14
            elif max(pre['span_start_probs']) > 0.3:
                e = 0.15
        if max(pre['span_start_probs']) > s and max(pre['span_end_probs']) > e:
            key.extract_keywords_from_text(question)
            ques_key = [stemmer.stem(i) for i in ' '.join(key.get_ranked_phrases())]
            key.extract_keywords_from_text(passage)
            pass_key = [stemmer.stem(i) for i in ' '.join(key.get_ranked_phrases())]
            l = len(ques_key)
            c = 0
            for i in ques_key:
                if i in pass_key:
                    c += 1
            if c >= l/2:
                print(max(pre['span_start_probs']),
                      max(pre['span_end_probs']))
                if wiki:
                    return pre['best_span_str'], max(pre['span_start_probs']) + max(pre['span_end_probs'])
                try:
                    ans = sent_formation(question, pre['best_span_str'])
                except:
                    ans = pre['best_span_str']
                    print(traceback.format_exc())
                return ans
            print(ques_key, c, l)
            print(max(pre['span_start_probs']), max(pre['span_end_probs']))
            return 0, 0
        else:
            print(max(pre['span_start_probs']), max(pre['span_end_probs']), pre['best_span_str'])
            return 0, 0

    def wiki_search_api(self, query):
        article_list = []
        try:
            article_list.extend(wikipedia.search(query, results=results))
            print(article_list)
            return article_list
        except self.wiki_error:
            params = {'search': query, 'profile': 'engine_autoselect',
                      'format': 'json', 'limit': results}
            article_list.extend(requests.get('https://en.wikipedia.org/w/api.php?action=opensearch',
                                             params=params).json()[1])
            return article_list
        except:
            print('Wikipedia search error!')
            print(traceback.format_exc())
            return 0

    def wiki_passage_api(self, article_title, article_list, output):
        # Disambiguation_title = {}
        try:
            passage = wikipedia.summary(article_title)
            output.put((article_title, self.passage_pre(passage)))
        except wikipedia.exceptions.DisambiguationError as e:
            print(e.options[0], e.options)
            Disambiguation_pass = {}
            for p in range(2 if len(e.options) > 1 else len(e.options)):
                params = {'search': e.options[p], 'profile': 'engine_autoselect', 'format': 'json'}
                article_url = requests.get('https://en.wikipedia.org/w/api.php?action=opensearch',
                                           params=params).json()
                if not article_url[3]:
                    continue
                article_url = article_url[3][0]
                r = session.get(article_url)
                soup = BeautifulSoup(r.html.raw_html)
                print(soup.title.string)
                article_title_dis = soup.title.string.rsplit('-')[0].strip()
                if article_title_dis in article_list:
                    print('continue')
                    continue
                try:
                    url = "https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={}".format(article_title_dis)
                    passage = requests.get(url).json()['query']['pages']
                    for i in passage.keys():
                        if 'extract' in passage[i]:
                            Disambiguation_pass[article_title_dis] = self.passage_pre(passage[i]['extract'])
                except wikipedia.exceptions.HTTPTimeoutError:
                    passage = wikipedia.summary(article_title_dis)
                    Disambiguation_pass[article_title_dis] = self.passage_pre(passage)
                except:
                    Disambiguation_pass[article_title_dis] = ''
                    continue
            output.put((article_title, Disambiguation_pass))
        except:
            output.put((article_title, ''))
            print(traceback.format_exc())
    def sorting(self, article, question, topic):
        processes = [mp.Process(target=self.wiki_passage_api, args=(article[x], article, output))\
                     for x in range(len(article))]
        for p in processes:
            p.start()
        for p in processes:
            p.join(timeout=3)
        results_p = [output.get() for p in processes]
        article_list = []
        passage_list = []
        for i, j in results_p:
            if type(j) != dict and j:
                article_list.append(i)
                passage_list.append(j)
            elif type(j) == dict and j:
                for k, l in j.items():
                    if l:
                        article_list.append(k)
                        passage_list.append(l)
        normalize_passage_list = []
        start = time.time()
        keywords = " ".join(self.noun + self.ques_key + [topic.lower()])
        keywords = re.sub('[{0}]'.format(symbol), ' ', keywords).split()
        question = question + ' ' + topic
        ques_tokens = [stemmer.stem(i.lower()) for i in question.split() \
                       if i.lower() not in wh_words]
        print(ques_tokens)
        keywords_bigram = [' '.join(i) for i in list(ngrams(ques_tokens, 2)) \
                           if i[0] not in stop and i[1] not in stop]
        if len(ques_tokens) > 3:
            keywords_trigram = [' '.join(i) for i in list(ngrams(ques_tokens, 3)) \
                                if (i[0] in stop) + (i[2] in stop) + (i[1] in stop) < 3]
        else:
            keywords_trigram = []
        if len(ques_tokens) > 5:
            keywords_4gram = [' '.join(i) for i in list(ngrams(ques_tokens, 4)) \
                              if (i[0] in stop) + (i[2] in stop) + (i[1] in stop) + (i[3] in stop) < 4]
        else:
            keywords_4gram = []
        keywords_unigram = list(set([stemmer.stem(i.lower()) for i in keywords \
                                     if i.lower() not in stop]))
        keywords = keywords_unigram + list(set(keywords_bigram)) + keywords_trigram + keywords_4gram
        tf = []
        if not passage_list:
            return 0
        pass_len = []
        #n_u_t = []
        #key_dict = {i: keywords.count(i) for i in keywords}
        print('Extraction complete')
        #remove_pass = {}
        #for n, i in enumerate(passage_list):
        #    if len(i) < 200 or not i:
        #        remove_pass[article_list[n]] = i
        #        print(n, article_list[n])
        #passage_list = [i for i in passage_list if i not in remove_pass.values()]
        #article_list = [i for i in article_list if i not in remove_pass.keys()]
        passage_list_copy = passage_list.copy()
        article_list_copy = article_list.copy()
        for i in range(len(passage_list_copy)):
            if passage_list.count(passage_list_copy[i]) > 1:
                passage_list.remove(passage_list_copy[i])
                article_list.remove(article_list_copy[i])
                print('Copy:', article_list_copy[i])
        del passage_list_copy
        del article_list_copy
        for n, i in enumerate(passage_list):
            temp_tf = {}
            c = 0
            for j in keywords:
                temp_tf[j], temp_pass, temp_len = termFrequency(j, i + ' ' + article_list[n])
                if temp_tf[j]:
                    c += 1
            normalize_passage_list.append(temp_pass)
            pass_len.append(temp_len)
            temp_tf['key_match'] = c
            tf.append(temp_tf)
        print(pass_len)
        print(keywords)
        idf = {}
        for i in keywords:
            idf[i] = inverseDocumentFrequency(i, normalize_passage_list)
        #print(tf, idf)
        tfidf = []
        #b = 0.333  # for PLN
        b, k = 0.75, 1.2  # for BM25
        avg_pass_len = sum(pass_len)/len(pass_len)
        #pivot = sum(n_u_t)/len(n_u_t)
        for n, i in enumerate(tf):
            tf_idf = 0
            #avg_tf = sum(i.values())/len(i)
            key_match_ratio = i['key_match']/len(keywords)
            for j in keywords:
                #tf_idf += idf[j]*((log(1+log(1+i[j])))/(1-b+(b*pass_len[n]/avg_pass_len)))  # PLN
                tf_idf += idf[j]*(((k+1)*i[j])/(i[j]+k*(1-b+(b*pass_len[n]/avg_pass_len))))  # BM25
            tfidf.append(tf_idf*key_match_ratio)
        tfidf = [i/sum(tfidf)*100 for i in tfidf if any(tfidf)]
        if not tfidf:
            return 0, 0, 0, 0, 0
        print(tfidf)
        print(article_list, len(passage_list))
        if len(passage_list) > 1:
            sorted_tfidf = sorted(tfidf, reverse=1)
            idx1 = tfidf.index(sorted_tfidf[0])
            passage1 = passage_list[idx1]
            #article_title =
            tfidf1 = sorted_tfidf[0]
            idx2 = tfidf.index(sorted_tfidf[1])
            passage2 = passage_list[idx2]
            article_title = (article_list[idx1], article_list[idx2])
            tfidf2 = sorted_tfidf[1]
        else:
            article_title = 0
            tfidf2 = 0
            if passage_list:
                passage1 = passage_list[0]
                tfidf1 = tfidf[0]
                passage2 = 0
            else:
                passage1 = 0
                passage2 = 0
                tfidf1, tfidf2 = 0, 0
        end = time.time()
        print('TFIDF time:', end - start)
        return passage1, passage2, article_title, tfidf1, tfidf2

    def passage_pre(self, passage):
        #passage = re.findall("[\da-zA-z\.\,\'\-\/\–\(\)]*", passage)
        passage = re.sub('\n', ' ', passage)
        passage = re.sub('\[[^\]]+\]', '', passage)
        passage = re.sub('pronunciation', '', passage)
        passage = re.sub('\\\\.+\\\\', '', passage)
        passage = re.sub('{.+}', '', passage)
        passage = re.sub(' +', ' ', passage)
        return passage
    def wiki(self, question, topic=''):
        if not question:
            return 0
        question = re.sub(' +', ' ', question)
        question = question.title()
        key.extract_keywords_from_text(question)
        self.ques_key = key.get_ranked_phrases()
        doc = NLP(question)
        self.noun = [str(i).lower() for i in doc.noun_chunks if str(i).lower() not in wh_words]
        print(self.ques_key, self.noun)
        question = re.sub('[{0}]'.format(symbol), ' ', question)
        if not self.noun + self.ques_key:
            return 0
        article_list = None
        question = question.lower()
        if self.noun:
            if len(self.noun) == 2 and len(" ".join(self.noun).split()) < 6:
                #question1 = question
                self.noun = " ".join(self.noun).split()
                if self.noun[0] in stop:
                    self.noun.pop(0)
                self.noun = question[question.index(self.noun[0]):question.index(self.noun[-1]) \
                                     + len(self.noun[-1]) + 1].split()
                #del question1
                print(self.noun)
            article_list = self.wiki_search_api(' '.join(self.noun))
        if self.ques_key and not article_list:
            article_list = self.wiki_search_api(self.ques_key[0])
            if not article_list:
                article_list = self.wiki_search_api(' '.join(self.ques_key))
        if not article_list:
            print('Article not found on wikipedia.')
            return 0, 0
        article_list = list(set(article_list))
        passage1, passage2, article_title, tfidf1, tfidf2 = self.sorting(article_list,
                                                                         question, topic)
        if passage1:
            ans1, conf1 = self.extractAnswer_model(passage1, question, s=0.20, e=0.20, wiki=True)
        else:
            ans1, conf1 = 0, 0
        if ans1:
            conf2 = 0
            if len(ans1) > 600:
                print(ans1)
                print('Repeat')
                ans1, conf1 = self.extractAnswer_model(ans1, question, s=0.20, e=0.20, wiki=True)
        threshhold = 0.3 if not ((tfidf1 - tfidf2) <= 10) else 0.2
        if round(tfidf1 - tfidf2) < 5:
            threshhold = 0
        if (tfidf1 - tfidf2) > 20:
            threshhold = 0.35
        if (tfidf1 - tfidf2) > 50:
            threshhold = 1
        if (passage2 and conf1 < 1.5) or (tfidf1 - tfidf2) < 10:
            ans2, conf2 = self.extractAnswer_model(passage2, question, s=0.20, e=0.20,
                                                   wiki=True) if passage2 else (0, 0)
        title = 0
        if round(conf1, 2) > round(conf2, 2) - threshhold:
            print('ans1')
            ans = ans1
            title = article_title[0] if article_title else 0
        else:
            print('ans2')
            title = article_title[1] if article_title else 0
            ans = ans2
        if not question[-1] == '?':
            question = question + '?'
        try:
            ans = sent_formation(question, ans)
        except:
            print(traceback.format_exc())
        print(ans, '\n', '\n', article_title)
        return ans, title
extractor = extractAnswer()
app = Flask(__name__)

@app.route("/", methods=["POST", "get"])
@app.route("/ans")
def ans():
    start = time.time()
    question = request.args.get('question')
    topic = request.args.get('topic')
    passage = request.args.get('passage')
    if not question:
        return render_template('p.html')
    if not topic:
        topic = ''
    if passage:
        answer = extractor.extractAnswer_model(passage, question)
    else:
        answer, title = extractor.wiki(question, topic)
    end = time.time()
    if answer:
        mytext = str(answer)
        language = 'en'
        myobj = gTTS(text=mytext, lang=language, slow=False)
        myobj.save("welcome.mp3")
        # prevName = 'welcome.mp3'
        # newName = 'static/welcome.mp3'
        # os.rename(prevName, newName)
        return render_template('pro.html', answer=answer)
    else:
        return jsonify(Status='E', Answer=answer, Time=end - start)

@app.route("/audio_del/", methods=["POST", "get"])
def audio_del():
    return render_template('p.html')

@app.route("/audio_play/", methods=["POST", "get"])
def audio_play():
    os.system("mpg321 welcome.mp3")
    return render_template('white.html')

if __name__ == "__main__":
    PORT = 7091
    HTTP_SERVER = WSGIServer(('0.0.0.0', PORT), app)
    print('Running on', PORT, '...')
    HTTP_SERVER.serve_forever()
![Output in the terminal for a question I've asked](https://i.stack.imgur.com/6pyv5.jpg)
I came across a possible solution to this after looking deeply into the output returned by the model. Although it is probably not something you can rely on with complete accuracy, it seemed to do the task in my case:
Note that the text answer, which is "best_span_str", is always a substring of the passage. It spans the range which is stored in "best_span".
i.e., "best_span" contains the start and end index of the answer.
Now, the output data contains a property named "span_end_probs".
"span_end_probs" contains a list of values that correspond to all the words present in the text input.
If you look closely across various inputs, the maximum value always falls at one of the indexes within the start-end range that "best_span" contains. This value seemed to be very similar to the confidence level that we need. Let's call this value score. All you need to do now is to try some inputs and find a suitable method to use this score as a metric.
e.g.: if you need a threshold value for some application, you can try a number of test inputs and find a value that is most accurate. In my case, this was around 0.35.
i.e. if the score is less than 0.35, it prints "Answer not found", and if it is greater than or equal to 0.35, it prints the string in "best_span_str".
Here's my code snippet:
from allennlp.predictors.predictor import Predictor

passage = '--INPUT PASSAGE--'
question = '--INPUT QUESTION--'
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo.2021-02-11.tar.gz")
output = predictor.predict(
    passage=passage,
    question=question
)
score = max(output["span_end_probs"])
if score < 0.35:
    print('Answer not found')
else:
    print(output["best_span_str"])
You can readily see the example input and output here.
I just can't get it done. Therefore I'll post the full code.
The .csv used is from http://www.football-data.co.uk/mmz4281/1415/E0.csv
Now, when run, the variables home_team_a, home_team_d, away_team_a and away_team_d are based on all of the previous matches, but I want them to always be based on the last 6 matches.
import csv, math, ast, numpy as np

def poisson(actual, mean):
    return math.pow(mean, actual) * math.exp(-mean) / math.factorial(actual)

csvFile = '20152016.csv'
team_list = []
k = open('team_list.txt', 'w')
k.write("""{
""")
csvRead = csv.reader(open(csvFile))
next(csvRead)
for row in csvRead:
    if row[2] not in team_list:
        team_list.append(row[2])
    if row[3] not in team_list:
        team_list.append(row[3])
team_list.sort()
for team in team_list:
    k.write("""'%s': {'home_goals': 0, 'away_goals': 0, 'home_conceded': 0, 'away_conceded': 0, 'home_games': 0, 'away_games': 0, 'alpha_h': 0, 'beta_h': 0, 'alpha_a': 0, 'beta_a': 0},
""" % (team))
k.write("}")
k.close()

s = open('team_list.txt', 'r').read()
dict = ast.literal_eval(s)
GAMES_PLAYED = 0
WEEKS_WAIT = 4
TOTAL_VALUE = 0

csvRead = csv.reader(open(csvFile))
next(csvRead)
for game in csvRead:
    home_team = game[2]
    away_team = game[3]
    home_goals = int(game[4])
    away_goals = int(game[5])
    home_win_prob = 0
    draw_win_prob = 0
    away_win_prob = 0
    curr_home_goals = 0
    curr_away_goals = 0
    avg_home_goals = 1
    avg_away_goals = 1
    team_bet = ''
    ev_bet = ''
    # GETTING UPDATED VARIABLES
    for key, value in dict.items():
        curr_home_goals += dict[key]['home_goals']
        curr_away_goals += dict[key]['away_goals']
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        avg_home_goals = curr_home_goals / (GAMES_PLAYED)
        avg_away_goals = curr_away_goals / (GAMES_PLAYED)
    # CALCULATING FACTORS
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        home_team_a = (dict[home_team]['alpha_h'] + dict[home_team]['alpha_a']) / 2
        away_team_a = (dict[away_team]['alpha_h'] + dict[away_team]['alpha_a']) / 2
        home_team_d = (dict[home_team]['beta_h'] + dict[home_team]['beta_a']) / 2
        away_team_d = (dict[away_team]['beta_h'] + dict[away_team]['beta_a']) / 2
        home_team_exp = avg_home_goals * home_team_a * away_team_d
        away_team_exp = avg_away_goals * away_team_a * home_team_d
        # RUNNING POISSON
        l = open('poisson.txt', 'w')
        for i in range(10):
            for j in range(10):
                prob = poisson(i, home_team_exp) * poisson(j, away_team_exp)
                l.write("Prob%s%s = %s\n" % (i, j, prob))
        l.close()
        with open('poisson.txt') as f:
            for line in f:
                home_goals_m = int(line.split(' = ')[0][4])
                away_goals_m = int(line.split(' = ')[0][5])
                prob = float(line.split(' = ')[1])
                if home_goals_m > away_goals_m:
                    home_win_prob += prob
                elif home_goals_m == away_goals_m:
                    draw_win_prob += prob
                elif home_goals_m < away_goals_m:
                    away_win_prob += prob
        # CALCULATE VALUE
        bet365odds_h, bet365odds_d, bet365odds_a = float(game[23]), float(game[24]), float(game[25])
        ev_h = (home_win_prob * (bet365odds_h - 1)) - (1 - home_win_prob)
        ev_d = (draw_win_prob * (bet365odds_d - 1)) - (1 - draw_win_prob)
        ev_a = (away_win_prob * (bet365odds_a - 1)) - (1 - away_win_prob)
        highestEV = max(ev_h, ev_d, ev_a)
        if (ev_h == highestEV) and (ev_h > 0):
            team_bet = home_team
            ev_bet = ev_h
            if home_goals > away_goals:
                TOTAL_VALUE += (bet365odds_h - 1)
            else:
                TOTAL_VALUE -= 1
        elif (ev_d == highestEV) and (ev_d > 0):
            team_bet = 'Draw'
            ev_bet = ev_d
            if home_goals == away_goals:
                TOTAL_VALUE += (bet365odds_d - 1)
            else:
                TOTAL_VALUE -= 1
        elif (ev_a == highestEV) and (ev_a > 0):
            team_bet = away_team
            ev_bet = ev_a
            if home_goals < away_goals:
                TOTAL_VALUE += (bet365odds_a - 1)
            else:
                TOTAL_VALUE -= 1
        if (team_bet != '') and (ev_bet != ''):
            print("Bet on '%s' (EV = %s)" % (team_bet, ev_bet))
            print(TOTAL_VALUE)
    # UPDATE VARIABLES AFTER MATCH HAS BEEN PLAYED
    dict[home_team]['home_goals'] += home_goals
    dict[home_team]['home_conceded'] += away_goals
    dict[home_team]['home_games'] += 1
    dict[away_team]['away_goals'] += away_goals
    dict[away_team]['away_conceded'] += home_goals
    dict[away_team]['away_games'] += 1
    GAMES_PLAYED += 1
    # CREATE FACTORS
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        for key, value in dict.items():
            alpha_h = (dict[key]['home_goals'] / dict[key]['home_games']) / avg_home_goals
            beta_h = (dict[key]['home_conceded'] / dict[key]['home_games']) / avg_away_goals
            alpha_a = (dict[key]['away_goals'] / dict[key]['away_games']) / avg_away_goals
            beta_a = (dict[key]['away_conceded'] / dict[key]['away_games']) / avg_home_goals
            dict[key]['alpha_h'] = alpha_h
            dict[key]['beta_h'] = beta_h
            dict[key]['alpha_a'] = alpha_a
            dict[key]['beta_a'] = beta_a
Use a deque to keep the 6 most recent items in memory; adding a new record will "push out" the oldest one.
import collections
from itertools import islice
import csv

with open("foo.csv") as fh:
    # Skip the first 44 rows
    csv_read = islice(csv.reader(fh), 44, None)
    # Initialize the deque with the next 6 rows
    d = collections.deque(islice(csv_read, 6), 6)
    for record in csv_read:
        d.append(record)
        print(list(d))  # Rows 46-51, then 47-52, then 48-53, etc.
Because you set the maximum length of the deque to 6, each append to a "full" deque pushes out the oldest one. On the first iteration, d.append pushes out row 45 and adds row 51. On the next iteration, adding row 52 pushes out row 46, etc.
In general, a deque is a data structure that is like a combination of a queue and a stack; you can add or remove items to either end efficiently, but accessing an arbitrary item or modifying the "middle" is slow. Here, we're taking advantage of the fact that appending to a full deque causes an implicit removal from the opposite end.
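Applied to the football script above, one way (a sketch, not the original code; names like recent_results and record_match are hypothetical) is to keep one bounded deque of results per team and compute the averages from whatever it currently holds:

import collections

RECENT = 6
# One deque per team, each holding up to the 6 most recent (scored, conceded) tuples.
recent_results = collections.defaultdict(lambda: collections.deque(maxlen=RECENT))

def record_match(home_team, away_team, home_goals, away_goals):
    recent_results[home_team].append((home_goals, away_goals))
    recent_results[away_team].append((away_goals, home_goals))

def recent_averages(team):
    games = recent_results[team]
    if not games:
        return 0.0, 0.0
    scored = sum(g for g, _ in games) / float(len(games))
    conceded = sum(c for _, c in games) / float(len(games))
    return scored, conceded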
How about:
if seen_records == 200:
    recs = list(csvRead)[seen_records - 6:seen_records + 1]
You can do something like this:
import csv

previous_index = 0
previous_max = 6  # max number of previous records to remember
previous = [None for _ in range(previous_max)]
csvFile = 'X.csv'
seen_records = 0
csvRead = csv.reader(open(csvFile))
# Enumerate over the records to keep track of the index of each one
for i, record in enumerate(csvRead):
    if i > 50:
        seen_records += 1
        if previous_index == previous_max:
            previous_index = 0  # Reset to the beginning when we reach the end
        # Store the record and increment the index to the next location
        previous[previous_index] = record
        previous_index += 1
This creates a very basic ring buffer of length previous_max; once it fills up, each new record overwrites the oldest one in place.
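One thing to keep in mind with this layout: once the buffer has wrapped around, index 0 is no longer the oldest record. If you need the last 6 back in chronological order, you can rotate on read, e.g. (a sketch, assuming the previous and previous_index variables from the snippet above):

# Reassemble the ring buffer oldest-first, assuming `previous` and
# `previous_index` from the snippet above.
if None in previous:
    ordered = [r for r in previous if r is not None]  # not full yet: insertion order
else:
    ordered = previous[previous_index:] + previous[:previous_index]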
We have code that draws circles at each location on the map, labeled with the name of each category. Right now the circles and text are all one color. How do we give them different colors by category? Example: category Garden: blue, category Stone: grey.
So far the code:
import json

size(1500, 800)
background(1)
nofill()
stroke('#f91')
pen(.2)
fill('#f91', 0.05)
rotate(90)
font("Avenir", "bold", 10)
align('left')

def mapValue(value, fromMin, fromMax, toMin, toMax):
    # Figure out how 'wide' each range is
    fromSpan = fromMax - fromMin
    toSpan = toMax - toMin
    # Convert the from range into a 0-1 range (float)
    valueScaled = float(value - fromMin) / float(fromSpan)
    # Convert the 0-1 range into a value in the to range.
    return toMin + (valueScaled * toSpan)

def xOfDot(lon):
    return mapValue(lon, -100, 100, 0, WIDTH)

def yOfDot(lat):
    return mapValue(lat, -90, 90, HEIGHT, 0)

with open('theft-alerts.json', 'r') as inputFile:
    data = json.load(inputFile)
print len(data)

artworksPerCity = {}
for stolenArt in data:
    if 'Category' in stolenArt:
        city = stolenArt['Category']
    if 'nItemsStolen' in stolenArt:
        numbersStolen = int(stolenArt['nItemsStolen'])
    if city in artworksPerCity:
        # Adjust the value stored for this city
        artworksPerCity[city] = artworksPerCity[city] + numbersStolen
    else:
        # Create new key with new value
        artworksPerCity[city] = numbersStolen
    # Draw circle on the map
    radius = artworksPerCity[city] / 2
    x = xOfDot(stolenArt['Lon'])
    y = yOfDot(stolenArt['Lat'])
    arc(x, y, radius)
    text(city, x, y)
print artworksPerCity
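One way to get per-category colors (a sketch using the same stroke/fill/arc/text calls as above; the palette values are made up) is to look the category up in a dict before drawing each circle:

# Hypothetical palette: map each known category to a color, with a fallback.
categoryColors = {'Garden': '#46f', 'Stone': '#999'}

dotColor = categoryColors.get(city, '#f91')  # default keeps the current orange
stroke(dotColor)
fill(dotColor, 0.05)
arc(x, y, radius)
text(city, x, y)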
Here is a sketch of what I intend to include in my pure-Python data utility.
def hexidecimalDiget(n, deHex=False):
    # Accepts an int 0-15 (int -> hex character), or a single hex character
    # (character -> int when deHex is True).
    hexLetters = ["a", "b", "c", "d", "e", "f"]
    if isinstance(n, str):
        if n.isdigit():
            return int(n) if deHex else n
        if n in hexLetters:
            return hexLetters.index(n) + 10 if deHex else n
        print "call to hexidecimalDiget("+str(n)+") not supported!"
        return None
    if n < 0:
        print "negative values not supported by call to hexidecimalDiget("+str(n)+")"
        return None
    elif n < 10:
        return str(n)
    elif n < 16:
        return hexLetters[n-10]
    else:
        print "call to hexidecimalDiget("+str(n)+") not supported!"
        return None

def colorFormHexArray(arr):
    if len(arr) != 3 and len(arr) != 6:
        print "invalid length for color on call to colorFormHexArray("+str(arr)+")"
        return None
    elif None in arr:
        print "cannot make color from None arguments in "+str(arr)
        return None
    else:
        ret = "#"
        for k in arr:
            if type(k) == list:
                for k2 in k:
                    ret += hexidecimalDiget(k2)
            else:
                ret += hexidecimalDiget(k)
        return ret

def arrayFromColor(c):
    c = c.replace("#", "")
    col = []
    if len(c) == 3:
        for k in c:
            col.append([hexidecimalDiget(k, deHex=True)])
    elif len(c) == 6:
        for n in range(3):
            col.append([hexidecimalDiget(c[n*2], deHex=True),
                        hexidecimalDiget(c[n*2+1], deHex=True)])
    return col

def intFromHexPair(hp):
    ret = 0
    for n, k in enumerate(hp):
        digBase = 16**(len(hp)-n-1)
        if not isinstance(k, int):
            k = hexidecimalDiget(k, deHex=True)
        ret += digBase*k
    return ret

def hexPairFromInt(I, minDigits=1, maxDigits=256):
    if I < 0:
        print "negative numbers not supported by hexPairFromInt"
        return None
    k = 0
    while 16**(k+1) <= I:
        k += 1
    if k < minDigits:
        k = minDigits
    if k > maxDigits:
        print("maxDigitsExceeded")
    ret = []
    while k >= 0:
        dig = 16**k
        digit = int(I) // dig
        ret.append(hexidecimalDiget(digit))
        I -= digit*dig
        k -= 1
    return ret

def specColor(start, end, bottom, top):
    start = arrayFromColor(start)
    end = arrayFromColor(end)
    def ret(v):
        if v < bottom or v > top:
            print("value out of range "+str([bottom, top]))
            return '#aa0000'  # eyo <- error red
        else:
            starts = [intFromHexPair(k) for k in start]
            ends = [intFromHexPair(k) for k in end]
            normalized = float(v-bottom)/(top-bottom)
            return colorFormHexArray([hexPairFromInt(int(starts[n]+(ends[n]-starts[n])*normalized),
                                                     minDigits=1, maxDigits=256)
                                      for n, k in enumerate(starts)])
    return ret
This seems excessive and hasn't even been slightly tested yet (just a sketch at the moment), but I'll be testing and incorporating this code here tonight :: http://krewn.github.io/KPlot/
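For what it's worth, once the bugs above are ironed out, usage would look something like this (hypothetical values):

# Hypothetical usage: a blue-to-red interpolator over the range 0-100.
heat = specColor('#0000ff', '#ff0000', 0, 100)
print heat(0)    # '#0000ff'
print heat(50)   # a color halfway between blue and red
print heat(100)  # '#ff0000'
print heat(120)  # out of range -> '#aa0000'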