This is my home page and it takes 30 seconds to load. It renders a lot of graphs and a word cloud based on a dataset of roughly 1,000 articles, plus some basic SQLAlchemy operations, but that still should not take this long. How can I reduce the time?
@app.route('/home', methods=["GET", "POST"])
def showjson():
    folder = 'C:/Users/Mansi Dhingra/Desktop/Projects/api/news/static/images'
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        os.remove(file_path)
    news_df = pd.read_csv('news_information1.csv')
    news_df.to_sql('users', con=engine)
    topic_l = engine.execute('''Select distinct Topic from users''').fetchall()
    topic_list = []
    for tr in topic_l:
        topic_list.append(tr[0])
    search = request.form.get("search")
    source_l = engine.execute('''Select distinct source from users''').fetchall()
    source_list = []
    for tr in source_l:
        source_list.append(tr[0])
    bank_l = engine.execute('''Select distinct bank from users''').fetchall()
    bank_list = []
    for tr in bank_l:
        bank_list.append(tr[0])
    end_date = engine.execute('''Select max(date) from users''').fetchall()
    max_date = end_date[0][0]
    sent_count = engine.execute('''Select Sentiment, Count(*) from users group by Sentiment''').fetchall()
    sent_topic = []
    sent_count1 = []
    for tx in sent_count:
        sent_topic.append(tx[0])
        sent_count1.append(tx[1])
    fig_sent = create_graphs(sent_topic, sent_count1, "sentiment")
    list_words = fetch_sentiment_using_vader(news_df['clean_text'])
    stopwords = stopwords_for_wordcount(news_df['clean_text'])
    count_vectorizer = CountVectorizer(stop_words=stopwords[0])
    fig_pos = plot_words(list_words[0], list_words[2], "positive")
    fig_neg = plot_words(list_words[1], list_words[2], "negative")
    fig_cat = count_category(news_df)
    fig_pub = count_pub(news_df)
    create_wordcloud(stopwords)
    fig_tri = bigram_or_trigram(news_df['clean_text'], stopwords, "bigram")
    images_list = os.listdir(os.path.join(app.static_folder, "images"))
    return render_template('news_home.html', fig_pub=fig_pub, topic_list=topic_list, img=images_list,
                           plt_pos=fig_pos, plt_tri=fig_tri, plt_neg=fig_neg,
                           bank_list=bank_list, source_list=source_list, max_date=max_date,
                           fig_cat=fig_cat, fig_sent=fig_sent, search=search)
If you're running a Flask version older than 1.0, enable multi-threading in the development server, e.g. app.run(threaded=True); from 1.0 onwards it is enabled by default. Either way, you can use the profiler that ships with Werkzeug, Flask's underlying WSGI toolkit, to find the most expensive functions in each request:
from werkzeug.middleware.profiler import ProfilerMiddleware
app.wsgi_app = ProfilerMiddleware(app.wsgi_app, restrictions=[20])
This will print the 20 most expensive function calls for each request. Inspect them and modify/refactor accordingly.
Just a tip if you're considering taking this to production: the default Flask server isn't production-ready. There are a few additional steps to take a Flask app to production; there is some guidance in the official Flask documentation under Deploy to Production. Gunicorn behind NGINX is a very good option.
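As a rough illustration (not from the original answer): since the folder paths above suggest a Windows machine, where Gunicorn isn't available, a pure-Python WSGI server such as Waitress is a drop-in replacement for app.run(); the host, port and thread count below are placeholder values to adjust for your own setup.

# Hedged sketch: serve the existing Flask "app" object with Waitress instead of the dev server.
# Install it first with: pip install waitress
from waitress import serve

if __name__ == "__main__":
    # Listen on all interfaces; tune the thread count to your workload.
    serve(app, host="0.0.0.0", port=8000, threads=8)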
My Flask API, which serves a Keras model, is too slow: it only handles 180 requests in 10 seconds.
The API has 3 .py files.
server_side.py:
def predict():
    if request.method == "POST":
        comment = request.form["comment"]
        movies = [int(x) for x in comment.split(",")]
        movies, g_input, t_input, movie_copy = prepare_inputs(movies)
        # print(movies[:10], g_input, t_input)
        preds = make_prediction(movies, g_input, t_input, MODEL)
        most_similar = preds[0].argsort()[-(10 + len(movie_copy)):][::-1]
        watched_movies, rec_movies = result(movie_copy, most_similar, DF)
        context = {
            "table1": [watched_movies.to_html(classes="data")],
            "title1": watched_movies.columns.values,
            "table2": [rec_movies.to_html(classes="data")],
            "title2": rec_movies.columns.values,
        }
        return render_template("prediction.html", context=context)

if __name__ == "__main__":
    global MODEL
    MODEL = MODEL
    app.run(threaded=False)
prediction_helper.py:
has two functions: one to make the prediction and another to prepare the inputs to the required length and shape.
def make_prediction(movies, g_input, t_input, model):
    return model.predict(
        [np.array([movies, ]), np.array([g_input, ]), np.array([t_input, ])]
    )

def prepare_inputs(movie_input):
    movies_copy = movie_input[:]
    genres = ' '.join(DF.genres[DF["movieId"].isin(movie_input)].values)
    genres = genres.split(" ")
    tags = ' '.join(DF.tag[DF["movieId"].isin(movie_input)].values)
    tags = tags.split(" ")
    genres = [
        list(GENRES_MAP.keys())[list(GENRES_MAP.values()).index(i)] for i in genres
    ]
    genres = list(set(genres))
    genres = pad(genres, INPUT_LENGTH["genre_len"])
    tags = [
        list(TAG_MAP.keys())[list(TAG_MAP.values()).index(i)]
        for i in tags
        if i not in ("em", "cs", "se")
    ]
    tags = list(set(tags))
    if len(tags) > 100:
        tags = tags[0:100]
    else:
        tags = pad(tags, INPUT_LENGTH["tag_len"])
    movie_input = pad(movie_input, INPUT_LENGTH["movie_len"])
    return movie_input, genres, tags, movies_copy
data_loader.py:
has two functions: pad, which pads a list to the required length, and result, which extracts rows based on the prediction indices.
import pickle
import pandas as pd
from keras.models import load_model

MODEL = load_model("final_train1.h5")
DF = pd.read_csv("new_df.csv")
DF["tag"] = DF["tag"].fillna("")
PICKLE_IN = open("genres_map.pickle", "rb")
GENRES_MAP = pickle.load(PICKLE_IN)
PICKLE_IN = open("tag_map.pickle", "rb")
TAG_MAP = pickle.load(PICKLE_IN)
INPUT_LENGTH = {"movie_len": 2698, "genre_len": 24, "tag_len": 100}

def pad(lst, width):
    lst.extend([0] * (width - len(lst)))
    return lst

def result(movie_copy, most_similar, DF):
    rec_movies = DF.set_index("movieId").loc[most_similar]
    watched_movies = DF.set_index("movieId").loc[movie_copy]
    return watched_movies, rec_movies
You shouldn't use the Flask built-in server; it is designed for development purposes only, not for production deployment. Instead, use a production WSGI server such as Waitress, uWSGI or Gunicorn, typically behind Nginx. The options for deploying Flask in production are described here: https://flask.palletsprojects.com/en/1.1.x/deploying/
The Flask documentation says the development server is not for production; use a production WSGI server such as Gunicorn along with Nginx.
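To make that concrete (purely illustrative, with numbers you would tune for your own hardware), Gunicorn can be driven from a small Python config file, for example a gunicorn.conf.py next to server_side.py, started with gunicorn -c gunicorn.conf.py server_side:app and fronted by Nginx:

# gunicorn.conf.py -- hypothetical example settings, adjust for your machine.
# Note that each worker process loads its own copy of the Keras model,
# so memory usage grows with the number of workers.
bind = "127.0.0.1:8000"   # Nginx proxies public traffic to this address
workers = 2               # model inference is CPU-bound; keep close to the core count
threads = 4               # threads per worker for handling concurrent requests
timeout = 120             # allow slower predictions without the worker being killed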
I am teaching myself how to use Python and Django to access the Google Places API and make nearby searches for different types of gyms.
I was only taught how to use Python and Django with databases you build locally.
I wrote out a full GET request for each of the four different searches I am doing. I looked up examples, but none seem to work for me.
allgyms = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
all_text = allgyms.text
alljson = json.loads(all_text)
healthclubs = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=healthclub&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
health_text = healthclubs.text
healthjson = json.loads(health_text)
crossfit = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=crossfit&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
cross_text = crossfit.text
crossjson = json.loads(cross_text)
I would really like to be pointed in the right direction on how to reference the API key only once while changing the keywords.
Try this for better readability and reusability:
BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
LOCATION = '38.9208,-77.036'
RADIUS = '2500'
TYPE = 'gym'
API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
KEYWORDS = ''
allgyms = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&key='+API_KEY)
all_text = allgyms.text
alljson = json.loads(all_text)
KEYWORDS = 'healthclub'
healthclubs = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
health_text = healthclubs.text
healthjson = json.loads(health_text)
KEYWORDS = 'crossfit'
crossfit = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
cross_text = crossfit.text
crossjson = json.loads(cross_text)
As V-R suggested in a comment, you can go further and define a function, which makes things more reusable and allows you to call that function from other places in your application.
Function implementation
def makeRequest(location, radius, type, keywords):
    BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
    API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
    result = requests.get(BASE_URL+'location='+location+'&radius='+radius+'&type='+type+'&keyword='+keywords+'&key='+API_KEY)
    jsonResult = result.json()
    return jsonResult
Function invocation
alljson = makeRequest('38.9208,-77.036', '2500', 'gym', '')
Let me know if there is an issue
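One more aside that is not part of the original answer: requests can also build the query string for you via its params argument, which avoids the manual concatenation entirely. A hedged sketch (the function and variable names here are illustrative):

# Hypothetical variant of makeRequest that lets requests handle URL encoding.
import requests

BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'

def make_request(location, radius, place_type, keyword=''):
    params = {
        'location': location,
        'radius': radius,
        'type': place_type,
        'key': API_KEY,
    }
    if keyword:
        params['keyword'] = keyword
    # requests URL-encodes and appends the parameters for us.
    response = requests.get(BASE_URL, params=params)
    response.raise_for_status()
    return response.json()

crossjson = make_request('38.9208,-77.036', '2500', 'gym', 'crossfit')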
So I am trying my best to navigate my way through the Facebook API. I need to create a script that will download my business' campaign information daily as a csv file, so I can use another script to upload the information to our database easily.
I finally have code that works to print the information to the log, but I am reaching the user request limit because I have to call get_insights() for every single campaign individually. I am wondering if anyone knows how I can call the Facebook API less often.
What I would like to do is find a field where I can get the daily spend, so I don't have to call the API in every iteration of my campaign loop, but I cannot for the life of me find a way to do so.
# Import all the facebook mumbo jumbo
from facebookads.api import FacebookAdsApi
from facebookads.adobjects.adset import AdSet
from facebookads.adobjects.campaign import Campaign
from facebookads.adobjects.adsinsights import AdsInsights
from facebookads.adobjects.adreportrun import AdReportRun
from facebookads.adobjects.adaccount import AdAccount
from facebookads.adobjects.business import Business
import time

# Set the login info
my_app_id = '****'
my_app_secret = '****'
my_access_token = '****'

# Start the connection to the facebook API
FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
business = Business('****')

# Get all ad accounts on the business account
accounts = business.get_owned_ad_accounts(fields=[AdAccount.Field.id])

# Iterate through all accounts in the business account
for account in accounts:
    tempaccount = AdAccount(account[AdAccount.Field.id])
    # Get all campaigns in the ad account
    campaigns = tempaccount.get_campaigns(fields=[Campaign.Field.name, Campaign.Field])
    # Iterate through all the campaigns in the ad account
    for campaign in campaigns:
        print(campaign[Campaign.Field.name])
        # Get the insight info (spend) from each campaign
        campaignsights = campaign.get_insights(params={'date_preset': 'yesterday'},
                                               fields=[AdsInsights.Field.spend])
        print(campaignsights)
It took a while of digging through the API and guessing but I got it! Here is my final script:
# This program downloads all relevent Facebook traffic info as a csv file
# This program requires info from the Facebook Ads API: https://github.com/facebook/facebook-python-ads-sdk
# Import all the facebook mumbo jumbo
from facebookads.api import FacebookAdsApi
from facebookads.adobjects.adsinsights import AdsInsights
from facebookads.adobjects.adaccount import AdAccount
from facebookads.adobjects.business import Business
# Import the csv writer and the date/time functions
import datetime
import csv
# Set the info to get connected to the API. Do NOT share this info
my_app_id = '****'
my_app_secret = '****'
my_access_token = '****'
# Start the connection to the facebook API
FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
# Create a business object for the business account
business = Business('****')
# Get yesterday's date for the filename, and the csv data
yesterdaybad = datetime.datetime.now() - datetime.timedelta(days=1)
yesterdayslash = yesterdaybad.strftime('%m/%d/%Y')
yesterdayhyphen = yesterdaybad.strftime('%m-%d-%Y')
# Define the destination filename
filename = yesterdayhyphen + '_fb.csv'
filelocation = "/cron/downloads/"+ filename
# Get all ad accounts on the business account
accounts = business.get_owned_ad_accounts(fields=[AdAccount.Field.id])
# Open or create new file
try:
    csvfile = open(filelocation, 'w+')
except:
    print("Cannot open file.")

# To keep track of rows added to file
rows = 0

try:
    # Create file writer
    filewriter = csv.writer(csvfile, delimiter=',')
except Exception as err:
    print(err)
# Iterate through the ad accounts
for account in accounts:
    # Create an AdAccount object from the ad account id to make it possible to get insights
    tempaccount = AdAccount(account[AdAccount.Field.id])
    # Grab insight info for all ads in the ad account
    ads = tempaccount.get_insights(params={'date_preset': 'yesterday',
                                           'level': 'ad'},
                                   fields=[AdsInsights.Field.account_id,
                                           AdsInsights.Field.account_name,
                                           AdsInsights.Field.ad_id,
                                           AdsInsights.Field.ad_name,
                                           AdsInsights.Field.adset_id,
                                           AdsInsights.Field.adset_name,
                                           AdsInsights.Field.campaign_id,
                                           AdsInsights.Field.campaign_name,
                                           AdsInsights.Field.cost_per_outbound_click,
                                           AdsInsights.Field.outbound_clicks,
                                           AdsInsights.Field.spend])
    # Iterate through all the ads returned for this ad account
    for ad in ads:
        # Set default values in case the insight info is empty
        date = yesterdayslash
        accountid = ad[AdsInsights.Field.account_id]
        accountname = ""
        adid = ""
        adname = ""
        adsetid = ""
        adsetname = ""
        campaignid = ""
        campaignname = ""
        costperoutboundclick = ""
        outboundclicks = ""
        spend = ""
        # Set values from insight data
        if 'account_id' in ad:
            accountid = ad[AdsInsights.Field.account_id]
        if 'account_name' in ad:
            accountname = ad[AdsInsights.Field.account_name]
        if 'ad_id' in ad:
            adid = ad[AdsInsights.Field.ad_id]
        if 'ad_name' in ad:
            adname = ad[AdsInsights.Field.ad_name]
        if 'adset_id' in ad:
            adsetid = ad[AdsInsights.Field.adset_id]
        if 'adset_name' in ad:
            adsetname = ad[AdsInsights.Field.adset_name]
        if 'campaign_id' in ad:
            campaignid = ad[AdsInsights.Field.campaign_id]
        if 'campaign_name' in ad:
            campaignname = ad[AdsInsights.Field.campaign_name]
        if 'cost_per_outbound_click' in ad:
            # This is stored strangely, takes a few steps to break through the layers
            costperoutboundclicklist = ad[AdsInsights.Field.cost_per_outbound_click]
            costperoutboundclickdict = costperoutboundclicklist[0]
            costperoutboundclick = costperoutboundclickdict.get('value')
        if 'outbound_clicks' in ad:
            # This is stored strangely, takes a few steps to break through the layers
            outboundclickslist = ad[AdsInsights.Field.outbound_clicks]
            outboundclicksdict = outboundclickslist[0]
            outboundclicks = outboundclicksdict.get('value')
        if 'spend' in ad:
            spend = ad[AdsInsights.Field.spend]
        # Write all ad info to the file, and increment the number of rows that will display
        filewriter.writerow([date, accountid, accountname, adid, adname, adsetid, adsetname,
                             campaignid, campaignname, costperoutboundclick, outboundclicks, spend])
        rows += 1

csvfile.close()

# Print report
print(str(rows) + " rows added to the file " + filename)
I then have a php script that takes the csv file and uploads it to my database. The key is pulling all the insight data in one big yank. You can then break it up however you want because each ad has information about its adset, adaccount, and campaign.
Adding a couple of small functions to improve on LucyTurtle's answer, as it is still susceptible to Facebook's rate limiting:
import logging
import time
import requests as rq

# Function to find the string between two strings or characters
def find_between(s, first, last):
    try:
        start = s.index(first) + len(first)
        end = s.index(last, start)
        return s[start:end]
    except ValueError:
        return ""

# Function to check how close you are to the FB Rate Limit
# (account_number and my_access_token are assumed to be defined earlier in your script)
def check_limit():
    check = rq.get('https://graph.facebook.com/v3.3/act_' + account_number + '/insights?access_token=' + my_access_token)
    call = float(find_between(check.headers['x-business-use-case-usage'], 'call_count":', '}'))
    cpu = float(find_between(check.headers['x-business-use-case-usage'], 'total_cputime":', '}'))
    total = float(find_between(check.headers['x-business-use-case-usage'], 'total_time":', ','))
    usage = max(call, cpu, total)
    return usage

# Check if you reached 75% of the limit; if yes, then back off for 5 minutes
# (put this chunk in your 'for ad in ads' loop, every 100-200 iterations)
if check_limit() > 75:
    print('75% Rate Limit Reached. Cooling Time 5 Minutes.')
    logging.debug('75% Rate Limit Reached. Cooling Time 5 Minutes.')
    time.sleep(300)
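To make the placement concrete, here is a rough sketch (my own illustration, not from the answer above) of how that back-off could sit inside the 'for ad in ads' loop from the accepted script; the every-150-iterations interval is an arbitrary choice within the suggested 100-200 range:

# Illustrative only: weave the rate-limit check into the existing insights loop.
processed = 0
for ad in ads:
    # ... existing per-ad CSV writing from the script above goes here ...
    processed += 1
    # Every 150 ads, check how close we are to the limit and cool off if needed.
    if processed % 150 == 0 and check_limit() > 75:
        print('75% Rate Limit Reached. Cooling Time 5 Minutes.')
        time.sleep(300)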
I'd just like to say: thank you.
As Marks Andre said, you made my day!
The FB SDK documentation is exhaustive, but it completely lacks practical implementation examples for day-to-day tasks like this one. Bookmark is set; this page will be revisited soon.
So the only thing I can actually contribute for fellow sufferers: it seems that with the newer facebook_business SDK you can simply replace "facebookads" in the import statements with "facebook_business".
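For instance, the imports from the script above would become the following under facebook_business (same class names, new package name; shown here purely as an illustration):

# Same objects as before, imported from the facebook_business package instead of facebookads.
from facebook_business.api import FacebookAdsApi
from facebook_business.adobjects.adsinsights import AdsInsights
from facebook_business.adobjects.adaccount import AdAccount
from facebook_business.adobjects.business import Business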
I am currently having problems with timeouts and performance on a Django redirect. The issue only became visible when I browsed my locally hosted application from 2 devices, with a single worker enabled on localhost and the timeout set to 30 seconds.
I have a views.py function that redirects to a page based on the URL it is given: I look up the pk in a table and return the corresponding URL. I also have a counter that keeps track of the number of forwards.
urls.py here:
url(r'^i/(?P<pk>[-\w]+)/$', frontendapp_views.item_view, name="item_view"),
The page redirects instantly to the desired_url_forward; however, the connection with the user stays open even though the user has already left my Django environment. This somehow leaves my worker waiting for 30 seconds after I have been forwarded to the external page, so with one worker no other request can be processed.
I could increase the number of workers or shorten the timeout, but that doesn't feel right, as it doesn't fix the core issue.
This is the only thing I found on this topic, but I am not skilled enough to understand it: https://github.com/requests/requests/issues/520
This is what the views.py looks like:
def item_view(request, pk):
    pk_binairy = urlsafe_base64_decode(pk)
    pk_int = int.from_bytes(pk_binairy, byteorder='little')
    desired_url_forward_object = get_object_or_404(forwards, pk=pk_int)
    channel_cleaned_utm = re.sub(' +', ' ', "".join([request.GET.get('utm_source', ''), ' ',
                                                     request.GET.get('utm_medium', ''), ' ',
                                                     request.GET.get('utm_campaign', ''), ' ',
                                                     request.GET.get('utm_term', ''), ' ',
                                                     request.GET.get('utm_content', '')]))
    channel_cleaned = request.META.get('HTTP_REFERER')
    if channel_cleaned is None:
        channel_cleaned = 'Direct Traffic'
    visitor_ip_request = get_client_ip(request)
    location_request = get_client_location(request, visitor_ip_request)
    clickstat = clickstats(
        urlid=pk_int,
        user=desired_url_forward_object.user,
        channel=channel_cleaned,
        visitor_ip=visitor_ip_request,
        city=location_request['city'],
        region=location_request['region'],
        country=location_request['country'],
        device_type=request.user_agent.device.family,
        browser=request.user_agent.browser.family,
        browser_version=request.user_agent.browser.version_string,
        operating_system=request.user_agent.os.family,
        operating_system_version=request.user_agent.os.version_string
    )
    clickstat.save()
    if desired_url_forward_object.counterA <= desired_url_forward_object.counterB:
        desired_url_forward = desired_url_forward_object.urlA
        desired_url_forward_object.counterA = F('counterA') + 1
    else:
        desired_url_forward = desired_url_forward_object.urlB
        desired_url_forward_object.counterB = F('counterB') + 1
    desired_url_forward_object.save()
    return redirect(desired_url_forward)
Any suggestions? Thanks for the help!
I want to use Lucene from Python.
I used StandardAnalyzer for both indexing and searching. It works fine, but now my requirements have changed and I need to use KeywordAnalyzer.
Code for StandardAnalyzer:
# Importing packages
import lucene
lucene.initVM()
from java.io import File
from org.apache.lucene import analysis, document, index, queryparser, search, store, util

# Initialize Parameters
analyzer = analysis.standard.StandardAnalyzer(util.Version.LUCENE_CURRENT)
config = index.IndexWriterConfig(util.Version.LUCENE_CURRENT, analyzer)
directory = store.FSDirectory.open(File(<path_where_to_index>))
iwriter = None
iwriter = index.IndexWriter(directory, config)

# Indexing Part
doc = document.Document()
doc.add(document.Field("fieldname", entity, document.Field.Store.YES, document.Field.Index.ANALYZED))
doc.add(document.Field("category", category, document.Field.Store.YES, document.Field.Index.ANALYZED))
iwriter.addDocument(doc)
iwriter.commit()

# Searching Part
ireader = index.IndexReader.open(directory)
isearcher = search.IndexSearcher(ireader)
parser = queryparser.classic.QueryParser(util.Version.LUCENE_CURRENT, "fieldname", analyzer)
query = parser.parse(entity)
hits = isearcher.search(query, None, 100).scoreDocs
print hits
for hit in hits:
    hitDoc = isearcher.doc(hit.doc)
    print hitDoc
The code above uses StandardAnalyzer. I want to use KeywordAnalyzer instead of StandardAnalyzer.
I have changed the analyzer in the code below. It uses KeywordAnalyzer, but the search returns no results.
Code for KeywordAnalyzer:
# Importing packages
import lucene
lucene.initVM()
from java.io import File
from org.apache.lucene import analysis, document, index, queryparser, search, store, util

# Initialize Parameters
analyzer = analysis.core.KeywordAnalyzer()
config = index.IndexWriterConfig(util.Version.LUCENE_CURRENT, analyzer)
directory = store.FSDirectory.open(File(<path_where_to_index>))
iwriter = None
iwriter = index.IndexWriter(directory, config)

# Indexing Part
doc = document.Document()
doc.add(document.Field("fieldname", entity, document.Field.Store.YES, document.Field.Index.ANALYZED))
doc.add(document.Field("category", category, document.Field.Store.YES, document.Field.Index.ANALYZED))
iwriter.addDocument(doc)
iwriter.commit()

# Searching Part
ireader = index.IndexReader.open(directory)
isearcher = search.IndexSearcher(ireader)
parser = queryparser.classic.QueryParser(util.Version.LUCENE_CURRENT, "fieldname", analyzer)
query = parser.parse(entity)
hits = isearcher.search(query, None, 100).scoreDocs
print hits
for hit in hits:
    hitDoc = isearcher.doc(hit.doc)
    print hitDoc
Any help?
I found the solution to my question.
To use KeywordAnalyzer, I need analysis.core. I can't use the QueryParser for searching, because the classic QueryParser splits the query text on whitespace and operator characters before analyzing it; it is geared towards tokenizing analyzers like StandardAnalyzer and generally won't reproduce the single exact term that KeywordAnalyzer indexes. To search a keyword-analyzed field, I need to use index.Term and search.TermQuery.
Searching Code:
term_parser = index.Term("fieldname", entity)
query = search.TermQuery(term_parser)
hits = isearcher.search(query, None, 10).scoreDocs