Iteration and memory problems in Django

Iteration and memory problems in Django - python

I need to create pairs of hashtags so people can judge whether the two tags in question refer to the same thing. The problem is that there are A LOT of hashtags, and I'm running the code on a Dreamhost VPS, so my memory is somewhat limited.
Here's my relevant models:
class Hashtag(models.Model):
text = models.CharField(max_length=140)
competitors = models.ManyToManyField('Hashtag', through='Competitors')
tweet = models.ManyToManyField('Tweet')
def __unicode__(self):
return unicode_escape(self.text)
class Competitors(models.Model):
tag1 = models.ForeignKey('Hashtag', related_name='+')
tag2 = models.ForeignKey('Hashtag', related_name='+')
yes = models.PositiveIntegerField(default=0, null=False)
no = models.PositiveIntegerField(default=0, null=False)
objects = models.Manager()
def __unicode__(self):
return u'{0} vs {1}'.format(unicode_escape(self.tag1.text), unicode_escape(self.tag2.text))
Here's the code I've developed to create the Competitors objects and save them to my DB:
class Twitterator(object):
def __init__(self, infile=None, outfile=None, verbosity=True):
...
self.competitors_i = 1
...
def __save_comps__(self,tag1, tag2):
try:
comps = Competitors(id=self.competitors_i,
tag1=tag1,
tag2=tag2,
yes=0,
no=0)
comps.save()
except IntegrityError:
self.competitors_i += 1
self.save_comps(tag1, tag2)
else:
self.competitors_i += 1
def competitors_to_db(self, start=1):
tags = Hashtag.objects.all()
i = start
while True:
try:
tag1 = tags.get(pk=i)
j = i + 1
while True:
try:
tag2 = tags.get(pk=j)
self.__save_comps__(tag1, tag2)
j += 1
except Hashtag.DoesNotExist:
break
i += 1
except Hashtag.DoesNotExist:
break
It all "works", but never manages to get that far before I run out of memory and the whole thing gets killed. I thought using .get would be less memory-intensive, but it doesn't seem to be less memory-intensive enough. I'm under the impression that Django Querysets are iterators already, so my usual 'make an iterator' trick is out. Any suggestions for further reducing my memory footprint?

I think the problem is in this function, i is not getting incremented properly and you will keep looping for same value of i.
def competitors_to_db(self, start=1):
tags = Hashtag.objects.all()
i = start
while True:
try:
tag1 = tags.get(pk=i)
j = i + 1
while True:
try:
tag2 = tags.get(pk=j)
self.__save_comps__(tag1, tag2)
j += 1
except Hashtag.DoesNotExist:
break #<------move this after i +=1 otherwise i will not increment
i += 1

Related

How do I print all of the instances from a set of variables that are undefined from the beginning?

I have a program that I want to be able to print all of the instances of each variable using my method that I created. Problem is I can't figure out a way to print them since each are listed under a different variable that aren't configured from hardcoding them in and I need a way to automatically recall them in my code.
class fit:
def __init__(self,day,did,workout='Not Recorded',time='An unknown amount of',calories='An unknown amount of'):
self.day = day
self.did = did
if did.lower()=='no':
self.workout = 'Not Recorded'
self.time = "An unknown amount of Minutes"
self.calories = "An unknown amount of Calories"
else:
self.workout = workout
self.time = "{} Minutes".format(time)
self.calories = "{} Calories".format(calories)
def formate(self):
self.formate = "{}:\n\nDid you work out: {}\nWorkout focus: {}\nYou worked out for: {}\nYou burned: {}\n\n----------------------------------------------------------".format(self.day,self.did,self.workout,self.time,self.calories)
return self.formate
def reader(day,index):
file = open('readme.txt')
file = file.read()
stripped = file.rsplit("\n")
for i in range(len(stripped)):
stripped[i] = stripped[i].rsplit(" ")
del stripped[-1]
if int(index) >= len(stripped[day-1]):
return "none"
else:
return stripped[day-1][index]
x = 0
def create_new_instance(class_name,instance_name):
globals()[instance_name] = class_name(reader(x,0),reader(x,1),reader(x,2),reader(x,3),reader(x,4))
print('Class instance {} created'.format(instance_name))
while True:
try:
x+=1
ins = 'day_' + str(x)
create_new_instance(fit,ins)
except:
break
break
def printer(instance):
print(.formate())
while True:
x+=1
inst = 'day_' + str(x)
printer(inst)
An example of this might be that I have 8 lines of data from a text document and I have a system that creates instances of day_1, day_2, day_3 ect until day_8 and then I want to print each of those instances out, but again I don't have those instances directly hardcoded into my code so I don't know how I'd do it. I've tried looking into maybe a while loop and increasing a variable by 1 and concatenating it with day and trying to make a variable out of that but the my limited experience with python isn't helping.

A very unpythonic and ugly way would be to use exec, for example:
day_3=5
x = 'day_'+'3'
exec("print("+x+")")
I would recommend another way to store your variables though.

Multhreading with updates to MySQL

I need to evaluate around 80k rows of data every day at 11am, and I hope to accomplish it within a few minutes.
I used multithreading that uses select_for_update() of Django that gets one row at a time, updates it, and then gets a new row.
The problem is, there is an issue where the counter increases too fast having the assumption that there are times where the row gets evaluated twice.
Here is my current code block:
while True:
with transaction.atomic():
user_history = UserHistory.objects.select_for_update().filter(is_finished=False).first()
if user_history:
user = UserProfile.objects.filter(id=user_history.user_profile_id).first()
card_today = CardToday.objects.filter(id=user_history.card_today_id).first()
rewarded_value = 0
if user_history is item1:
if card_today.item1 > card_today.item2:
rewarded_value = card_today.item2/card_today.item1 + 1
elif user_history is item2:
if card_today.item2 > card_today.item1:
rewarded_value = card_today.item1/card_today.item2 + 1
user.coins += float(user.coins) + rewarded_value # the value increases too high here
user.save()
user_history.save()
else
break
This is the model for Card Today:
class CardToday(models.Model):
item1 = models.IntegerField(default=0)
item2 = models.IntegerField(default=0)
This is the model for User History:
class UserHistory(models.Model):
card_today = models.ForeignKey(CardToday, on_delete=models.CASCADE)
answer_item = models.ForeignKey(Item, on_delete=models.CASCADE)
is_finished = models.BooleanField(default=False) // checks whether the card has already been evaluated.
rewarded value's computation is as follows:
rewarded_value = majority/minority + 1
majority and minority switches depending on which item has a greater value.
Each user_history can only choose between item1 or item2.
After a certain amount of time has passed, the code will evaluate which item has been picked on a CardToday.
Is there a better way of accomplishing this?
The framework I'm using is Django, and I have a cron job running from the library django-cron.

Django DB object filter first not getting new items

For some reason when I run this code it keeps looping over the same object and is not getting any new items from the database. In other words, the print output is just the same object over and over, when it should be iterating over items in the list. Here is my code:
article = Article.objects.filter(is_locked=False, is_downloaded=False).first()
while article:
article.is_locked = True
article.save()
print '******************************'
date = article.datetime
title = article.title
url = article.url
print('date: %s' % date)
print('url: %s' % url)
print('title: %s' % title)
get_article(url, title, article)
article = Article.objects.filter(is_locked=False, is_downloaded=False).first()
Where mldb.models is:
from django.db import models
class Article(models.Model):
url = models.CharField(max_length=1028)
title = models.CharField(max_length=1028)
category = models.CharField(max_length=128)
locale = models.CharField(max_length=128)
section = models.CharField(max_length=512)
tag = models.CharField(max_length=128)
author = models.CharField(max_length=256)
datetime = models.DateTimeField()
description = models.TextField()
article = models.TextField()
is_locked = models.BooleanField(default=False)
is_downloaded = models.BooleanField(default=False)
def __str__(self): # __unicode__ on Python 2
return self.name
class Meta:
app_label = 'mldb'
I have also tried this but it also does not loop through objects either (the loop just repeats the same object over and over):
articles = Article.objects.filter(is_locked=False, is_downloaded=False)
for article in articles:
...
Here is get_article(). This seems to be what is causing the problem (if I remove the call to this function everything works properly):
def get_article(url, title, article):
failed_attempts = 0
while True:
try:
content = urllib2.urlopen(url).read()
soup = BeautifulSoup(content, "html5lib")
description = soup.find(property="og:description")["content"] if soup.find(property="og:description") else ''
locale = soup.find(property="og:locale")["content"] if soup.find(property="og:locale") else ''
section = soup.find(property="og:article:section")["content"] if soup.find(property="og:article:section") else ''
tag = soup.find(property="og:article:tag")["content"] if soup.find(property="og:article:tag") else ''
author = soup.find(property="og:article:author")["content"] if soup.find(property="og:article:author") else ''
date = soup.find(property="og:article:published_time")["content"] if soup.find(property="og:article:published_time") else ''
print 'date'
print date
body = ''
for body_tag in soup.findAll("div", {"class" : re.compile('ArticleBody_body.*')}):
body += body_tag.text
# datetime.strptime (ts, "%Y") # 2012-01-02T04:32:57+0000
dt = dateutil.parser.parse(date, fuzzy=True)
print dt
print url
article.title = title.encode('utf-8')
article.url = url.encode('utf-8')
article.description = description.encode('utf-8')
article.locale = locale.encode('utf-8')
article.section = section.encode('utf-8')
article.tag = tag.encode('utf-8')
article.author = author.encode('utf-8')
article.body = body.encode('utf-8')
article.is_downloaded = True
article.article = body
article.save()
print(description.encode('utf-8'))
except (urllib2.HTTPError, ValueError) as err:
print err
time.sleep(20)
failed_attempts += 1
if failed_attempts < 10:
continue
Any ideas?

The way I see it you have an infinite loop in your get_article() function.
Consider this simplified version of your get_article() for illustration purposes:
def get_article(url, title, article):
failed_attempts = 0
# Note how this while loop runs endlessly.
while True:
try:
# doing something here without calling `return` anywhere
# I'll just write `pass` for the purpose of simplification
pass
except (urllib2.HTTPError, ValueError) as err:
failed_attempts += 1
if failed_attempts < 10:
# you're calling `continue` here but you're not calling
# `break` or `return` anywhere if failed_attemps >= 10
# and therefore you're still stuck in the while-loop
continue
Note that simply not calling continue will not stop a while loop:
while True:
print('infinite loop!')
if some_condition:
# if some_condition is truthy, continue
continue
# but if it's not, we will continue anyway. the above if-condition
# therefore doesn't make sense
A fixed version may look like this, I omitted the details:
def get_article(url, title, article):
failed_attempts = 0
while True:
try:
# it's considered good practice to only put the throwing
# statement you want to catch in the try-block
content = urllib2.urlopen(url).read()
except (urllib2.HTTPError, ValueError) as err:
failed_attempts += 1
if failed_attempts == 10:
# if it's the 10th attempt, break the while loop.
# consider throwing an error here which you can handle
# where you're calling `get_article` from. otherwise
# the caller doesn't know something went wrong
break
else:
# do your work here
soup = BeautifulSoup(content, "html5lib")
# ...
article.save()
# and call return!
return

Random match-up with Python and Google App Engine

I am building a website where two music videos are randomly chosen from a database and go head-to-head for voting. I need an algorithm that will continue picking unique match-ups for a user excluding match-ups they have had in the past, but with replacing videos for new match-ups. You can view a sample of the page here: http://10.showtownmvp.appspot.com/
I am running this on Google App Engine - Python, and have a voting table and videos table that stores the results. I would like to keep it as random as possible and avoid multiple queries, so if you have suggestions on how to model this in NDB or have a good algorithm, I would appreciate your help!

My solution to this problem was to query all videos from the datastore and randomly select one. I also ran a query for past votes / matchups for the user and converted this to a list so I could manipulate it without running several queries. Using the random video, I used a while loop to find a second video that was not in the previous matchup list. If no video was found, the program would remove the random choice from the video list, then select a new sample and run the search again. The code is below:
class MainHandler(views.Template):
def post(self):
# NOTE: we are posting genre and state.
user = self.user_check()
self.videos = models.videos.Videos.fetch_featured()
try:
random.sample(self.videos,2)
if user:
self.user_votes = models.voting.Voting.query_by_user(user.key)
if self.user_votes != None:
self.user_votes = [[x.video_one,x.video_two] for x in self.user_votes]
page_vids = False
while page_vids == False and len(self.videos)>1:
rand_vid = random.choice(self.videos)
page_vids = self.find_match(rand_vid)
self.videos.remove(rand_vid)
else:
page_vids = random.sample(self.videos,2)
else:
page_vids = random.sample(self.videos,2)
except:
page_vids = None
def find_match(self, rand_vid):
i =0
while i < len(self.videos):
if rand_vid.key != self.videos[i].key and ([rand_vid.key,self.videos[i].key] not in self.user_votes and [self.videos[i].key, rand_vid.key] not in self.user_votes):
return [rand_vid,self.videos[i]]
i+=1
return False
class Videos(ndb.Model):
acc_key = ndb.KeyProperty()
musician_key = ndb.KeyProperty()
musician_name = ndb.StringProperty()
embed_link = ndb.StringProperty()
genre_tag = ndb.StringProperty()
video_title = ndb.StringProperty()
featured = ndb.BooleanProperty(default = False)
likes_count = ndb.IntegerProperty()
video_added = ndb.DateTimeProperty(auto_now_add = True)
#classmethod
def query_by_account(cls, acc_key):
return cls.query(cls.acc_key == acc_key).fetch()
#classmethod
def fetch_featured(cls):
return cls.query(cls.featured == True).fetch(100)
class Voting(ndb.Model):
voter_acc_key = ndb.KeyProperty()
voter_type = ndb.StringProperty()
video_one = ndb.KeyProperty()
video_one_artist_key = ndb.KeyProperty()
video_two = ndb.KeyProperty()
video_two_artist_key = ndb.KeyProperty()
voter_choice = ndb.KeyProperty()
video_set_check = ndb.KeyProperty(repeated = True)
voter_ip = ndb.StringProperty()
vote_time = ndb.DateTimeProperty(auto_now_add = True)
#classmethod
def query_by_user(cls, acc_key):
return cls.query(cls.voter_acc_key == acc_key).fetch(2000)

Using the same method name for two different method headers

I'm experiencing a little issue, while working with Python/Django. I have a class, called 'Plantao', and a method 'get_ultima_posicao', which, I wanted to behave in the following way:
If called by the class, like Plantao.get_ultima_posicao, it should expect two parameters, one mandatory and one optional. And when called by a class object, like p.get_ultima_posicao (p being an instance of Plantao) the only parameter it should expect would be self.
Is there a way of doing that?
I know there's a way of doing something like polymorphism, using (self, *args) in the method header, but since one of my methods should be static, using #staticmethod, it doesn't have self as a parameter.
EDIT
In [4]: Plantao.get_ultima_posicao(empresa = 1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/*************************************/<ipython console> in <module>()
TypeError: unbound method get_ultima_posicao() must be called with Plantao instance as first argument (got nothing instead)
In [5]: planta.get_ultima_posicao()
Out[5]: 0
Where planta.empresa = 1. I wanted that to work, and both executions returning the same values
I wanted the methods definitions to be something like:
def get_ultima_posicao(self):
AND
#staticmethod
def get_ultima_posicao(empresa, data = datetime.date.today()):
EDIT
Ok, here's my model for the Plantao class:
usuario = models.ForeignKey(Usuario, null=True, blank=True) # user
empresa = models.ForeignKey(Empresa, null=True, blank=True) # department
posicao = models.IntegerField(default=0) # position in queue
data = models.DateField(null=True, blank=True) # date added in queue
So, I may know which deparment I want to get an user from, so why would I need to retrieve one Plantao object just to search for it? At the same time, if I already retrieved it, why would I need to get the deparment again, if it's already in Plantao object? That's why I want to do that.
EDIT
In hope it may help finding a solution I'm adding the functions codes here:
#staticmethod
def get_ultima_posicao(empresa, data = datetime.date.today()):
if empresa is None:
return 'Empresa invalida'
ultima = Plantao.get_ultimo_fila(empresa = empresa)
plantao = Plantao.objects.filter(data=data, empresa=empresa).order_by('posicao')
usuario = None
for i in plantao:
ultima += 1
i.posicao = ultima
i.save()
if i.usuario.atendimento_pendente() == False and i.usuario.cliente_aguardando() == False and i.usuario.esta_online() == True:
return i.usuario
return "Nenhum usuario disponivel na escala"
def get_ultima_posicao(self):
if self.empresa is None:
return 'Empresa invalida'
ultima = Plantao.get_ultimo_fila(empresa = self.empresa)
plantao = Plantao.objects.filter(data=self.data, empresa=self.empresa).order_by('posicao')
usuario = None
for i in plantao:
ultima += 1
i.posicao = ultima
i.save()
if i.usuario.atendimento_pendente() == False and i.usuario.cliente_aguardando() == False and i.usuario.esta_online() == True:
return i.usuario
return "Nenhum usuario disponivel na escala"

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Iteration and memory problems in Django - python

Related

How do I print all of the instances from a set of variables that are undefined from the beginning?

Multhreading with updates to MySQL

Django DB object filter first not getting new items

Random match-up with Python and Google App Engine

Using the same method name for two different method headers

Categories

Resources