Python 2.7 yield is creating strange behavior

Python 2.7 yield is creating strange behavior - python

Using Python 2.7....
The print thread["Subject"] should return:
please ignore - test2
and
Please ignore - test
It does successfully with this:
def thread_generator(threads):
tz = pytz.timezone('America/Chicago')
POSTS_SINCE_HOURS = 24
now_date = datetime.datetime.now(tz)
time_stamp = now_date - datetime.timedelta(hours=POSTS_SINCE_HOURS)
for thread in threads:
last_post_time = convert_time(thread["LatestPostDate"])
if last_post_time > time_stamp:
print thread["Subject"]
row = {
"url": thread["Url"],
"author_username": thread["Author"]["Username"],
"author_name": thread["Author"]["DisplayName"],
"thread_id": thread["Id"],
"forum_id": thread["ForumId"],
"subject": thread["Subject"],
"created_date": thread["Content"]["CreatedDate"],
"reply_count": thread["ReplyCount"],
"latest_post_date": thread["LatestPostDate"],
"latest_reply_author": thread["LatestForumReplyAuthorId"] }
But, when adding the yield row the print thread["Subject"] does not show Please ignore - test as it should.
def thread_generator(threads):
tz = pytz.timezone('America/Chicago')
POSTS_SINCE_HOURS = 24
now_date = datetime.datetime.now(tz)
time_stamp = now_date - datetime.timedelta(hours=POSTS_SINCE_HOURS)
for thread in threads:
last_post_time = convert_time(thread["LatestPostDate"])
if last_post_time > time_stamp:
print thread["Subject"]
row = {
"url": thread["Url"],
"author_username": thread["Author"]["Username"],
"author_name": thread["Author"]["DisplayName"],
"thread_id": thread["Id"],
"forum_id": thread["ForumId"],
"subject": thread["Subject"],
"created_date": thread["Content"]["CreatedDate"],
"reply_count": thread["ReplyCount"],
"latest_post_date": thread["LatestPostDate"],
"latest_reply_author": thread["LatestForumReplyAuthorId"] }
yield row
Why is this? Please ignore - test should still show with print thread["Subject"]. Makes no sense to me.
UPDATE: How the generators is called
def sanitize_threads(threads):
for thread in thread_generator(threads):
do stuff
thread_batch.append(thread)
return thread_batch
def get_unanswered_threads():
slug = 'forums/threads/unanswered.json?PageSize=100'
status_code, threads = do_request(slug)
if status_code == 200:
threads = threads["Threads"]
thread_batch = sanitize_threads(threads)
database_update(thread_batch)

Have you tried actually calling next() on the resultant generator? If you call the function with yield the same way you call the function without, in the yield case you'll get a generator object as a result. A generator doesn't evaluate what's inside it until you actually require a value of it, which can be done with next(generator).
For example:
>>> def nogen():
... '''Create a list of values 0-3 and print each one as you build the list.'''
... r = []
... for i in range(3):
... print(i)
... r.append(i)
... return r
...
>>> def firstgen():
... '''Create an iterator of values 0-3 and print each one as you yield the value.'''
... for i in range(3):
... print(i)
... yield i
...
>>> a = nogen() # This function is eager and does everything at once.
0
1
2
>>> a
[0, 1, 2]
>>> b = firstgen() # Note there's no output immediately after this line.
>>> list(b) # The output happens as the list coercion causes the generator to execute.
0
1
2
[0, 1, 2]
>>> c = firstgen() # You can also see this a step at a time using `next`
>>> next(c)
0
0
>>> next(c)
1
1
>>> next(c)
2
2
>>> next(c)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration

Related

When I call a method inside a class it is not working but it works outside?

def lookup_cell(self, column, row):
return(self.puzzle[row-1][column-1])
def lookup_column(self, column):
output = []
for i in range(9):
output.append(self.lookup_cell(column, i+1))
return output
def check_puzzle(self):
valid = True
#check all the rows
for i in range(1,10):
row = self.lookup_row(i)
while 0 in row: row.remove(0)
for i in range(1,10):
if row.count(i) > 1:
valid = False
#check all the columns
for i in range(1,10):
print(i)
print(easy.lookup_column(i))
puzzle = '''0,9,0,7,5,1,0,2,3 /n
2,1,8,6,0,3,7,5,4 /n
0,0,0,4,0,2,0,0,0 /n
1,0,0,0,0,0,0,9,2 /n
0,0,0,5,0,0,3,8,0 /n
3,0,0,8,2,0,5,0,6 /n
0,0,0,0,7,0,0,4,8 /n
0,4,9,0,0,0,0,7,0 /n
0,2,0,0,0,5,6,3,1 '''
easy = Sudoku(puzzle)
Here the code works and prints the columns of the puzzle properly:
for i in range(1,10):
print(easy.lookup_column(i))
when this runs I get an error which I'll add below:
easy.check_puzzle()
1 [9, 2, 4, 1, 5, 3, 7, 4, 2] 2 [7, 1, 2, 9, 3, 8, 4, 9, 5] 3
Traceback (most recent call last):
File "/Users/ellis/Desktop/Sudoku.py", line 121, in <module>
easy.check_puzzle()
File "/Users/ellis/Desktop/Sudoku.py", line 81, in check_puzzle
print(easy.lookup_column(i))
File "/Users/ellis/Desktop/Sudoku.py", line 65, in lookup_column
output.append(self.lookup_cell(column, i+1))
File "/Users/ellis/Desktop/Sudoku.py", line 19, in lookup_cell
return(self.puzzle[row-1][column-1])
IndexError: list index out of range

Your code works outside of the class method because
for i in range(1,10):
print(easy.lookup_column(i))
is not calling
def lookup_cell(self, column, row):
return(self.puzzle[row-1][column-1]
The traceback you posted shows that the code above is causing the issue, where your accessing values in the object that's outside its bounds

I'm not exactly sure what you are trying to accomplish, but why not use a list for puzzle instead of a string?
Try this:
class Sudoku(object):
def __init__(self,puzzle):
self.puzzle = puzzle
print("Inside class; column lookup")
for i in range(1,10):
print('Column {0} ='.format(i),self.lookup_column(i))
self.check_puzzle()
def lookup_cell(self, column, row):
return self.puzzle[row-1][column-1]
def lookup_row(self,row):
output = []
for i in range(9):
output.append(self.lookup_cell(i+1, row))
return output
def lookup_column(self, column):
output = []
for i in range(9):
output.append(self.lookup_cell(column, i+1))
return output
def check_puzzle(self):
valid = True
#check all the rows
for i in range(1,10):
row = self.lookup_row(i)
while 0 in row: row.remove(0)
for i in range(1,10):
if row.count(i) > 1:
valid = False
puzzle = [
[0,9,0,7,5,1,0,2,3],
[2,1,8,6,0,3,7,5,4],
[0,0,0,4,0,2,0,0,0],
[1,0,0,0,0,0,0,9,2],
[0,0,0,5,0,0,3,8,0],
[3,0,0,8,2,0,5,0,6],
[0,0,0,0,7,0,0,4,8],
[0,4,9,0,0,0,0,7,0],
[0,2,0,0,0,5,6,3,1]]
easy = Sudoku(puzzle)
print("Outside class; column lookup")
for i in range(1,10):
print('Column {0} ='.format(i),easy.lookup_column(i))
easy.check_puzzle()

Combine ~Q and F in Django?

Query
Balance.objects.filter(~Q(fax_date=F('paused_date')))
returns empty qs even though I do have objects that fit the condition "fax date field not equal to paused date". Is it possible to use ~Q and F together like that?
ran a test like this:
deals = Deal.objects.all()
balance_pre = Balance.objects.filter(~Q(fax_date=F('paused_date')), fax_date__isnull=False, reserved=False)
agr_nums = list(deals.filter(agr_name__isnull=False).values_list('agr_name', flat=True).distinct())
agrs_with_fax = 0
for agr_num in agr_nums:
try:
balance_agr = Balance.objects.get(number__icontains=agr_num)
if balance_agr.fax_date is not None and balance_agr.fax_date != balance_agr.paused_date and not balance_agr.reserved:
agrs_with_fax += 1
except Balance.DoesNotExist:
pass
agrs_with_fax2 = 0
for agr_num in agr_nums:
try:
balance_pre.get(number__icontains=agr_num)
agrs_with_fax2 += 1
except Balance.DoesNotExist:
pass
r = [agrs_with_fax, agrs_with_fax2, balance_agr.fax_date, balance_agr.paused_date, balance_agr.reserved]
r returned is
[55, 0, datetime.date(2018, 7, 11), None, False]
I don't see my error, both cycles should return same result.

I created a Balance model in a fresh project just to test that print(qs.query) will show you the generated query(not in all cases) in this case. I used also exclude as #daniel-roseman suggested to prove that they were equivalent. I hope this help you.
>>> from django.db.models import F, Q
>>> qs = Balance.objects.filter(~Q(fax_date=F('paused_date')))
>>> print(qs.query)
SELECT "so_balance"."id", "so_balance"."fax_date", "so_balance"."paused_date"
FROM "so_balance" WHERE NOT ("so_balance"."fax_date" =
("so_balance"."paused_date"))
>>> qs = Balance.objects.exclude(fax_date=F('paused_date'))
>>> print(qs.query)
SELECT "so_balance"."id", "so_balance"."fax_date", "so_balance"."paused_date"
FROM "so_balance" WHERE NOT ("so_balance"."fax_date" =
("so_balance"."paused_date"))

Doing operations on a large data set

I have to perform some analysis on a PSL record which contains information on DNA sequence fragments. Basically I have to find entries that are from the same read in the same contig (these are both values in the PSL entry). The problem is the PSL records are large (10-30 Mb text documents). I wrote a program that works on short records and on the long records given enough time but it took way longer than specified. I was told the program shouldn't take more than ~15 seconds. Mine took over 15 minutes.
PSL records look like this:
275 11 0 0 0 0 0 0 - M02034:35:000000000-A7UU0:1:1101:19443:1992/2 286 0 286 NODE_406138_length_13407_cov_13.425076 13465 408 694 1 286, 0, 408,
171 5 0 0 0 0 0 0 + M02034:35:000000000-A7UU0:1:1101:13497:2001/2 294 0 176 NODE_500869_length_34598_cov_30.643419 34656 34334 34510 1 176, 0, 34334,
188 14 0 10 0 0 0 0 + M02034:35:000000000-A7UU0:1:1101:18225:2002/1 257 45 257 NODE_455027_length_12018_cov_13.759444 12076 11322 11534 1 212, 45, 11322,
My code looks like this:
import sys
class PSLreader :
'''
Class to provide reading of a file containing psl alignments
formatted sequences:
object instantiation:
myPSLreader = PSLreader(<file name>):
object attributes:
fname: the initial file name
methods:
readPSL() : reads psl file, yielding those alignments that are within the first or last
1000 nt
readPSLpairs() : yields psl pairs that support a circular hypothesis
Author: David Bernick
Date: May 12, 2013
'''
def __init__ (self, fname=''):
'''contructor: saves attribute fname '''
self.fname = fname
def doOpen (self):
if self.fname is '':
return sys.stdin
else:
return open(self.fname)
def readPSL (self):
'''
using filename given in init, returns each filtered psl records
that contain alignments that are within the terminal 1000nt of
the target. Incomplete psl records are discarded.
If filename was not provided, stdin is used.
This method selects for alignments that could may be part of a
circle.
Illumina pairs aligned to the top strand would have read1(+) and read2(-).
For the bottoms trand, read1(-) and read2(+).
For potential circularity,
these are the conditions that can support circularity:
read1(+) near the 3' terminus
read1(-) near the 5' terminus
read2(-) near the 5' terminus
read2(+) near the 3' terminus
so...
any read(+) near the 3', or
any read(-) near the 5'
'''
nearEnd = 1000 # this constant determines "near the end"
with self.doOpen() as fileH:
for line in fileH:
pslList = line.split()
if len(pslList) < 17:
continue
tSize = int(pslList[14])
tStart = int(pslList[15])
strand = str(pslList[8])
if strand.startswith('+') and (tSize - tStart > nearEnd):
continue
elif strand.startswith('-') and (tStart > nearEnd):
continue
yield line
def readPSLpairs (self):
read1 = []
read2 = []
for psl in self.readPSL():
parsed_psl = psl.split()
strand = parsed_psl[9][-1]
if strand == '1':
read1.append(parsed_psl)
elif strand == '2':
read2.append(parsed_psl)
output = {}
for psl1 in read1:
name1 = psl1[9][:-1]
contig1 = psl1[13]
for psl2 in read2:
name2 = psl2[9][:-1]
contig2 = psl2[13]
if name1 == name2 and contig1 == contig2:
try:
output[contig1] += 1
break
except:
output[contig1] = 1
break
print(output)
PSL_obj = PSLreader('EEV14-Vf.filtered.psl')
PSL_obj.readPSLpairs()
I was given some example code that looks like this:
def doSomethingPairwise (a):
for leftItem in a[1]:
for rightItem in a[2]:
if leftItem[1] is rightItem[1]:
print (a)
thisStream = [['David', 'guitar', 1], ['David', 'guitar', 2],
['John', 'violin', 1], ['John', 'oboe', 2],
['Patrick', 'theremin', 1], ['Patrick', 'lute',2] ]
thisGroup = None
thisGroupList = [ [], [], [] ]
for name, instrument, num in thisStream:
if name != thisGroup:
doSomethingPairwise(thisGroupList)
thisGroup = name
thisGroupList = [ [], [], [] ]
thisGroupList[num].append([name, instrument, num])
doSomethingPairwise(thisGroupList)
But when I tried to implement it my program still took a long time. Am I thinking about this the wrong way? I realize the nested loop is slow but I don't see an alternative.
Edit: I figured it out, the data was presorted which made my brute force solution very impractical and unnecessary.

I hope help you, since, the question needs a best input example file
#is better create PSLRecord class
class PSLRecord:
def __init__(self, line):
pslList = line.split()
properties = ("matches", "misMatches", "repMatches", "nCount",
"qNumInsert", "qBaseInsert", "tNumInsert",
"tBaseInsert", "strand", "qName", "qSize", "qStart",
"qEnd", "tName", "tSize", "tStart", "tEnd", "blockCount",
"blockSizes", "qStarts", "tStarts")
self.__dict__.update(dict(zip(properties, pslList)))
class PSLreader :
def __init__ (self, fname=''):
self.fname = fname
def doOpen (self):
if self.fname is '':
return sys.stdin
else:
return open(self.fname)
def readPSL (self):
with self.doOpen() as fileH:
for line in fileH:
pslrc = PSLRecord(line)
yield pslrc
#return a dictionary with all psl records group by qName and tName
def readPSLpairs (self):
dictpsl = {}
for pslrc in self.readPSL():
#OP requirement, remove '1' or '2' char, in pslrc.qName[:-1]
key = (pslrc.qName[:-1], pslrc.tName)
if not key in dictpsl:
dictpsl[key] = []
dictpsl[key].append(pslrc)
return dictpsl
#Function filter .... is better out and self-contained
def f_filter(pslrec, nearEnd = 1000):
if (pslrec.strand.startswith('+') and
(int(pslrec.tSize) - int(pslrec.tStart) > nearEnd)):
return False
if (pslrec.strand.startswith('-') and
(int(pslrec.tStart) > nearEnd)):
return False
return True
PSL_obj = PSLreader('EEV14-Vf.filtered.psl')
#read dictionary of pairs
dictpsl = PSL_obj.readPSLpairs()
from itertools import product
#product from itertools
#(1) x (2,3) = (1,2),(1,3)
output = {}
for key, v in dictpsl.items():
name, contig = key
#i get filters aligns in principal strand
strand_princ = [pslrec for pslrec in v if f_filter(pslrec) and
pslrec.qName[-1] == '1']
#i get filters aligns in secondary strand
strand_sec = [pslrec for pslrec in v if f_filter(pslrec) and
pslrec.qName[-1] == '2']
for pslrec_princ, pslrec_sec in product(strand_princ, strand_sec):
#This For has fewer comparisons, since I was grouped before
if not contig in output:
output[contig] = 1
output[contig] += 1
Note: 10-30 Mb isn't large file, if you ask me

Does pysvn let you limit log messages in reverse order?

Essentially I want the same behaviour as running:
log = client.log(url)
oldestEntry = log[-1]
Except without having to download the entire log. I know setting
limit=1
lets you find the newest entry. Is there any way of limiting from the reverse order?

Reverse the order of the revision_start and revision_end and set limit to 1:
import pysvn
url='http://svn.apache.org/repos/asf/httpd/httpd/trunk/README'
epoch = pysvn.Revision(pysvn.opt_revision_kind.number, 0)
head = pysvn.Revision(pysvn.opt_revision_kind.head)
client = pysvn.Client()
# Get all entries
l = client.log(url)
print len(l), l[0].revision, l[-1].revision
# Get most recent entry:
l = client.log(url, limit=1)
print len(l), l[0].revision
# Get most recent entry, again:
l = client.log(url, revision_start=head, revision_end=epoch, limit=1)
print len(l), l[0].revision
# Get least recent entry
l = client.log(url, revision_start=epoch, revision_end=head, limit=1)
print len(l), l[0].revision
The result is:
22 <Revision kind=number 1209505> <Revision kind=number 87470>
1 <Revision kind=number 1209505>
1 <Revision kind=number 1209505>
1 <Revision kind=number 87470>

load parameters from a file in Python

I am writing a Python class to model a process and I want to initialized the parameters from a file, say 'input.dat'. The format of the input file looks like this.
'input.dat' file:
Z0: 0 0
k: 0.1
g: 1
Delta: 20
t_end: 300
The code I wrote is the following. It works but appears redundant and inflexible. Is there a better way to do the job? Such as a loop to do readline() and then match the keyword?
def load(self,filename="input.dat"):
FILE = open(filename)
s = FILE.readline().split()
if len(s) is 3:
self.z0 = [float(s[1]),float(s[2])] # initial state
s = FILE.readline().split()
if len(s) is 2:
self.k = float(s[1]) # kappa
s = FILE.readline().split()
if len(s) is 2:
self.g = float(s[1])
s = FILE.readline().split()
if len(s) is 2:
self.D = float(s[1]) # Delta
s = FILE.readline().split()
if len(s) is 2:
self.T = float(s[1]) # end time

Assuming the params are coming from a safe place (made by you or users, not the internet), just make the parameters file a Python file, params.py:
Z0 = (0, 0)
k = 0.1
g = 1
Delta = 20
t_end = 300
Then in your code all you need is:
import params
fancy_calculation(10, k=params.k, delta=params.Delta)
The beauty of this is two-fold: 1) simplicity, and 2) you can use the power of Python in your parameter descriptions -- particularly useful here, for example:
k = 0.1
Delta = 20
g = 3 * k + Delta
Alternatively, you could use Python's built-in JSON or ConfigParser .INI parser modules.

If you are open to some other kind of file where you can keep your parameters, I would suggest you to use a YAML file.
The Python library is PyYAML. This is how you can easily use it with Python.
For a better introduction, look at this Wikipedia article: http://en.wikipedia.org/wiki/YAML.
The benefit is you can read the parameter values as lists or maps.
You would love it!

Try the following:
def load(self, filename="input.dat"):
d = {"Z0": "z0", "k": "k", "g": "g", "Delta": "D", "t_end": "T"}
FILE = open(filename)
for line in FILE:
name, value = line.split(":")
value = value.strip()
if " " in value:
value = map(float, value.split())
else:
value = float(value)
setattr(self, d[name], value)
Proof that it works:
>>> class A(object): pass
...
>>> a = A()
>>> load(a)
>>> a.__dict__
{'k': 0.10000000000000001, 'z0': [0.0, 0.0], 'D': 20.0, 'g': 1.0, 'T': 300.0}

As others have mentioned, in Python you can create object attributes dynamically "on the fly". That means you could do something like the following to create Params objects as they're read-in. I've tried to make the code as data-driven as possible, so relatively flexible.
# maps label to attribute name and types
label_attr_map = {
"Z0:": ["z0", float, float],
"k:": [ "k", float],
"g:": [ "g", float],
"Delta:": [ "D", float],
"t_end:": [ "T", float]
}
class Params(object):
def __init__(self, input_file_name):
with open(input_file_name, 'r') as input_file:
for line in input_file:
row = line.split()
label = row[0]
data = row[1:] # rest of row is data list
attr = label_attr_map[label][0]
datatypes = label_attr_map[label][1:]
values = [(datatypes[i](data[i])) for i in range(len(data))]
self.__dict__[attr] = values if len(values) > 1 else values[0]
params = Params('input.dat')
print 'params.z0:', params.z0
print 'params.k:', params.k
print 'params.g:', params.g
print 'params.D:', params.D
print 'params.T:', params.T
Output:
params.z0: [0.0, 0.0]
params.k: 0.1
params.g: 1.0
params.D: 20.0
params.T: 300.0

Perhaps this might give you what you need:
def load(self,filename='input.dat'):
with open(filename) as fh:
for line in fh:
s = line.split()
if len(s) == 2:
setattr(self,s[1],s[2])
elif len(s) == 3:
setattr(self,s[1],s[2:])
I also didn't include any error checking, but setattr is very handy.

Something like this:
def load(self,filename="input.dat"):
# maps names to number of fields they need
# only necessary for variables with more than 1 field
argmap = dict(Z0=2)
# maps config file names to their attribute names on the object
# if name is the same both places, no need
namemap = dict(Z0="z0", Delta="D", t_end="T")
with open(filename) as FILE:
for line in FILE:
s = line.split()
var = s[0].rstrip(":")
try:
val = [float(x) for x in s[1:]]
except ValueError:
continue
if len(val) == varmap.get(var, 1):
if len(val) == 1:
val = val[0]
setattr(self, namemap.get(var, var), val)

Python objects have a built-in __dict__ member. You can modify it, and then refer to properties as obj.key.
class Data(object):
def __init__(self, path='infile.dat'):
with open(path, 'r') as fo:
for line in fo.readlines():
if len(line) < 2: continue
parts = [s.strip(' :\n') for s in line.split(' ', 1)]
numbers = [float(s) for s in parts[1].split()]
# This is optional... do you want single values to be stored in lists?
if len(numbers) == 1: numbers = numbers[0]
self.__dict__[parts[0]] = numbers
# print parts -- debug
obj = Data('infile.dat')
print obj.g
print obj.Delta
print obj.Z0
At the end of this, we print out a few of the keys. Here's the output of those.
1.0
20.0
[0.0, 0.0]
For consistency, you can remove the line marked "optional" in my code, and have all objects in lists -- regardless of how many elements they have. That will make using them quite a bit easier, because you never have to worry about obj.g[0] returning an error.

Here's another one
def splitstrip(s):
return s.split(':')[1].strip()
with open('input.dat','r') as f:
a.z0 = [float(x) for x in splitstrip(f.readline()).split(' ')]
a.k, a.g, a.D, a.T = tuple([float(splitstrip(x)) for x in f.read().rstrip().split('\n')])
;)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python 2.7 yield is creating strange behavior - python

Related

When I call a method inside a class it is not working but it works outside?

Combine ~Q and F in Django?

Doing operations on a large data set

Does pysvn let you limit log messages in reverse order?

load parameters from a file in Python

Categories

Resources