I can find one other place where this problem is specifically addressed, but it seems to be even more personal in nature and the other thread provided no help. This is the error:
# Error: Maya command error
# Traceback (most recent call last):
# File "<maya console>", line 1, in <module>
# File "<maya console>", line 26, in createJoints
# RuntimeError: Maya command error #
And here is the code, to create a chain of joints along a premade curve:
import maya.cmds as mc
##UI##
jointCurve = mc.window(title = "Create Joint Chain") #window title#
mc.rowColumnLayout(nc = 2) #divides window into two columns#
mc.text(l = "Number of Joints") #dialog prompt#
jntAmount = mc.intField (v = 5, min = 2) #default value of 5, minimum value of 2 joints#
mc.button(l = "Create", w = 150, c = "createJoints()") #confirms entry#
mc.button(l = "Cancel", w = 150, c = "mc.deleteUI(jointCurve)") #closes window, deletes UI#
mc.showWindow() #now that all parameters are defined, the window is opened#
##Method##
def createJoints():
#Gathering Information#
selCurve = mc.ls(sl = True) [0]
jointAmount = mc.intField(jntAmount, q = True, v = True)
prevJnt = ""
rootJnt = ""
for i in range(0, jointAmount): #for each increment from 0 to the amount defined in the field,#
mc.select(cl = True) #clear the selection,#
newJnt = mc.joint() #create a joint,#
motionPath = mc.pathAnimation(newJnt, c = selCurve, fractionMode = True) #affix to curve using a motion path,#
mc.cutKey(motionPath + ".u", time = ()) #remove animation keys from joint's U value (position on curve),#
mc.setAttr(motionPath + ".u", i * (1.0/(jointAmount - 1))) #distribute joints evenly along curve#
#delete motion path#
mc.delete(newJnt + ".tx", icn = True)#deletes x translate value's input connections#
mc.delete(newJnt + ".ty", icn = True)#deletes y translate value's input connections#
mc.delete(newJnt + ".tz", icn = True)#deletes z translate value's input connections#
mc.delete(motionPath) #deletes motion path itself#
if i == 0:
prevJnt = newJnt
rootJnt = newJnt
continue #skips next instructions for root joint since it has no parent#
mc.parent(newJnt, prevJnt) #parents current joint to previous joint#
prevJnt = newJnt #sets status of "previous" joint to current joint#
mc.joint(rootJnt, e = True, oj = "xyz", sao = "yup", ch = True, zso = True) #orients joints along chain#
mc.deleteUI(jointCurve) #closes prompt window#
Whenever I run the code in a new file, it works perfectly, no issues.
But I have another file that, when I run this code, it throws the above error.
So it looks like it's having a problem somewhere with importing Maya commands?
as well as with this line, in particular:
mc.cutKey(motionPath + ".u", time = ())
For some reason, this error is thrown even when I start a new file, and import the other file into the scene.
I have absolutely no idea where to start looking..
The issue is your ui which is prior to the creation of your command
import maya.cmds as mc
from functools import partial
##Method##
def createJoints(*args):
#Gathering Information#
selCurve = mc.ls(sl = True) [0]
jointAmount = mc.intField(jntAmount, q = True, v = True)
prevJnt = ""
rootJnt = ""
for i in range(0, jointAmount): #for each increment from 0 to the amount defined in the field,#
mc.select(cl = True) #clear the selection,#
newJnt = mc.joint() #create a joint,#
motionPath = mc.pathAnimation(newJnt, c = selCurve, fractionMode = True) #affix to curve using a motion path,#
mc.cutKey(motionPath + ".u", time = ()) #remove animation keys from joint's U value (position on curve),#
mc.setAttr(motionPath + ".u", i * (1.0/(jointAmount - 1))) #distribute joints evenly along curve#
#delete motion path#
mc.delete(newJnt + ".tx", icn = True)#deletes x translate value's input connections#
mc.delete(newJnt + ".ty", icn = True)#deletes y translate value's input connections#
mc.delete(newJnt + ".tz", icn = True)#deletes z translate value's input connections#
mc.delete(motionPath) #deletes motion path itself#
if i == 0:
prevJnt = newJnt
rootJnt = newJnt
continue #skips next instructions for root joint since it has no parent#
mc.parent(newJnt, prevJnt) #parents current joint to previous joint#
prevJnt = newJnt #sets status of "previous" joint to current joint#
mc.joint(rootJnt, e = True, oj = "xyz", sao = "yup", ch = True, zso = True) #orients joints along chain#
mc.deleteUI(jointCurve) #closes prompt window#
def cancelUI(jointCurve, *args):
mc.deleteUI(jointCurve)
##UI##
jointCurve = mc.window(title = "Create Joint Chain") #window title#
mc.rowColumnLayout(nc = 2) #divides window into two columns#
mc.text(l = "Number of Joints") #dialog prompt#
jntAmount = mc.intField (v = 5, min = 2) #default value of 5, minimum value of 2 joints#
mc.button(l = "Create", w = 150, c = createJoints) #confirms entry#
mc.button(l = "Cancel", w = 150, c = partial(cancelUI, jointCurve)) #closes window, deletes UI#
mc.showWindow() #now that all parameters are defined, the window is opened#
Related
everyone.
I have some problems with calculating gcskews in python.
My 2 major inputs are fasta file and bed file.
Bed file has columns of gn(0), gene_type(1), gene name(2), chromosome(3), strand(4), num(5), start(6).(These numbers are index numbers in python.) Then I am trying to use some functions which can calculate gcskews of sense and antisense strand from the start site of each gene. The window is 100bp and these are the functions.
import re
import sys
import os
# opening bed file
content= []
with open("gene_info.full.tsv") as new :
for line in new :
content.append(line.strip().split())
content = content[1:]
def fasta2dict(fil):
dic = {}
scaf = ''
seq = []
for line in open(fil):
if line.startswith(">") and scaf == '':
scaf = line.split(' ')[0].lstrip(">").replace("\n", "")
elif line.startswith(">") and scaf != '':
dic[scaf] = ''.join(seq)
scaf = line.split(' ')[0].lstrip(">").replace("\n", "")
seq = []
else:
seq.append(line.rstrip())
dic[scaf] = ''.join(seq)
return dic
dic_file = fasta2dict("full.fa")
# functions for gc skew
def GC_skew_up(strand, loc, seq, window = 100) : # need -1 for index
values_up = []
loc = loc - 1
if strand == "+" :
sp_up = seq[loc - window : loc]
g_up = sp_up.count('G') + sp_up.count('g')
c_up = sp_up.count('C') + sp_up.count('c')
try :
skew_up = (g_up - c_up) / float(g_up + c_up)
except ZeroDivisionError:
skew_up = 0.0
values_up.append(skew_up)
elif strand == "-" :
sp_up = seq[loc : loc + window]
g_up = sp_up.count('G') + sp_up.count('g')
c_up = sp_up.count('C') + sp_up.count('c')
try :
skew_up = (c_up - g_up) / float(g_up + c_up)
except ZeroDivisionError:
skew_up = 0.0
values_up.append(skew_up)
return values_up
def GC_skew_dw(strand, loc, seq, window = 100) :
values_dw = []
loc = loc - 1
if strand == "+" :
sp_dw = seq[loc : loc + window]
g_dw = sp_dw.count('G') + sp_dw.count('g')
c_dw = sp_dw.count('C') + sp_dw.count('c')
try :
skew_dw = (g_dw - c_dw) / float(g_dw + c_dw)
except ZeroDivisionError:
skew_dw = 0.0
values_dw.append(skew_dw)
elif strand == "-" :
sp_dw = seq[loc - window : loc]
g_dw = sp_dw.count('G') + sp_dw.count('g')
c_dw = sp_dw.count('C') + sp_dw.count('c')
try :
skew_dw = (c_dw - g_dw) / float(g_dw + c_dw)
except ZeroDivisionError:
skew_dw = 0.0
values_dw.append(skew_dw)
return values_dw
As I said, I want to calculate the gcskews for 100bp of strands from the start site of genes.
Therefore, I made codes that get the chromosome name from the bed file and get the sequence data from the Fasta file.
Then according to gene name and strand information, I expected that codes will find the correct start site and gcskew for 100bp window will be calculated.
However, when I run this code, gcskew of - strand is wrong but + strand is correct. (I got correct gcskew data and I used it.)
Gcskews are different from the correct data, but I don't know what is the problem.
Could anyone tell me what is the problem of this code?
Thanks in advance!
window = 100
gname = []
up = []
dw = []
for match in content :
seq_chr = dic_file[str(match[3])]
if match[4] == "+" :
strand = match[4]
new = int(match[6])
sen_up = GC_skew_up(strand, new, seq_chr, window = 100)
sen_dw = GC_skew_dw(strand, new, seq_chr, window = 100)
gname.append(match[2])
up.append(str(sen_up[0]))
dw.append(str(sen_dw[0]))
if match[4] == "-" :
strand = match[4]
new = int(match[6])
an_up = GC_skew_up(strand, new, seq_chr, window = 100)
an_dw = GC_skew_dw(strand, new, seq_chr, window = 100)
gname.append(match[2])
up.append(str(an_up[0]))
dw.append(str(an_dw[0]))
tot = zip(gname, up, dw)
I am trying to use vpython glowscript to generate some animations.
The code works fine, but the problem is the display is through a browser, and I am not able to figure out how to save the display as gif. Here is a working code:
GlowScript 2.8 VPython
# Bruce Sherwood
N = 4 # N by N by N array of atoms
# Surrounding the N**3 atoms is another layer of invisible fixed-position atoms
# that provide stability to the lattice.
k = 1
m = 1
spacing = 1
atom_radius = 0.3*spacing
L0 = spacing-1.8*atom_radius
V0 = pi*(0.5*atom_radius)**2*L0 # initial volume of spring
scene.center = 0.5*(N-1)*vector(1,1,1)
dt = 0.04*(2*pi*sqrt(m/k))
axes = [vector(1,0,0), vector(0,1,0), vector(0,0,1)]
scene.caption= """A model of a solid represented as atoms connected by interatomic bonds.
Right button drag or Ctrl-drag to rotate "camera" to view scene.
To zoom, drag with middle button or Alt/Option depressed, or use scroll wheel.
On a two-button mouse, middle is left + right.
Shift-drag to pan left/right and up/down.
Touch screen: pinch/extend to zoom, swipe or two-finger rotate."""
class crystal:
def __init__(self, N, atom_radius, spacing, momentumRange ):
self.atoms = []
self.springs = []
# Create (N+2)^3 atoms in a grid; the outermost atoms are fixed and invisible
for z in range(-1,N+1,1):
for y in range(-1,N+1,1):
for x in range(-1,N+1,1):
atom = sphere()
atom.pos = vector(x,y,z)*spacing
atom.radius = atom_radius
atom.color = vector(0,0.58,0.69)
if 0 <= x < N and 0 <= y < N and 0 <= z < N:
p = vec.random()
atom.momentum = momentumRange*p
else:
atom.visible = False
atom.momentum = vec(0,0,0)
atom.index = len(self.atoms)
self.atoms.append( atom )
for atom in self.atoms:
if atom.visible:
if atom.pos.x == 0:
self.make_spring(self.atoms[atom.index-1], atom, False)
self.make_spring(atom, self.atoms[atom.index+1], True)
elif atom.pos.x == N-1:
self.make_spring(atom, self.atoms[atom.index+1], False)
else:
self.make_spring(atom, self.atoms[atom.index+1], True)
if atom.pos.y == 0:
self.make_spring(self.atoms[atom.index-(N+2)], atom, False)
self.make_spring(atom, self.atoms[atom.index+(N+2)], True)
elif atom.pos.y == N-1:
self.make_spring(atom, self.atoms[atom.index+(N+2)], False)
else:
self.make_spring(atom, self.atoms[atom.index+(N+2)], True)
if atom.pos.z == 0:
self.make_spring(self.atoms[atom.index-(N+2)**2], atom, False)
self.make_spring(atom, self.atoms[atom.index+(N+2)**2], True)
elif atom.pos.z == N-1:
self.make_spring(atom, self.atoms[atom.index+(N+2)**2], False)
else:
self.make_spring(atom, self.atoms[atom.index+(N+2)**2], True)
# Create a grid of springs linking each atom to the adjacent atoms
# in each dimension, or to invisible motionless atoms
def make_spring(self, start, end, visible):
spring = helix()
spring.pos = start.pos
spring.axis = end.pos-start.pos
spring.visible = visible
spring.thickness = 0.05
spring.radius = 0.5*atom_radius
spring.length = spacing
spring.start = start
spring.end = end
spring.color = color.orange
self.springs.append(spring)
c = crystal(N, atom_radius, spacing, 0.1*spacing*sqrt(k/m))
while True:
rate(60)
for atom in c.atoms:
if atom.visible:
atom.pos = atom.pos + atom.momentum/m*dt
for spring in c.springs:
spring.axis = spring.end.pos - spring.start.pos
L = mag(spring.axis)
spring.axis = spring.axis.norm()
spring.pos = spring.start.pos+0.5*atom_radius*spring.axis
Ls = L-atom_radius
spring.length = Ls
Fdt = spring.axis * (k*dt * (1-spacing/L))
if spring.start.visible:
spring.start.momentum = spring.start.momentum + Fdt
if spring.end.visible:
spring.end.momentum = spring.end.momentum - Fdt
I would like to save a series of screenshots of this output. Any help would be appreciated.
From the documentation at https://www.glowscript.org/docs/VPythonDocs/canvas.html:
capture(filename) Sends to your Download folder a png screen shot of the canvas. If filename is the string "boxes" or "boxes.png" the file will be named "boxes.png".
My code dies after about 140+ iterations, and I don't know why. I guess memory leak is a possibility, but I couldn't find it. I also found out that changing some arithmetic constants can prolong the time until the crash.
I have a genetic algorithm that tries to find best (i.e. minimal steps) route from point A (src) to point B (dst).
I create a list of random chromosomes, where each chromosome has:
src + dst [always the same]
list of directions (random)
I then run the algorithm:
find best route and draw it (for visualization purposes)
Given a probability P - replace the chromosomes with cross-overs (i.e. pick 2, and take the "end" of one's directions, and replace the "end" of the second's)
Given probability Q - mutate (replace the next direction with a random direction)
This all goes well, and most of the times I do find a route (usually not the ideal one), but sometimes, when it searches for a long time (say, about 140+ iterations) it just crushes. No warning. No error.
How can I prevent that (a simple iteration limit can work, but I do want the algorithm to run for a long time [~2000+ iterations])?
I think the relevant parts of the code are:
update function inside GUI class
which calls to cross_over
When playing with the update_fitness() score values (changing score -= (weight+1)*2000*(shift_x + shift_y) to score -= (weight+1)*2*(shift_x + shift_y) it runs for a longer time. Could be some kind of an arithmetic overflow?
import tkinter as tk
from enum import Enum
from random import randint, sample
from copy import deepcopy
from time import sleep
from itertools import product
debug_flag = False
class Direction(Enum):
Up = 0
Down = 1
Left = 2
Right = 3
def __str__(self):
return str(self.name)
def __repr__(self):
return str(self.name)[0]
# A chromosome is a list of directions that should lead the way from src to dst.
# Each step in the chromosome is a direction (up, down, right ,left)
# The chromosome also keeps track of its route
class Chromosome:
def __init__(self, src = None, dst = None, length = 10, directions = None):
self.MAX_SCORE = 1000000
self.route = [src]
if not directions:
self.directions = [Direction(randint(0,3)) for i in range(length)]
else:
self.directions = directions
self.src = src
self.dst = dst
self.fitness = self.MAX_SCORE
def __str__(self):
return str(self.fitness)
def __repr__(self):
return self.__str__()
def set_src(self, pixel):
self.src = pixel
def set_dst(self, pixel):
self.dst = pixel
def set_directions(self, ls):
self.directions = ls
def update_fitness(self):
# Higher score - a better fitness
score = self.MAX_SCORE - len(self.route)
score += 4000*(len(set(self.route)) - len(self.route)) # penalize returning to the same cell
score += (self.dst in self.route) * 500 # bonus routes that get to dst
for weight,cell in enumerate(self.route):
shift_x = abs(cell[0] - self.dst[0])
shift_y = abs(cell[1] - self.dst[1])
score -= (weight+1)*2000*(shift_x + shift_y) # penalize any wrong turn
self.fitness = max(score, 0)
def update(self, mutate_chance = 0.9):
# mutate #
self.mutate(chance = mutate_chance)
# move according to direction
last_cell = self.route[-1]
try:
direction = self.directions[len(self.route) - 1]
except IndexError:
print('No more directions. Halting')
return
if direction == Direction.Down:
x_shift, y_shift = 0, 1
elif direction == Direction.Up:
x_shift, y_shift = 0, -1
elif direction == Direction.Left:
x_shift, y_shift = -1, 0
elif direction == Direction.Right:
x_shift, y_shift = 1, 0
new_cell = last_cell[0] + x_shift, last_cell[1] + y_shift
self.route.append(new_cell)
self.update_fitness()
def cross_over(p1, p2, loc = None):
# find the cross_over point
if not loc:
loc = randint(0,len(p1.directions))
# choose one of the parents randomly
x = randint(0,1)
src_parent = (p1, p2)[x]
dst_parent = (p1, p2)[1 - x]
son = deepcopy(src_parent)
son.directions[loc:] = deepcopy(dst_parent.directions[loc:])
return son
def mutate(self, chance = 1):
if 100*chance > randint(0,99):
self.directions[len(self.route) - 1] = Direction(randint(0,3))
class GUI:
def __init__(self, rows = 10, cols = 10, iteration_timer = 100, chromosomes = [], cross_over_chance = 0.5, mutation_chance = 0.3, MAX_ITER = 100):
self.rows = rows
self.cols = cols
self.canv_w = 800
self.canv_h = 800
self.cell_w = self.canv_w // cols
self.cell_h = self.canv_h // rows
self.master = tk.Tk()
self.canvas = tk.Canvas(self.master, width = self.canv_w, height = self.canv_h)
self.canvas.pack()
self.rect_dict = {}
self.iteration_timer = iteration_timer
self.iterations = 0
self.MAX_ITER = MAX_ITER
self.chromosome_list = chromosomes
self.src = chromosomes[0].src # all chromosomes share src + dst
self.dst = chromosomes[0].dst
self.prev_best_route = []
self.cross_over_chance = cross_over_chance
self.mutation_chance = mutation_chance
self.no_obstacles = True
# init grid #
for r in range(rows):
for c in range(cols):
self.rect_dict[(r, c)] = self.canvas.create_rectangle(r *self.cell_h, c *self.cell_w,
(1+r)*self.cell_h, (1+c)*self.cell_w,
fill="gray")
# init grid #
# draw src + dst #
self.color_src_dst()
# draw src + dst #
# after + mainloop #
self.master.after(iteration_timer, self.start_gui)
tk.mainloop()
# after + mainloop #
def start_gui(self):
self.start_msg = self.canvas.create_text(self.canv_w // 2,3*self.canv_h // 4, fill = "black", font = "Times 25 bold underline",
text="Starting new computation.\nPopulation size = %d\nCross-over chance = %.2f\nMutation chance = %.2f" %
(len(self.chromosome_list), self.cross_over_chance, self.mutation_chance))
self.master.after(2000, self.update)
def end_gui(self, msg="Bye Bye!"):
self.master.wm_attributes('-alpha', 0.9) # transparency
self.canvas.create_text(self.canv_w // 2,3*self.canv_h // 4, fill = "black", font = "Times 25 bold underline", text=msg)
cell_ls = []
for idx,cell in enumerate(self.prev_best_route):
if cell in cell_ls:
continue
cell_ls.append(cell)
self.canvas.create_text(cell[0]*self.cell_w, cell[1]*self.cell_h, fill = "purple", font = "Times 16 bold italic", text=str(idx+1))
self.master.after(3000, self.master.destroy)
def color_src_dst(self):
r_src = self.rect_dict[self.src]
r_dst = self.rect_dict[self.dst]
c_src = 'blue'
c_dst = 'red'
self.canvas.itemconfig(r_src, fill=c_src)
self.canvas.itemconfig(r_dst, fill=c_dst)
def color_route(self, route, color):
for cell in route:
try:
self.canvas.itemconfig(self.rect_dict[cell], fill=color)
except KeyError:
# out of bounds -> ignore
continue
# keep the src + dst
self.color_src_dst()
# keep the src + dst
def compute_shortest_route(self):
if self.no_obstacles:
return (1 +
abs(self.chromosome_list[0].dst[0] - self.chromosome_list[0].src[0]) +
abs(self.chromosome_list[0].dst[1] - self.chromosome_list[0].src[1]))
else:
return 0
def create_weighted_chromosome_list(self):
ls = []
for ch in self.chromosome_list:
tmp = [ch] * (ch.fitness // 200000)
ls.extend(tmp)
return ls
def cross_over(self):
new_chromosome_ls = []
weighted_ls = self.create_weighted_chromosome_list()
while len(new_chromosome_ls) < len(self.chromosome_list):
try:
p1, p2 = sample(weighted_ls, 2)
son = Chromosome.cross_over(p1, p2)
if son in new_chromosome_ls:
continue
else:
new_chromosome_ls.append(son)
except ValueError:
continue
return new_chromosome_ls
def end_successfully(self):
self.end_gui(msg="Got to destination in %d iterations!\nBest route length = %d" % (len(self.prev_best_route), self.compute_shortest_route()))
def update(self):
# first time #
self.canvas.delete(self.start_msg)
# first time #
# end #
if self.iterations >= self.MAX_ITER:
self.end_gui()
return
# end #
# clean the previously best chromosome route #
self.color_route(self.prev_best_route[1:], 'gray')
# clean the previously best chromosome route #
# cross over #
if 100*self.cross_over_chance > randint(0,99):
self.chromosome_list = self.cross_over()
# cross over #
# update (includes mutations) all chromosomes #
for ch in self.chromosome_list:
ch.update(mutate_chance=self.mutation_chance)
# update (includes mutations) all chromosomes #
# show all chromsome fitness values #
if debug_flag:
fit_ls = [ch.fitness for ch in self.chromosome_list]
print(self.iterations, sum(fit_ls) / len(fit_ls), fit_ls)
# show all chromsome fitness values #
# find and display best chromosome #
best_ch = max(self.chromosome_list, key=lambda ch : ch.fitness)
self.prev_best_route = deepcopy(best_ch.route)
self.color_route(self.prev_best_route[1:], 'gold')
# find and display best chromosome #
# check if got to dst #
if best_ch.dst == best_ch.route[-1]:
self.end_successfully()
return
# check if got to dst #
# after + update iterations #
self.master.after(self.iteration_timer, self.update)
self.iterations += 1
# after + update iterations #
def main():
iter_timer, ITER = 10, 350
r,c = 20,20
s,d = (13,11), (7,8)
population_size = [80,160]
cross_over_chance = [0.2,0.4,0.5]
for pop_size, CO_chance in product(population_size, cross_over_chance):
M_chance = 0.7 - CO_chance
ch_ls = [Chromosome(src=s, dst=d, directions=[Direction(randint(0,3)) for i in range(ITER)]) for i in range(pop_size)]
g = GUI(rows=r, cols=c, chromosomes = ch_ls, iteration_timer=iter_timer,
cross_over_chance=CO_chance, mutation_chance=M_chance, MAX_ITER=ITER-1)
del(ch_ls)
del(g)
if __name__ == "__main__":
main()
I do not know if you know the Python Profiling tool of Visual Studio, but it is quite useful in cases as yours (though I usually program with editors, like VS Code).
I have run your program and, as you said, it sometimes crashes. I have analyzed the code with the profiling tool and it seems that the problem is the function cross_over, specifically the random function:
I would strongly suggest reviewing your cross_over and mutation functions. The random function should not be called so many times (2 millions).
I have previously programmed Genetic Algorithms and, to me, it seems that your program is falling into a local minimum. What is suggested in these cases is playing with the percentage of mutation. Try to increase it a little bit so that you could get out of the local minimum.
The scipy library of matplotlib in Python 3 contains a function scipy.signal.freqz which computes the frequency response of a digital filter.
I am trying to write my own version of this function, to relay the same output as this embedded function. I plan to use the inputs:
freqz(b, a = 1, worN=None, whole=None):
[b=numerator of linear filter;
a=denominator of linear filter;
worN=at none, compute at 512 frequencies equally spaced around the unit circle;
whole=at none, frequency computed around upper half of unit circle, from 0 to pi]
This function should output two outputs:
w and h
[w=ndarray, the normalized frequencies at which h was computed, in radians/sample;
h=ndarray, the frequency response]
current code:
def freqz(b, a=1, worN=None, whole=False):
#Initialize a
a = atleast_1d(a)
#Initialize b
b = atleast_1d(b)
#Check condition
if worN is None:
#Initialize worN
worN = 512
#Initialize
h = None
#Try block
try:
#Update
worN = operator.index(worN)
#Exception
except TypeError:
#Update
w = atleast_1d(worN)
#Otherwise
else:
#Check condition
if worN < 0:
#Throw an exception
raise ValueError('worN should be non negative, we got %s' % (worN,))
#Update
finalpoint = 2 * pi if whole else pi
#Update
w = np.linspace(0, finalpoint, worN, endpoint=False)
#Check condition
if (a.size == 1 and worN >= b.shape[0] and fftpack.next_fast_len(worN) == worN and (b.ndim == 1 or (b.shape[-1] == 1))):
#Update
nfft = worN if whole else worN * 2
#Check condition
if np.isrealobj(b) and np.isrealobj(a):
#Update
fftfunc = np.fft.rfft
#Otherwise
else:
#Update
fftfunc = fftpack.fft
#Update
h = fftfunc(b, n=nfft, axis=0)[:worN]
#Update
h /= a
#Check condition
if fftfunc is np.fft.rfft and whole:
#Update
stop = -1 if nfft % 2 == 1 else -2
#Update
hVal_flip = slice(stop, 0, -1)
#Update
h = np.concatenate((h, h[hVal_flip].conj()))
#Check condition
if b.ndim > 1:
#Update
h = h[..., 0]
#Update
h = np.rollaxis(h, 0, h.ndim)
#Delete
del worN
#Check condition
if h is None:
#Update
zmVal = exp(-1j * w)
#Update
h = (npp_polyval(zmVal, b, tensor=False)/npp_polyval(zmVal, a, tensor=False))
#Check condition
return w, h
I am working on a program that estimates the statistic Tajima's D in a series of sliding windows across a chromosome. The chromosome itself is also divided into a number of different regions with (hopefully) functional significance. The sliding window analysis is performed by my script on each region.
At the start of the program, I define the size of the sliding windows and the size of the steps that move from one window to the next. I import a file which contains the coordinates for each different chromosomal region, and import another file which contains all the SNP data I am working with (this is read line-by-line, as it is a large file). The program loops through the list of chromosomal locations. For each location, it generates an index of steps and windows for the analysis, partitions the SNP data into output files (corresponding with the steps), calculates key statistics for each step file, and combines these statistics to estimate Tajima's D for each window.
The program works well for small files of SNP data. It also works well for the first iteration over the first chromosomal break point. However, for large files of SNP data, the step size in the analysis is inexplicably decreased as the program iterates over each chromosomal regions. For the first chromosomal regions, the step size is 2500 nucleotides (this is what it is suppose to be). For the second chromosome segment, however, the step size is 1966, and for the third it is 732.
If anyone has any suggestions at to why this might be the case, please let me know. I am especially stumped as this program seems to work size for small files but not for larger ones.
My code is below:
import sys
import math
import fileinput
import shlex
import string
windowSize = int(500)
stepSize = int(250)
n = int(50) #number of individuals in the anaysis
SNP_file = open("SNPs-1.txt",'r')
SNP_file.readline()
breakpoints = open("C:/Users/gwilymh/Desktop/Python/Breakpoint coordinates.txt", 'r')
breakpoints = list(breakpoints)
numSegments = len(breakpoints)
# Open a file to store the Tajima's D results:
outputFile = open("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/Tajima's D estimates.txt", 'a')
outputFile.write(str("segmentNumber\tchrSegmentName\tsegmentStart\tsegmentStop\twindowNumber\twindowStart\twindowStop\tWindowSize\tnSNPs\tS\tD\n"))
#Calculating parameters a1, a2, b1, b2, c1 and c2
numPairwiseComparisons=n*((n-1)/2)
b1=(n+1)/(3*(n-1))
b2=(2*(n**2+n+3))/(9*n*(n-1))
num=list(range(1,n)) # n-1 values as a list
i=0
a1=0
for i in num:
a1=a1+(1/i)
i=i+1
j=0
a2=0
for j in num:
a2=a2+(1/j**2)
j=j+1
c1=(b1/a1)-(1/a1**2)
c2=(1/(a1**2+a2))*(b2 - ((n+2)/(a1*n))+ (a2/a1**2) )
counter6=0
#For each segment, assign a number and identify the start and stop coodrinates and the segment name
for counter6 in range(counter6,numSegments):
segment = shlex.shlex(breakpoints[counter6],posix = True)
segment.whitespace += '\t'
segment.whitespace_split = True
segment = list(segment)
segmentName = segment[0]
segmentNumber = int(counter6+1)
segmentStartPos = int(segment[1])
segmentStopPos = int(segment[2])
outputFile1 = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_Count of SNPs and mismatches per step.txt")%(str(segmentNumber),str(segmentName))), 'a')
#Make output files to index the lcoations of each window within each segment
windowFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_windowFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'a')
k = segmentStartPos - 1
windowNumber = 0
while (k+1) <=segmentStopPos:
windowStart = k+1
windowNumber = windowNumber+1
windowStop = k + windowSize
if windowStop > segmentStopPos:
windowStop = segmentStopPos
windowFileIndex.write(("%s\t%s\t%s\n")%(str(windowNumber),str(windowStart),str(windowStop)))
k=k+stepSize
windowFileIndex.close()
# Make output files for each step to export the corresponding SNP data into + an index of these output files
stepFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_stepFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'a')
i = segmentStartPos-1
stepNumber = 0
while (i+1) <= segmentStopPos:
stepStart = i+1
stepNumber = stepNumber+1
stepStop = i+stepSize
if stepStop > segmentStopPos:
stepStop = segmentStopPos
stepFile = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(stepNumber))), 'a')
stepFileIndex.write(("%s\t%s\t%s\n")%(str(stepNumber),str(stepStart),str(stepStop)))
i=i+stepSize
stepFile.close()
stepFileIndex.close()
# Open the index file for each step in current chromosomal segment
stepFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_stepFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'r')
stepFileIndex = list(stepFileIndex)
numSteps = len(stepFileIndex)
while 1:
currentSNP = SNP_file.readline()
if not currentSNP: break
currentSNP = shlex.shlex(currentSNP,posix=True)
currentSNP.whitespace += '\t'
currentSNP.whitespace_split = True
currentSNP = list(currentSNP)
SNPlocation = int(currentSNP[0])
if SNPlocation > segmentStopPos:break
stepIndexBin = int(((SNPlocation-segmentStartPos-1)/stepSize)+1)
#print(SNPlocation, stepIndexBin)
writeFile = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(stepIndexBin))), 'a')
writeFile.write((("%s\n")%(str(currentSNP[:]))))
writeFile.close()
counter3=0
for counter3 in range(counter3,numSteps):
# open up each step in the list of steps across the chromosomal segment:
L=shlex.shlex(stepFileIndex[counter3],posix=True)
L.whitespace += '\t'
L.whitespace_split = True
L=list(L)
#print(L)
stepNumber = int(L[0])
stepStart = int(L[1])
stepStop = int(L[2])
stepSize = int(stepStop-(stepStart-1))
#Now open the file of SNPs corresponding with the window in question and convert it into a list:
currentStepFile = open(("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(counter3+1)),'r')
currentStepFile = list(currentStepFile)
nSNPsInCurrentStepFile = len(currentStepFile)
print("number of SNPs in this step is:", nSNPsInCurrentStepFile)
#print(currentStepFile)
if nSNPsInCurrentStepFile == 0:
mismatchesPerSiteList = [0]
else:
# For each line of the file, estimate the per site parameters relevent to Tajima's D
mismatchesPerSiteList = list()
counter4=0
for counter4 in range(counter4,nSNPsInCurrentStepFile):
CountA=0
CountG=0
CountC=0
CountT=0
x = counter4
lineOfData = currentStepFile[x]
counter5=0
for counter5 in range(0,len(lineOfData)):
if lineOfData[counter5]==("A" or "a"): CountA=CountA+1
elif lineOfData[counter5]==("G" or "g"): CountG=CountG+1
elif lineOfData[counter5]==("C" or "c"): CountC=CountC+1
elif lineOfData[counter5]==("T" or "t"): CountT=CountT+1
else: continue
AxG=CountA*CountG
AxC=CountA*CountC
AxT=CountA*CountT
GxC=CountG*CountC
GxT=CountG*CountT
CxT=CountC*CountT
NumberMismatches = AxG+AxC+AxT+GxC+GxT+CxT
mismatchesPerSiteList=mismatchesPerSiteList+[NumberMismatches]
outputFile1.write(str(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n")%(segmentNumber, segmentName,stepNumber,stepStart,stepStop,stepSize,nSNPsInCurrentStepFile,sum(mismatchesPerSiteList))))
outputFile1.close()
windowFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_windowFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'r')
windowFileIndex = list(windowFileIndex)
numberOfWindows = len(windowFileIndex)
stepData = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_Count of SNPs and mismatches per step.txt")%(str(segmentNumber),str(segmentName))), 'r')
stepData = list(stepData)
numberOfSteps = len(stepData)
counter = 0
for counter in range(counter, numberOfWindows):
window = shlex.shlex(windowFileIndex[counter], posix = True)
window.whitespace += "\t"
window.whitespace_split = True
window = list(window)
windowNumber = int(window[0])
firstCoordinateInCurrentWindow = int(window[1])
lastCoordinateInCurrentWindow = int(window[2])
currentWindowSize = lastCoordinateInCurrentWindow - firstCoordinateInCurrentWindow +1
nSNPsInThisWindow = 0
nMismatchesInThisWindow = 0
counter2 = 0
for counter2 in range(counter2,numberOfSteps):
step = shlex.shlex(stepData[counter2], posix=True)
step.whitespace += "\t"
step.whitespace_split = True
step = list(step)
lastCoordinateInCurrentStep = int(step[4])
if lastCoordinateInCurrentStep < firstCoordinateInCurrentWindow: continue
elif lastCoordinateInCurrentStep <= lastCoordinateInCurrentWindow:
nSNPsInThisStep = int(step[6])
nMismatchesInThisStep = int(step[7])
nSNPsInThisWindow = nSNPsInThisWindow + nSNPsInThisStep
nMismatchesInThisWindow = nMismatchesInThisWindow + nMismatchesInThisStep
elif lastCoordinateInCurrentStep > lastCoordinateInCurrentWindow: break
if nSNPsInThisWindow ==0 :
S = 0
D = 0
else:
S = nSNPsInThisWindow/currentWindowSize
pi = nMismatchesInThisWindow/(currentWindowSize*numPairwiseComparisons)
print(nSNPsInThisWindow,nMismatchesInThisWindow,currentWindowSize,S,pi)
D = (pi-(S/a1))/math.sqrt(c1*S + c2*S*(S-1/currentWindowSize))
outputFile.write(str(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n")%(segmentNumber,segmentName,segmentStartPos,segmentStopPos,windowNumber,firstCoordinateInCurrentWindow,lastCoordinateInCurrentWindow,currentWindowSize,nSNPsInThisWindow,S,D)))
A quick search shows that you do change your stepSize on line 110:
stepStart = int(L[1])
stepStop = int(L[2])
stepSize = int(stepStop-(stepStart-1))
stepStop and stepStart appear to depend on your files' contents, so we can't debug it further.