Monte-Carlo simulation of protein structure in Python

Monte-Carlo simulation of protein structure in Python - python

I'm doing a university project in which I have to use a Monte Carlo simulation to determine the structure of a protein. So far my code looks like this:
from Bio.PDB import PDBIO
import random
import math
# --- Simulation parameters -------------------------------------------------
N = 100                   # number of residues in the chain
temperature = 300         # temperature in kelvin
n_trials = 10000          # number of trials
boltzmann = 1.380649e-23  # Boltzmann constant

# Random sequence of N one-letter amino-acid codes (drawn before the
# conformation so the random stream is consumed in the same order).
sequence = ''.join(
    random.choice(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
                   'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'])
    for _ in range(N)
)

# Random starting conformation: three values per residue, each drawn
# uniformly from [-180, 180].
conformation = [
    [random.uniform(-180, 180) for _ in range(3)]
    for _ in range(N)
]
def energy(sequence, conformation):
    """Return the sum of inverse pairwise distances between all rows.

    Each row of ``conformation`` is treated as a point; only its first three
    components are used. For every unordered pair of rows the Euclidean
    distance ``d`` is computed and ``1 / d`` is added to the total.

    Args:
        sequence: Accepted for interface compatibility; not used in the
            calculation.
        conformation: Sequence of rows, each with at least three numbers.

    Returns:
        The accumulated ``1 / d`` over all pairs (``0`` when there are fewer
        than two rows).

    Raises:
        ZeroDivisionError: If two rows coincide (distance of zero).
    """
    # Use the size of the data actually passed in rather than a module-level
    # constant, so the function is correct for any conformation length.
    n_points = len(conformation)
    e = 0
    for i in range(n_points - 1):
        first = conformation[i]
        for j in range(i + 1, n_points):
            second = conformation[j]
            dist = math.sqrt(sum((first[k] - second[k]) ** 2 for k in range(3)))
            e += 1 / dist
    return e
# Metropolis Monte-Carlo loop: perturb one randomly chosen value of the
# conformation per trial and keep the change according to the Boltzmann
# acceptance criterion.
#
# NOTE(review): energy() sums 1/distance in arbitrary units while
# boltzmann * temperature is about 4e-21, so practically every uphill move is
# rejected - confirm the intended energy units.
old_energy = energy(sequence, conformation)  # energy of the current state
for _ in range(n_trials):
    i = random.randint(0, N - 1)
    new_angle = random.uniform(-180, 180)
    j = random.randint(0, 2)
    old_conformation = conformation[i][j]
    conformation[i][j] = new_angle
    new_energy = energy(sequence, conformation)
    delta_energy = new_energy - old_energy
    if delta_energy <= 0:
        # Downhill moves are always accepted. Checking the sign first also
        # avoids an OverflowError from math.exp on a large positive exponent.
        accepted = True
    else:
        prob = math.exp(-delta_energy / (boltzmann * temperature))
        accepted = random.random() < prob
    if accepted:
        # The accepted state becomes the reference for the next trial.
        old_energy = new_energy
    else:
        conformation[i][j] = old_conformation
final_conformation = conformation
print(final_conformation)
So far, I've taken random data. What protein is better to take for this kind of simulation and how can this code be improved? Is it possible to enter the simulation result into VMD to generate a 3-D protein structure?

Related

How to fix for loop for a list

I am trying to create y as an array to create a function iterating through zeta which is dependent upon E all using a for loop. However the values are not being added to the list.
I have also tried defining the variables and the mathematical function as two different coding functions
screenshot
import cmath
import matplotlib.pyplot as plt
# Parameters used by the formulas below (units are not stated in this snippet).
a = 2*10**-15
Vo = 83*10**6
m = 1.6726*10**(-27)
pi = cmath.pi
# E and y each start out holding a single initial value at index 0.
E = []
E.append(-83*10**6)
# NOTE(review): the reduced Planck constant is usually h/(2*pi); this divides
# by pi only - confirm.
hbar = 6.62607015*10**(-34)/ pi
K = 16.032280*10**6
y = []
y.append(51311.18131)
# Intended to fill y and E for i = 1..82, each step using the previous E value.
# NOTE(review): `k` below is not defined anywhere in this snippet (only `K` is),
# and y[i] / E[i] assign to indices that do not exist yet in one-element lists.
def variables(y, E):
for i in range(1, 83, 1):
alpha = cmath.sqrt(2*m*(E[i-1]+Vo)/(hbar**2))
zeta = alpha*a
eta = cmath.sqrt(k - zeta**2)
y[i] = zeta*cmath.tan(zeta) - eta
E[i] = E[i-1] + 1
return y, E
# variables() is never called in this snippet, so E and y still hold only their
# initial values when they are printed and plotted.
print('E = ', E, 'Y = ', y)
plt.plot(E, y)
The program as of now should graph y values as a function of Zeta which is changing with energy.

You don't need the loop to be in a function, just put it at the top-level of the script. And use y.append() and E.append() to add to those lists.
# Extend y and E one entry per step; each step reads the previous energy
# E[i-1], which already exists because E starts with one element.
for i in range(1, 83):
    alpha = cmath.sqrt(2*m*(E[i-1]+Vo)/(hbar**2))
    zeta = alpha*a
    # The constant is defined as upper-case K; lower-case k is undefined.
    eta = cmath.sqrt(K - zeta**2)
    y.append(zeta*cmath.tan(zeta) - eta)
    E.append(E[i-1] + 1)

In addition to @Barmar's answer, your k variable needs to be K (upper case).
import cmath
import matplotlib.pyplot as plt
# Parameters used by the formulas below (units are not stated in this snippet).
a = 2*10**-15
Vo = 83*10**6
m = 1.6726*10**(-27)
pi = cmath.pi
# NOTE(review): the reduced Planck constant is usually h/(2*pi); this divides
# by pi only - confirm.
hbar = 6.62607015*10**(-34)/ pi
K = 16.032280*10**6

# Pre-size both lists to the number of points, then store the initial values
# at index 0. Appending after pre-sizing would put them at the END of the
# lists, so the recurrence below would start from 0 instead.
n_points = 83
E = [0] * n_points
E[0] = -83*10**6
y = [0] * n_points
y[0] = 51311.18131

# Fill entries 1..82; each step reads the previous energy E[i-1].
for i in range(1, n_points):
    alpha = cmath.sqrt(2*m*(E[i-1]+Vo)/(hbar**2))
    zeta = alpha*a
    eta = cmath.sqrt(K - zeta**2)
    y[i] = zeta*cmath.tan(zeta) - eta
    E[i] = E[i-1] + 1

print('E = ', E, 'Y = ', y)
plt.plot(E, y)
Also it is not required to use append as sometimes append doesn't always work well with calculated index lookups. It might be better for you to initialize the y and E lists to be the length of your loop first.

cvxpy+ecos: problem INFEASIBLE, how to scale correctly

I have the following code:
import numpy as np
import cvxpy as cp
import math
import sys
def solve05( p, a ):
    """Build and solve the scaled optimisation problem for ``p`` and ``a``.

    With ``n = len(p)`` the decision vector ``x`` has ``n*n + n`` entries: the
    first ``n*n`` form an ``n`` x ``n`` matrix (row-major) and the last ``n``
    are extra ("dummy") variables ``s``. The constraints are:

    * ``x >= 0``
    * for every column ``j``: ``sum_i x[i, j] == 1``
    * for every row ``i``: ``sum_j p[j] * x[i, j] - s[i] == p[i]``
    * ``0.01 <= s[i] <= a[i] * CF``

    where both sides of the equalities are multiplied by ``CF = 1e3``. The
    objective minimised is ``1 / (s[0] + ... + s[n-1] + 1)``.

    Args:
        p: Sequence of ``n`` numbers.
        a: Sequence with at least ``n`` numbers (upper bounds before scaling).

    Returns:
        Tuple ``(x, alpha)`` of the cvxpy variable and an ``n`` x ``n`` numpy
        array holding the solved matrix part of ``x``.

    Raises:
        ValueError: If ``p`` is empty.

    NOTE(review): when the solver reports the problem infeasible, ``x.value``
    is presumably ``None`` and reading the solution below fails with a
    TypeError - confirm against the cvxpy documentation.
    """
    n = len(p)
    if n == 0:
        raise ValueError("p must contain at least one value")

    # Map each (i, j) matrix position to its index in the flat vector x.
    ids, k = {}, 0
    for i in range(n):
        for j in range(n):
            ids[(i,j)] = k
            k = k+1

    # Problem data
    A = np.zeros((2*n,n*n+n))
    b = np.zeros(2*n)
    # Rows 0..n-1: every column of the matrix sums to 1.
    for j in range(n):
        for i in range(n):
            A[j,ids[(i,j)]] = 1
        b[j] = 1
    # Rows n..2n-1: p-weighted row sum minus the row's dummy variable is p[i].
    for i in range(n):
        for j in range(n):
            A[i+n,ids[(i,j)]] = p[j]
        A[i+n,n*n+i] = -1
        b[i+n] = p[i]

    # Construct the problem
    x = cp.Variable(n*n+n)
    print("M = ",A)
    print("b = ",b)
    CF = 1e3
    print("Now scaling M by ",CF)
    A = A*CF
    print(A)
    b = b*CF
    # NOTE(review): newer cvxpy releases prefer `A @ x` for matrix products -
    # confirm against the installed version before changing.
    constraints = [0 <= x, A*x == b]

    # One dummy variable per row, for any n (not only n == 3).
    slack = [x[n*n+i] for i in range(n)]
    # Left-to-right sum so the expression has the same shape as
    # s0 + s1 + ... + 1 written out by hand.
    pex = slack[0]
    for s in slack[1:]:
        pex = pex + s
    pex = pex + 1
    for i in range(n):
        constraints.append(slack[i] <= a[i]*CF)
    for i in range(n):
        constraints.append(slack[i] >= 0.01)

    ex = pex ** -1
    print("Dummy variables: ",*slack)
    print("Objective function: ",ex)
    print("[should be convex] Curvature: ",ex.curvature)
    objective = cp.Minimize(ex)
    prob = cp.Problem(objective,constraints)
    prob.solve(verbose=True)
    print('problem state: ', prob.status)

    # Copy the matrix part of the solution into an n x n array.
    alpha = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            alpha[i,j] = x.value[ids[(i,j)]]
    return (x,alpha)
if __name__ == '__main__':
    # Example input: three values of p with their upper bounds a.
    p = [0.0005, 0.0001, 0.0007]
    a = [900, 500, 700]
    n = len(a)
    sl, alpha = solve05(p, a)
    # Print the solved matrix one row per line, four decimals per entry.
    for row in alpha:
        print("".join("%.4f " % (x) + " " for x in row))
It fails with "Problem UNFEASIBLE" verdict, and I am eager to know why.
Is there any way to know more? I am not a convex programming expert, so any comments on why this is a bad model are appreciated. I have also tried scaling the problem, because I thought numerical instability might be causing the failure, but to no avail.

The answer ecos+cvxpy was giving is correct. The problem is infeasible, which can be shown by summing up all the equations and observing that the LHS is some quantity F, whereas the RHS is F+e, for some e > 0.

Label each output of a multidimensional array being plotted

Working example:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
def BoatFishSystem(state, t, *args):
    '''Return the derivatives [d_fish, d_boat] of the two-variable system.

    Model source: http://www.danham.me/r/2015/10/29/differential-eq.html

    state -- pair (fish, boat) of current values
    t     -- time; accepted but not used by the equations
    args  -- the three coefficients A, B, C, in that order
    '''
    coef_a, coef_b, coef_c = args[0], args[1], args[2]
    fish, boat = state
    return [
        fish * (coef_a - boat - fish),
        -boat * (coef_b - coef_c * fish),
    ]
# Coefficients handed to BoatFishSystem through odeint's args.
A = 2.0
B = 1.0
C = 1.5
params = (A, B, C)
# 100000 evenly spaced time points on [0, 20].
n_steps = 100000
t = np.linspace(0, 20, n_steps)
# Initial [fish, boat] state.
init_state = [1, 1]
# The parentheses around params add nothing: params is already a tuple.
species = odeint(BoatFishSystem, init_state, t, args = (params))
# One list is passed as the label for all plotted lines at once.
plt.plot(t, species, label = ['a', 'b'])
plt.legend()
plt.show()
Desired result:
One output line is labelled 'a', the other one 'b'
Current result:
Both lines are labelled ['a', 'b']

Move the labels into plt.legend():
# Plot without labels, then pass the names, in line order, to the legend call.
plt.plot(t, species)
plt.legend(['a', 'b'])

Build Element of Multivariate Polynomial Ring in loop

So I was wondering (or don't understand the docs right) how to cast a string to be interpreted as Polynomial of a given ring.
So I know how to build the string in python for given definition but I have no idea how to cast that to the polynomial ring

So I figured it out. I needed to understand ring.gens() so that I can use the generators as variables.
def build(alpha, beta, gamma):
    """Assemble a polynomial from a constant, linear and pairwise terms.

    Starting from ``alpha``, adds ``gamma[j, k] * x[j] * x[k]`` for every
    ``j != k`` and ``gamma[j, j] * x[j]`` for ``j == k``, followed by
    ``beta[k] * x[k]`` for every ``k``, where ``x`` are the generators of a
    six-variable polynomial ring over ``GF(q, 'a')``.

    NOTE(review): ``self`` and ``q`` are not defined in this snippet; this
    looks like a method lifted out of a class - confirm where ``self.v``,
    ``self.n`` and ``q`` come from.
    NOTE(review): the original indentation was lost; the linear ``beta`` loop
    is assumed to run once, after the pairwise loops - confirm.
    """
    ring = PolynomialRing(GF(q, 'a'), 'x', 6)
    x = ring.gens()
    poly = alpha
    for j in range(0, self.v):
        for k in range(0, self.n):
            # Compare indices by value; `is` tests object identity and only
            # works for integers by accident of interpreter caching.
            if j == k:
                poly += gamma[j, k] * x[j]
            else:
                poly += gamma[j, k] * x[j] * x[k]
    for k in range(0, self.n):
        poly += beta[k] * x[k]
    return poly

Python: A more Pythonic way to loop through grid classifications

I have an 11-by-11 size grid that is placed over a scatter plot. The scatter plot is of 100 randomly generated pairs. Within each grid space, is a classification type, where:
Type A is greater than 0, but less than 0.5 in both the X and Y axes,
Type B is greater than 0.5, but less than 1.5 in both the X and Y axes
etc.
I want to know how many points are within each grid space, as well as the pairs that exist in that grid space. This part isn't a problem, I just want to know if there is a more pythonic way to write my loop, since I don't want to have to write an if statement for each grid space.
My script is as follows:
# Buckets collecting the (x, y) pairs classified as type A and type B.
TypeA = []
TypeB = []
fig = plt.figure()
ax = fig.gca()
# Tick marks at 0.5, 1.5, ..., 9.5 on both axes.
ax.set_xticks(np.arange(0.5, 10.5, 1))
ax.set_yticks(np.arange(0.5, 10.5, 1))
# Draw 100 random pairs, plot each one and classify it.
for ii in range(100):
# NOTE(review): randint is not imported in this snippet; if it is
# random.randint, both bounds are inclusive and the values are integers - confirm.
RNumX = randint(0, 10)
RNumY = randint(0, 10)
print RNumX, RNumY
hold(True)
plot1 = plt.scatter(RNumX, RNumY)
# NOTE(review): indentation was lost in this copy, so it cannot be told from
# here which `if` the `elif` below pairs with.
if RNumX >= 0 and RNumX < 0.5:
if RNumY >= 0 and RNumY < 0.5:
PairA = (RNumX, RNumY)
TypeA.append(PairA)
elif RNumY >= 0.5 and RNumY < 1.5:
PairB = (RNumX, RNumY)
TypeB.append(PairB)
# Report each bucket together with its size.
SumA = len(TypeA)
SumB = len(TypeB)
print TypeA, SumA
print TypeB, SumB
plt.grid()
plt.show()

You could make Type a matrix and round the values to find your indices:
from random import random
# An 11 x 11 matrix of independent lists. Each cell must be created by its
# own [] expression: multiplying a tuple that holds one list only repeats
# references to that same list, so every cell would share its contents.
Type = tuple(tuple([] for _ in range(11)) for _ in range(11))
fig = plt.figure()
ax = fig.gca()
# Tick marks at 0.5, 1.5, ..., 9.5 on both axes
ax.set_xticks(np.arange(0.5, 10.5, 1))
ax.set_yticks(np.arange(0.5, 10.5, 1))
# Generate, plot and bucket 100 random points
for ii in range(100):
# If you want to use floats instead of ints
RNumX = 0.5 + 10 * random()
RNumY = 0.5 + 10 * random()
print RNumX, RNumY
hold(True)
plot1 = plt.scatter(RNumX, RNumY)
# Round the coordinates to find the indices
Type[int(RNumX + 0.5)][int(RNumY + 0.5)].append((RNumX, RNumY))
# Print all buckets as your snippet implies
for x in Type:
for y in x:
print y, len(y)
# Print only buckets with both values in the same range as your question implies
for x in range(11):
print Type[x][x], len(Type[x][x])
plt.grid()
plt.show()

you could use the bisect module to avoid those if statements. Here is a quick-and-dirty example.
from functools import total_ordering
import bisect
@total_ordering
class Point:
    """A 2-D point that compares on both coordinates at once.

    Two points are equal when both X and Y are equal; a point is less than
    another only when BOTH of its coordinates are strictly smaller. The
    remaining comparison operators are derived by ``total_ordering``.

    NOTE(review): this ordering is partial - a point that is smaller on one
    axis only is neither less than nor equal to the other - so the derived
    operators should be used with that in mind.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __eq__(self, other):
        return self.X == other.X and self.Y == other.Y

    def __lt__(self, other):
        # Strictly smaller on both axes.
        return self.X < other.X and self.Y < other.Y
# Interactive demonstration: each print statement below is followed by the
# line of output it produced.
# The boundary points run from (0.0, 0.0) to (11.0, 11.0) in steps of 0.5.
print [(float(x)/2, float(x)/2) for x in xrange(0, 23)]
[(0.0, 0.0), (0.5, 0.5), (1.0, 1.0), (1.5, 1.5), (2.0, 2.0), (2.5, 2.5), (3.0, 3.0), (3.5, 3.5), (4.0, 4.0), (4.5, 4.5), (5.0, 5.0), (5.5, 5.5), (6.0, 6.0), (6.5, 6.5), (7.0, 7.0), (7.5, 7.5), (8.0, 8.0), (8.5, 8.5), (9.0, 9.0), (9.5, 9.5), (10.0, 10.0), (10.5, 10.5), (11.0, 11.0)]
points = [Point(float(x)/2, float(x)/2) for x in xrange(0, 23)]
# Type names are the upper-case letters 'A'..'Z' (character codes 65..90).
types = [chr(x) for x in xrange(65, 91)]
print types
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
# bisect gives the insertion position among the boundary points; the entry
# just before it selects the type.
print types[bisect.bisect(points, Point(0.1, 0.1)) - 1]
A
print types[bisect.bisect(points, Point(0.6, 0.6)) - 1]
B

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Monte-Carlo simulation of protein structure in Python - python

Related

How to fix for loop for a list

cvxpy+ecos: problem INFEASIBLE, how to scale correctly

Label each output of a multidimensional array being plotted

Build Element of Multivariate Polynomial Ring in loop

Python: A more Pythonic way to loop through grid classifications

Categories

Resources