Degree, Proximity and Rank Prestige - python

I want to find these three Prestige measures for an existing graph using python:
Degree Prestige
Proximity Prestige
Rank Prestige
Can I use networkx for this purpose? If not, then which library can I use and how can I do it. Any links or references are appreciated.

Yes, you can, but as far as I know you have to implement the measures yourself.
For instance, consider the Degree prestige defined as the number of incoming links to a node divided by the total possible number of incoming links.
In this case you could just calculate it as:
# Degree prestige: in-degree of each node divided by the number of other
# nodes (the maximum possible number of incoming links).
n_nodes = 10
d = nx.gnp_random_graph(n_nodes, 0.5, directed=True)
# Graph.nodes_iter() was removed in NetworkX 2.0; d.nodes() works in both
# the 1.x and 2.x+ APIs.
degree_prestige = {v: d.in_degree(v) / (n_nodes - 1) for v in d.nodes()}
The same goes for the other measures, which can be easily implemented using the functions defined by networkx.

# Compute degree, proximity and rank prestige for a random directed graph.
n_nodes = 5
d = nx.gnp_random_graph(n_nodes, 0.5, directed=True)

# --- Degree prestige: in-degree divided by the number of other nodes. ---
degree_prestige = {v: d.in_degree(v) / (n_nodes - 1) for v in d.nodes()}
print("DEGREE PRESTIGE :\n")
for node, value in degree_prestige.items():
    print(node, " : ", value)

# --- Proximity prestige ---
# For each node: (fraction of the other nodes that can reach it) divided by
# (mean shortest-path distance from those nodes to it).  A node nobody can
# reach gets 0.  Each entry of `distance` is [node, proximity prestige].
distance = []
for dest in d.nodes:
    # With only `target` given, shortest_path_length returns the distance
    # from every node that can reach `dest`, so one search per node replaces
    # the has_path + shortest_path_length pair per (src, dest).
    lengths = dict(nx.shortest_path_length(d, target=dest))
    lengths.pop(dest, None)  # the node itself is not part of its influence domain
    influence = len(lengths)
    if influence == 0:
        distance.append([dest, 0])
        continue
    mean_distance = sum(lengths.values()) / influence
    distance.append([dest, (influence / (n_nodes - 1)) / mean_distance])
print("\nPROXIMITY PRESTIGE :\n")
for node, value in distance:
    print(str(node) + " : " + str(value))

# --- Rank prestige ---
# Each node is given an assumed prominence; a node's rank prestige is the
# sum of the prominence of the nodes that link to it.
prominance = np.random.randint(1, 4, size=n_nodes)
print("\nASSUME PROMINANCE :\n")
print(prominance)
# path_matrix[i][j] == 1 when the j-th node has an edge to the i-th node.
path_matrix = np.zeros([n_nodes, n_nodes], dtype=int)
node_order = list(d.nodes)
for i, src in enumerate(node_order):
    for j, dest in enumerate(node_order):
        if d.has_edge(dest, src):
            path_matrix[i][j] = 1
rank_prestige = path_matrix.dot(prominance)
print("\nRANK PRESTIGE :\n")
print(rank_prestige)

Related

how can I run this code with two loops faster? Can I run it without using for?

I want to run this code over a much wider range than the one shown, so I want to make it run faster.
Is it possible to use something else instead of these loops?
# Presumably the arguments passed to myfunction(z1, z2): there z1 is the
# number of outer-loop runs and z2 the number of percolation steps per run.
# NOTE(review): HEIGHT must already be defined when this line executes.
z1=3
z2=HEIGHT-1
def myfunction(z1, z2):
    """Run ``z1`` percolation simulations of ``z2`` steps each and save them.

    Every run starts from a vector of length WIDTH with a single 1 in the
    middle, applies doPercolationStep ``z2`` times and stacks all states
    (initial state included) into a matrix.  Zeros are mapped to -1, the
    matrix is averaged with its transpose, divided by sqrt(L) and written to
    ``F:/folder/matr5/<run>.csv``.

    NOTE(review): adding the matrix to its transpose requires it to be
    square, i.e. z2 + 1 == WIDTH -- confirm against the caller.
    """
    for run in range(z1):
        state = np.zeros(WIDTH)
        state[WIDTH // 2] = 1
        history = [state]
        for step in range(z2):
            state = doPercolationStep(state, PROP, step)
            history.append(state)
        spins = np.array(history).astype(int)
        spins = np.where(spins == 0, -1, spins)
        symmetric = (spins + spins.T) / 2
        matr5 = symmetric / np.sqrt(L)
        np.savetxt('F:/folder/matr5/' + str(run) + '.csv', matr5)
and doPercolationStep is:
# Parameters shared by the percolation snippets.
WIDTH = 5  # length of the state vector created in myfunction
HEIGHT = 5  # z2 is derived from this as HEIGHT - 1
L=5  # myfunction divides the symmetrised matrix by sqrt(L)
PROP = 0.6447  # threshold the random draws are compared against in doPercolationStep
def doPercolationStep(vector, PROP, time):
    """Return the state after one percolation step; ``vector`` is not modified.

    Only sites whose index has the same parity as ``time`` are visited.  Each
    visited site holding a 1 draws two random numbers; a draw below ``PROP``
    activates the left (respectively right) neighbour, with wrap-around at
    the ends.  The visited site itself is then cleared.
    """
    size = len(vector)
    updated = np.copy(vector)
    for site in range(time % 2, size, 2):
        if vector[site] != 1:
            continue
        # Both numbers are drawn before either is used, so the random stream
        # is consumed in a fixed order.
        spread_left = random.random() < PROP
        spread_right = random.random() < PROP
        if spread_left:
            updated[(site - 1) % size] = 1  # left neighbour of site
        if spread_right:
            updated[(site + 1) % size] = 1  # right neighbour of site
        updated[site] = 0
    return updated

Setting limits. I want QH = 1 if QH > 1 but I don't know how to do it or where to put it

The code is solving an integral using the trapezium rule. I need to set a limit on QH so that if QH > 1 then QH = 1. I can't seem to get what I've done below to work properly.
## Solve ODE
QH = odeint(model, QH0, z, atol = 1.0e-8, rtol = 1.0e-8)
# NOTE(review): rtol here is 1.8e-8 while the QH solve uses 1.0e-8 -- confirm
# this is intentional.
QHe = odeint(model1, QHe0, z, atol = 1.0e-8, rtol = 1.8e-8)
# Cap both solutions at 1.  odeint returns arrays, so `if QH > 1` is
# ambiguous and `QH == 1` only compares; clamp element-wise instead.
QH = np.minimum(QH, 1)
QHe = np.minimum(QHe, 1)
#Solving Thomson Optical Depth Integral for Hydrogen
def f_hydrogen(z_in):
    """Integrand of the hydrogen optical-depth integral at ``z_in``.

    Returns QH interpolated at ``z_in`` multiplied by (1 + z_in)**2 / H(z_in),
    with H(z) = H0 * sqrt(OMEGAm * (1 + z)**3 + OMEGAlam).  Reads the
    module-level ``z`` grid and ``QH`` solution.
    """
    hubble = H0 * math.sqrt(OMEGAm * ((1 + z_in) ** 3) + OMEGAlam)
    # Both arrays are reversed before interpolating; np.interp needs
    # increasing x, so z is presumably stored in decreasing order.
    ionised_fraction = np.interp(z_in, z[::-1], QH.flatten()[::-1])
    return ionised_fraction * (((1 + z_in) ** 2) / hubble)
# Integrate f_hydrogen from a to z1 with the trapezium rule on n equal
# sub-intervals, keeping the running value at every grid point.
a = 0
z1 = 7
n = 1000
hei = (z1-a)/n
# Integrand sampled at the n + 1 grid points a, a + hei, ..., z1.
samples = np.array([f_hydrogen(a + (k*hei)) for k in range(n + 1)])
# int_a[k] is the integral from a to a + k*hei.  Each sub-interval adds
# hei/2 * (f(left) + f(right)), so interior points are counted twice and the
# end points once.  The old running sum counted f(a) as an interior point,
# skipped the last interior point and left the final entry at zero.
int_a = np.zeros(n+1)
int_a[1:] = np.cumsum((hei/2)*(samples[:-1] + samples[1:]))
tH = (c)*(sigma)*(nbarH)*(1+(y/(4*x)))*(int_a)
# tH[index] is the optical depth accumulated up to grid point `index`;
# the last entry is the full integral from a to z1.
for index, val in enumerate(tH):
    print("Thomson Optical Depth - Hydrogen = ", index, val)

Manual kth Nearest Neighbor Euclidean Distance

I have to modify the following code to use 1, 3, and 5 neighbors and print the accuracy for each one. I cannot use sklearn's KNeighborsClassifier, so I am stuck because I don't know how to change the number of neighbors (k) here.
Code:
from collections import Counter

# Shuffle the data and split it 75% / 25% into training and test sets.
shuffle_df = dataset.sample(frac=1)
train_size = int(0.75 * len(dataset))
train_set = shuffle_df[:train_size]
test_set = shuffle_df[train_size:]

# Columns 0-3 are the numeric features, column 4 is the class label.
train_rows = train_set.iloc[:, :5].values.tolist()
test_rows = test_set.iloc[:, :5].values.tolist()

neighbor_counts = (1, 3, 5)

# For every test sample keep its nearest training samples as
# [distance, true test class, training class], closest first.  Iterating
# over the rows themselves removes the hard-coded sizes 37 and 111.
testarray = []
for *test_features, classtest in test_rows:
    ranked = sorted(
        (
            [
                sqrt(sum((a - b) ** 2 for a, b in zip(test_features, train_features))),
                classtest,
                # The label must come from the TRAINING row; reading it from
                # the test row made every prediction trivially correct.
                classtrain,
            ]
            for *train_features, classtrain in train_rows
        ),
        key=lambda entry: entry[0],
    )
    testarray.append(ranked[:max(neighbor_counts)])

for k in neighbor_counts:
    count = 0
    for results in testarray:
        if not results:
            continue  # no training data to vote with
        # Majority vote among the k nearest neighbours; on a tie Counter
        # keeps first-seen order, so the closest neighbour's class wins.
        votes = Counter(entry[2] for entry in results[:k])
        if votes.most_common(1)[0][0] == results[0][1]:
            count += 1
    print(f"Neighbors: {k}")
    print(count)
    accuracy = count / len(testarray) if testarray else 0.0
    print(f"\n Accuracy{accuracy: .3%}")
Output:
Neighbors: 1 3 5
Success Rate: 86.8% 89.5% 89.5%

Python - Dijkstra's algorithm in O(V+ElogV) time complexity

I would like to implement the above-mentioned algorithm in Python. The time complexity of the basic version of Dijkstra's algorithm is O(V^2), but I would like to implement it using a min-priority queue so that it drops to O(V + E log V).
Heres an example input:
The program should solve 2 problems: src: 0 dest: 2 and src: 1 dest: 2. The input provides 3 vertices and 3 edges; these sections are separated by an empty line.
2
3
3
0 2
1 2
2 0
-4 1
6 3
1 0
1 2
0 2
Solution:
5.0 10.2
Heres my current code:
import math
import sys
def build_graph(edges, weights, e):
    """Return a copy of ``edges`` with a weight appended to the first ``e`` rows.

    :param edges: sequence of ``[u, v]`` rows
    :param weights: ``weights[i]`` is the weight of ``edges[i]``
    :param e: number of leading edges that receive a weight
    :return: new list of rows ``[u, v, weight]``

    The rows are copied first so the caller's edge lists are not modified;
    appending in place would add a second weight if the function were called
    twice on the same data.
    """
    graph = [list(edge) for edge in edges]
    for i in range(e):
        graph[i].append(weights[i])
    return graph
def debug():
    """Print the four module-level input tables, one per line."""
    print(pontparok, csucsok, utszakaszok, hosszak, sep="\n")
def read(n, bemenet):
    """Read ``n`` tab-separated lines from stdin into ``bemenet[0..n-1]``.

    Each line is split on tabs and its first two fields are converted to
    int; any further fields stay strings.
    """
    for row in range(n):
        fields = input().split("\t")
        bemenet[row] = fields
        fields[0] = int(fields[0])
        fields[1] = int(fields[1])
def dijkstra(edges, src, dest, n):
    """Shortest-path length from ``src`` to ``dest`` in an undirected graph.

    :param edges: iterable of ``[u, v, weight]`` rows (non-negative weights)
    :param src: index of the start vertex
    :param dest: index of the target vertex
    :param n: number of vertices, labelled ``0 .. n-1``
    :return: distance rounded to 2 decimals, or ``sys.maxsize`` when
        ``dest`` cannot be reached from ``src``

    The earlier version chose the next vertex only among the neighbours of
    the current vertex and stopped after a fixed 1000 iterations, so it
    could return a non-shortest distance.  This version uses a binary heap
    over all tentative distances.
    """
    import heapq

    # Adjacency lists, so each vertex scans only its own edges.
    adjacency = [[] for _ in range(n)]
    for edge in edges:
        adjacency[edge[0]].append((edge[1], edge[2]))
        adjacency[edge[1]].append((edge[0], edge[2]))

    dist = [sys.maxsize] * n
    dist[src] = 0
    queue = [(0, src)]
    while queue:
        current_dist, current = heapq.heappop(queue)
        if current_dist > dist[current]:
            continue  # stale entry: a shorter path was already found
        if current == dest:
            break  # the first time dest is popped its distance is final
        for neighbour, weight in adjacency[current]:
            candidate = current_dist + weight
            if candidate < dist[neighbour]:
                dist[neighbour] = candidate
                heapq.heappush(queue, (candidate, neighbour))
    return round(dist[dest], 2)
# Header: number of queries, vertices and edges, then a blank separator line.
p, n, e = (int(input()) for _ in range(3))
input()

# Placeholder tables; read() replaces every row with the parsed input.
pontparok = [[0, 0] for _ in range(p)]      # (source, destination) queries
csucsok = [[0, 0] for _ in range(n)]        # vertex coordinates
utszakaszok = [[0, 0] for _ in range(e)]    # edges as vertex-index pairs
hosszak = [0] * e                           # Euclidean length of each edge

read(p, pontparok)
input()
read(n, csucsok)
input()
read(e, utszakaszok)

for i in range(e):
    start = csucsok[utszakaszok[i][0]]
    end = csucsok[utszakaszok[i][1]]
    hosszak[i] = math.sqrt((start[0] - end[0]) ** 2 + (start[1] - end[1]) ** 2)
#debug()
graph = build_graph(utszakaszok, hosszak, e)
#print(hosszak)

# Results are tab-separated on one line; only the last one ends the line.
for i in range(p):
    separator = "\n" if i == p - 1 else "\t"
    print(dijkstra(graph, pontparok[i][0], pontparok[i][1], n), end=separator)
import heapq
import math
def read(n, bemenet):
    """Fill ``bemenet[0..n-1]`` with tab-split stdin lines.

    The first two fields of every line are converted to int in place;
    remaining fields, if any, are left as strings.
    """
    for index in range(n):
        bemenet[index] = input().split("\t")
        for column in (0, 1):
            bemenet[index][column] = int(bemenet[index][column])
def dijkstra(graph, starting_vertex, destination_vertex):
    """Shortest-path length between two vertices using a binary heap.

    :param graph: dict mapping each vertex to a dict ``{neighbor: weight}``
        (non-negative weights)
    :param starting_vertex: vertex the search starts from
    :param destination_vertex: vertex whose distance is returned
    :return: distance rounded to 2 decimals; ``inf`` if unreachable
    """
    distances = {vertex: float('infinity') for vertex in graph}
    distances[starting_vertex] = 0
    pq = [(0, starting_vertex)]
    while pq:
        current_distance, current_vertex = heapq.heappop(pq)
        if current_distance > distances[current_vertex]:
            # Stale heap entry: this vertex was already settled with a
            # shorter distance, so re-expanding it is wasted work.
            continue
        if current_vertex == destination_vertex:
            # With non-negative weights the first pop of a vertex is final.
            break
        for neighbor, weight in graph[current_vertex].items():
            distance = current_distance + weight
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                heapq.heappush(pq, (distance, neighbor))
    return round(distances[destination_vertex], 2)
def build_graph(edges, weights, e):
    """Return weighted edge rows ``[u, v, weight]`` built from ``edges``.

    :param edges: sequence of ``[u, v]`` rows
    :param weights: ``weights[i]`` is the weight of ``edges[i]``
    :param e: number of leading edges that receive a weight
    :return: a new list; the caller's ``edges`` rows are left untouched

    Copying the rows before appending keeps repeated calls from stacking
    extra weights onto the input lists.
    """
    graph = [list(edge) for edge in edges]
    for i in range(e):
        graph[i].append(weights[i])
    return graph
# Input header: query count, vertex count, edge count, then a blank line.
p = int(input())
n = int(input())
e = int(input())
input()

# Tables filled by read(): queries, vertex coordinates and edges.
pontparok = [[0] * 2 for _ in range(p)]
csucsok = [[0] * 2 for _ in range(n)]
utszakaszok = [[0] * 2 for _ in range(e)]
hosszak = [0] * e
read(p, pontparok)
input()
read(n, csucsok)
input()
read(e, utszakaszok)

# Euclidean length of every edge from its end-point coordinates.
for i in range(e):
    first, second = utszakaszok[i][0], utszakaszok[i][1]
    dx = csucsok[first][0] - csucsok[second][0]
    dy = csucsok[first][1] - csucsok[second][1]
    hosszak[i] = math.sqrt(pow(dx, 2) + pow(dy, 2))

# Adjacency mapping {vertex: {neighbour: weight}}; edges are undirected, so
# each one is stored in both directions.
graph2 = build_graph(utszakaszok, hosszak, e)
graph = {vertex: {} for vertex in range(n)}
for edge in graph2:
    graph[edge[0]][edge[1]] = edge[2]
    graph[edge[1]][edge[0]] = edge[2]

# One result per query, tab-separated, with a newline after the last.
for i in range(p):
    result = dijkstra(graph, pontparok[i][0], pontparok[i][1])
    if i < p - 1:
        print(result, end="\t")
    else:
        print(result)

How to avoid out of memory python?

I'm new to Python and Ubuntu. My process gets killed after running some Python code. The input file is around 2.7 GB, and I have 16 GB of RAM and a 1 TB hard drive. From what I have found while searching, this looks like an out-of-memory problem; what should I do to avoid it?
I used this command
free -mh
I got
total used free shared buff/cache available
Mem: 15G 2.5G 9.7G 148M 3.3G 12G
Swap: 4.0G 2.0G 2.0G
the code link I tried Link
import numpy as np
import matplotlib.pyplot as plt
class ProcessData(object):
    """Density/distance computations over a file of pairwise distances.

    The methods are stateless: every one takes the distance dict produced by
    :meth:`data_process` (keys are ``(i, j)`` point-id pairs, ids start at 1)
    plus whatever else it needs, and returns plain Python containers.
    """

    def data_process(self, folder):
        '''
        Load pairwise distances from a whitespace-separated text file whose
        non-blank lines are ``i j distance``.

        :folder: data file path
        :rtype: dict pair distance
                MAX id number
        '''
        distance = dict()
        max_pt = 0
        with open(folder, 'r') as data:
            for line in data:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                i, j, dis = fields
                i, j, dis = int(i), int(j), float(dis)
                # NOTE: both orientations are stored, so the dict holds two
                # entries per input line; for very large files this is the
                # dominant memory cost.
                distance[(i, j)] = dis
                distance[(j, i)] = dis
                max_pt = max(i, j, max_pt)
        for num in range(1, max_pt + 1):
            distance[(num, num)] = 0
        return distance, max_pt

    def entropy(self, distance, maxid, factor):
        '''
        :distance: dict with pair: dist
        :factor: impact factor
        :maxid: max elem number
        :rtype: entropy H in data field
        '''
        potential = dict()
        for i in range(1, maxid + 1):
            tmp = 0
            for j in range(1, maxid + 1):
                tmp += np.exp(-pow(distance[(i, j)] / factor, 2))
            potential[i] = tmp
        z = sum(potential.values())
        H = 0
        for i in range(1, maxid + 1):
            x = potential[i] / z
            H += x * np.log(x)
        return -H

    def threshold(self, dist, max_id):
        '''
        Scan a fixed range of impact factors and pick the one with the
        smallest entropy.

        :rtype: factor value makes H smallest, scaled by 3 / sqrt(2)
        :raises ValueError: if no scanned factor gives a usable entropy
        '''
        # Start from +inf, not a fixed 10.0: if every entropy was >= 10,
        # `thresh` was never assigned and the function crashed with
        # UnboundLocalError.
        entro = float('inf')
        thresh = None
        # given data:
        # 0.02139999999999999 7.203581306901208
        # 0.02149999999999999 7.203577254067677
        # 0.02159999999999999 7.203577734107922
        # generate data:
        # 0.367020, 6.943842
        # 0.368959, 6.943840
        # 0.370898, 6.943841
        scape = np.linspace(0.330, 0.430, 50)
        # For general data use the following line instead:
        # scape = np.linspace(0.001, 1.001, 100)
        for factor in scape:
            value = self.entropy(dist, max_id, factor)
            print('factor: {0:.6f}, entropy: {1:.8f}'.format(factor, value))
            # plt.scatter(factor, value, c='r', s=1)
            if value and value < entro:
                entro, thresh = value, factor
        if thresh is None:
            raise ValueError('no impact factor produced a usable entropy value')
        thresh = 3 * thresh / pow(2, 0.5)
        # Optional plot of the scan (disabled):
        # plt.xlabel(r'$\sigma$')
        # plt.ylabel(r'H')
        # plt.savefig('./images/Entropy test.png')
        # plt.close()
        print('current: ', entro, thresh)
        # given data: 7.203577254067677 0.04560838738653229
        # generate data: 6.943840312796875 0.7828967189629044
        return thresh

    def CutOff(self, distance, max_id, threshold):
        '''
        Count, for every point, how many points (itself included) lie
        closer than ``threshold``.

        :rtype: list with Cut-off kernel values by desc
        '''
        cut_off = dict()
        for i in range(1, max_id + 1):
            tmp = 0
            for j in range(1, max_id + 1):
                gap = distance[(i, j)] - threshold
                tmp += 0 if gap >= 0 else 1
            cut_off[i] = tmp
        sorted_cutoff = sorted(cut_off.items(), key=lambda k: k[1], reverse=True)
        return sorted_cutoff

    def Guasse(self, distance, max_id, threshold):
        '''
        :rtype: list with Gaussian kernel values by desc
        '''
        guasse = dict()
        for i in range(1, max_id + 1):
            tmp = 0
            for j in range(1, max_id + 1):
                tmp += np.exp(-pow((distance[(i, j)] / threshold), 2))
            guasse[i] = tmp
        sorted_guasse = sorted(guasse.items(), key=lambda k: k[1], reverse=True)
        return sorted_guasse

    def min_distance(self, distance, srt_dens, maxid):
        '''
        For every point, find the nearest point that precedes it in
        ``srt_dens`` (i.e. has higher or equal density).

        :srt_dens: desc sorted list with density values (point, density)
        :rtype: min distance dict
                min number dict
        '''
        nearest_dist = dict()
        min_number = dict()
        # The densest point has no denser neighbour: it gets the largest
        # distance to any point and the placeholder neighbour id 0.
        h_dens = srt_dens[0][0]
        min_number[h_dens] = 0
        max_dist = -1
        for i in range(1, maxid + 1):
            max_dist = max(distance[(h_dens, i)], max_dist)
        nearest_dist[h_dens] = max_dist
        for j in range(1, len(srt_dens)):
            # Start from +inf so the search also works when distances are
            # not normalised below 1; a start value of 1 left such points
            # with distance 1 and the invalid neighbour id 0.
            min_dist, min_num = float('inf'), 0
            current_num = srt_dens[j][0]
            for k in srt_dens[0:j]:
                current_dist = distance[(current_num, k[0])]
                if current_dist < min_dist:
                    min_dist, min_num = current_dist, k[0]
            nearest_dist[current_num] = min_dist
            min_number[current_num] = min_num
        return nearest_dist, min_number

    def make_pair(self, srt_dens, min_dist, maxid):
        '''
        :rtype: pair dict with {point: [density, min dist]}
                refer factor dict with {point: density * dist}
        :raises ValueError: if srt_dens does not cover exactly maxid points
        '''
        pair_dict = dict()
        refer_dict = dict()
        # convert list to dict
        dens_dict = {elem[0]: elem[1] for elem in srt_dens}
        if len(dens_dict) != maxid:
            # The old branch evaluated `maxid - dens_dict` (int minus dict)
            # and raised TypeError before any message was printed.
            raise ValueError('missing %d value' % (maxid - len(dens_dict)))
        for key, density in dens_dict.items():
            pair_dict[key] = [density, min_dist[key]]
            refer_dict[key] = density * min_dist[key]
        return pair_dict, refer_dict

    def show_pair_info(self, pair, threshold):
        """Save a scatter plot of density against min distance.

        The figure is written to './images/Decision Graph Cutoff test.png'.
        Points are keyed by density, so points sharing a density value
        collapse to a single marker.
        """
        show_dict = dict()
        for p in pair.values():
            show_dict[p[0]] = p[1]
        tmp = sorted(show_dict.items())
        dens, mdis = zip(*tmp)
        plt.scatter(dens, mdis)
        plt.xlabel(r'$\rho$')
        plt.ylabel(r'$\delta$')
        plt.title(r'$d_c=$' + str(threshold))
        plt.savefig('./images/Decision Graph Cutoff test.png')
        plt.close()
I tried to track the problem down using fil-profile, and it pointed to line 11, which indicates the data_process method.
An issue could be f.readlines() as it creates a complete list.
So if COOR_DATA is very large then you should only create memory for one line at a time, so try changing:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
lines = f.readlines()
coords = dict()
for line in lines:
To:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
coords = dict()
for line in f:
See https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects

Categories

Resources