How to avoid out-of-memory errors in Python?

I'm new to Python and Ubuntu. My Python process got killed after running my code. The file I'm reading is around 2.7 GB and I have 16 GB of RAM and a one-terabyte hard drive. What should I do to avoid this problem? From what I found while searching, it seems to be an out-of-memory problem.
I used this command
free -mh
I got
              total        used        free      shared  buff/cache   available
Mem:            15G        2.5G        9.7G        148M        3.3G         12G
Swap:          4.0G        2.0G        2.0G
Here is the code I tried (Link):
import numpy as np
import matplotlib.pyplot as plt


class ProcessData(object):

    def data_process(self, folder):
        '''
        :folder: data file path
        :rtype: dict pair distance
                MAX id number
        '''
        distance = dict()
        max_pt = 0
        with open(folder, 'r') as data:
            for line in data:
                i, j, dis = line.strip().split()
                i, j, dis = int(i), int(j), float(dis)
                distance[(i, j)] = dis
                distance[(j, i)] = dis
                max_pt = max(i, j, max_pt)
            for num in range(1, max_pt + 1):
                distance[(num, num)] = 0
        return distance, max_pt

    def entropy(self, distance, maxid, factor):
        '''
        :distance: dict with pair: dist
        :factor: impact factor
        :maxid: max elem number
        :rtype: entropy H in data field
        '''
        potential = dict()
        for i in range(1, maxid + 1):
            tmp = 0
            for j in range(1, maxid + 1):
                tmp += np.exp(-pow(distance[(i, j)] / factor, 2))
            potential[i] = tmp
        z = sum(potential.values())
        H = 0
        for i in range(1, maxid + 1):
            x = potential[i] / z
            H += x * np.log(x)
        return -H
    def threshold(self, dist, max_id):
        '''
        :rtype: factor value makes H smallest
        '''
        entro = 10.0
        # given data:
        # 0.02139999999999999 7.203581306901208
        # 0.02149999999999999 7.203577254067677
        # 0.02159999999999999 7.203577734107922
        # generate data:
        # 0.367020, 6.943842
        # 0.368959, 6.943840
        # 0.370898, 6.943841
        scape = np.linspace(0.330, 0.430, 50)
        # for general data, use the following line instead
        # scape = np.linspace(0.001, 1.001, 100)
        for factor in scape:
            value = self.entropy(dist, max_id, factor)
            print('factor: {0:.6f}, entropy: {1:.8f}'.format(factor, value))
            # plt.scatter(factor, value, c='r', s=1)
            if value and value < entro:
                entro, thresh = value, factor
        thresh = 3 * thresh / pow(2, 0.5)
        """
        plt.xlabel(r'$\sigma$')
        plt.ylabel(r'H')
        plt.savefig('./images/Entropy test.png')
        plt.close()
        """
        print('current: ', entro, thresh)
        # given data: 7.203577254067677 0.04560838738653229
        # generate data: 6.943840312796875 0.7828967189629044
        return thresh
    def CutOff(self, distance, max_id, threshold):
        '''
        :rtype: list with Cut-off kernel values by desc
        '''
        cut_off = dict()
        for i in range(1, max_id + 1):
            tmp = 0
            for j in range(1, max_id + 1):
                gap = distance[(i, j)] - threshold
                tmp += 0 if gap >= 0 else 1
            cut_off[i] = tmp
        sorted_cutoff = sorted(cut_off.items(), key=lambda k: k[1], reverse=True)
        return sorted_cutoff

    def Guasse(self, distance, max_id, threshold):
        '''
        :rtype: list with Gaussian kernel values by desc
        '''
        guasse = dict()
        for i in range(1, max_id + 1):
            tmp = 0
            for j in range(1, max_id + 1):
                tmp += np.exp(-pow((distance[(i, j)] / threshold), 2))
            guasse[i] = tmp
        sorted_guasse = sorted(guasse.items(), key=lambda k: k[1], reverse=True)
        return sorted_guasse

    def min_distance(self, distance, srt_dens, maxid):
        '''
        :srt_dens: desc sorted list with density values (point, density)
        :rtype: min distance dict
                min number dict
        '''
        min_distance = dict()
        min_number = dict()
        h_dens = srt_dens[0][0]
        min_number[h_dens] = 0
        max_dist = -1
        for i in range(1, maxid + 1):
            max_dist = max(distance[(h_dens, i)], max_dist)
        min_distance[h_dens] = max_dist
        for j in range(1, len(srt_dens)):
            min_dist, min_num = 1, 0
            current_num = srt_dens[j][0]
            for k in srt_dens[0:j]:
                current_dist = distance[(current_num, k[0])]
                if current_dist < min_dist:
                    min_dist, min_num = current_dist, k[0]
            min_distance[srt_dens[j][0]] = min_dist
            min_number[current_num] = min_num
        return min_distance, min_number
    def make_pair(self, srt_dens, min_dist, maxid):
        '''
        :rtype: pair dict with {point: [density, min dist]}
                refer factor dict with {point: density * dist}
        '''
        pair_dict = dict()
        dens_dict = dict()
        refer_dict = dict()
        # convert list to dict
        for elem in srt_dens:
            dens_dict[elem[0]] = elem[1]
        if len(dens_dict) == maxid:
            for key in dens_dict.keys():
                pair_dict[key] = [dens_dict[key], min_dist[key]]
                refer_dict[key] = dens_dict[key] * min_dist[key]
        else:
            return print('missing %d value', maxid - dens_dict)
        return pair_dict, refer_dict

    def show_pair_info(self, pair, threshold):
        show_dict = dict()
        for p in pair.values():
            show_dict[p[0]] = p[1]
        tmp = sorted(show_dict.items())
        dens, mdis = zip(*tmp)
        plt.scatter(dens, mdis)
        plt.xlabel(r'$\rho$')
        plt.ylabel(r'$\delta$')
        plt.title(r'$d_c=$' + str(threshold))
        plt.savefig('./images/Decision Graph Cutoff test.png')
        plt.close()
I tried to figure it out using fil-profile, and it reported a problem with line 11, which points to data_process.

One issue could be f.readlines(), since it reads the entire file into memory as a list.
If COOR_DATA is very large, you should only hold one line in memory at a time, so try changing:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
    lines = f.readlines()
coords = dict()
for line in lines:
To:
with open(COOR_DATA, 'r', encoding='utf-8') as f:
    coords = dict()
    for line in f:
See https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects
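The readlines fix helps, but with the code shown above the distance dict itself can also exhaust memory: it stores every (i, j) and (j, i) pair as separate Python tuple keys and float objects. As a rough sketch, not part of the original answer, data_process could keep the distances in a single float32 NumPy matrix instead (this assumes the same whitespace-separated "i j dist" file format and 1-based ids used above; the name data_process_lowmem is made up for illustration):

import numpy as np

def data_process_lowmem(folder):
    # first pass: find the largest point id
    max_pt = 0
    with open(folder, 'r') as data:
        for line in data:
            i, j, _ = line.strip().split()
            max_pt = max(int(i), int(j), max_pt)
    # second pass: fill a symmetric float32 distance matrix (row/col 0 unused, ids are 1-based)
    distance = np.zeros((max_pt + 1, max_pt + 1), dtype=np.float32)
    with open(folder, 'r') as data:
        for line in data:
            i, j, dis = line.strip().split()
            distance[int(i), int(j)] = float(dis)
            distance[int(j), int(i)] = float(dis)
    return distance, max_pt

A float32 matrix needs roughly 4 * (N+1)^2 bytes, which for a densely populated pair file is far less than a dict holding the same pairs as Python objects, and the rest of the code can keep indexing with distance[(i, j)], which works on a NumPy array as well.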

Related

Python | Search Algorithm just taking up 100% of CPU and never returning anything

I am trying to write a search algorithm that takes in a start point and returns the path to the end point. I originally tried doing it with nested for loops and a list of lists, so that I could just loop through and find a path, but the RAM requirements convinced me to try a class-based system instead. However, all it does now is take about 2 GB of RAM and 100% of one of my CPU cores and just sit there without exiting. If anyone sees a problem in my code, any help would be greatly appreciated.
import csv
import math
from multiprocessing import Process
from rplidar import RPLidar
import heapq

lidar = RPLidar('/dev/ttyUSB0')
file = "lidar01.csv"

def calc_offset():
    # take in argos ros data and calculate offset
    x_offset = 0
    y_offset = 0
    return x_offset, y_offset

def find_fix_quad_convert(x, y):
    offset_x, offset_y = calc_offset()
    if x >= 0 and y >= 0:
        x = abs(x + 12000 + offset_x)
        y = abs(y + offset_y)
        return x,y
    elif x < 0 and y >= 0:
        x = abs(x - 12000 + offset_x)
        y = abs(y + offset_x)
        return x,y
    elif x < 0 and y < 0:
        x = abs(x - 12000 + offset_x)
        y = abs(y - 12000 + offset_y)
        return x,y
    elif x >= 0 and y < 0:
        x = abs(x + 12000 + offset_x)
        y = abs(y - 12000 + offset_y)
        return x,y

def scan():
    try:
        for scan in enumerate(lidar.iter_scans()):
            list_version_data = list(scan)
            for data in list_version_data:
                if isinstance(data, list):
                    for indiv_data_points in data:
                        if isinstance(indiv_data_points, tuple):
                            list_indiv_data_points = list(indiv_data_points)
                            list_indiv_data_points.pop(0)
                            angle = list_indiv_data_points[0]
                            distance = list_indiv_data_points[1]
                            length = distance
                            angle = angle
                            angle = math.radians(angle)
                            x,y = (length * math.cos(angle)), (length * math.sin(angle))
                            x = int(x)
                            y = int(y)
                            new_x,new_y = find_fix_quad_convert(x,y)
                            with open(file=file, mode="w") as f:
                                writer = csv.writer(f)
                                writer.writerow([new_x,new_y])
    except Exception as e:
        print(e)
        pass

def eliminate_duplicates():
    unique_coords = set()
    with open(file, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            coord = (row[0], row[1])
            if coord not in unique_coords:
                unique_coords.add(coord)
    with open(file, 'w') as f:
        writer = csv.writer(f)
        for coord in unique_coords:
            writer.writerow(coord)

# create the node class that takes in the individual data points and creates a node for the nav graph
class Node:
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.neighbors = []
        self.parent = None
    def __eq__(self, other):
        return self.x == other.x and self.y == other.y
    def __lt__(self, other):
        return self.f < other.f

def scan_eliminate_duplicates():
    scan_process = Process(target=scan)
    eliminate_duplicates_process = Process(target=eliminate_duplicates)
    scan_process.start()
    scan_process.join()
    eliminate_duplicates_process.start()
    eliminate_duplicates_process.join()

def find_path(start, end, nodes):
    open_set = []
    closed_set = set()
    start.f = 0
    heapq.heappush(open_set, start)
    while open_set:
        current_node = heapq.heappop(open_set)
        closed_set.add(current_node)
        if current_node == end:
            print(f"Path found: {0}".format(construct_path(current_node)))
            return construct_path(current_node)
        for neighbor in current_node.neighbors:
            if neighbor in closed_set:
                continue
            tentative_g = current_node.f + 1
            if neighbor not in open_set or tentative_g < neighbor.f:
                neighbor.parent = current_node
                neighbor.f = tentative_g
                if neighbor not in open_set:
                    heapq.heappush(open_set, neighbor)
    return None

def construct_path(node):
    path = []
    while node.parent:
        path.append((node.x, node.y))
        node = node.parent
    return path[::-1]

if __name__ == "__main__":
    scan_elim_dupl_process = Process(target=scan_eliminate_duplicates)
    nodes = []
    with open(file, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            node = Node(int(float(row[0])), int(float(row[1])))
            nodes.append(node)
    # set start and end nodes
    start = Node(3201, 3201)
    end = Node(23000, 23000)
    # connect the nodes to their neighbors
    for i, node in enumerate(nodes):
        for j in range(i+1, len(nodes)):
            if abs(node.x - nodes[j].x) <= 1 and abs(node.y - nodes[j].y) <= 1:
                node.neighbors.append(nodes[j])
                nodes[j].neighbors.append(node)
    find_path_process = Process(target=find_path, args=(start, end, nodes))
    scan_elim_dupl_process.start(), find_path_process.start()
    scan_elim_dupl_process.join(), find_path_process.join()
CSV data (example):
-224.45409129769087,-75.30553365940557
-225.4021550412925,-75.62361405501024
-221.37533513849013,-86.0186665341958
-222.02088232366805,-83.6318737815909
-219.05825287406182,-90.4570718504838
-216.1406631194247,-97.22249609167298
-212.35203834877927,-105.80252506022047
-210.74781416150145,-110.5864314739799
-209.03673236351906,-114.8298503124623
-207.00083783790242,-118.46055518359869
-202.61438759451943,-126.76123200607452
-200.80257079121006,-132.35776351858277
-198.46526749871208,-137.60010027854142
-200.72914689131528,-136.5114357417897
-198.8372035969446,-141.42053056663028
-195.46212772818174,-148.12872484421098
-192.555826974252,-155.49438413737627
-191.2678199044531,-159.4290471306835
-204.80806340541443,-686.6046221546457
-189.9329560617947,-692.9413555284663
-174.4435476087335,-698.0327346891975
-157.25903052807882,-703.8971230352976
-142.50543063768973,-710.3467126965301
-44.080364168658264,-424.9699801100761
-12.039081185863548,-420.3276228422303
151.3137816891034,-880.5943387683925
171.58805621421078,-880.1807209116937
192.6920068673774,-879.3860659513674
213.97191813826333,-877.540073585379
235.7914668768005,-874.2611933215878
257.2898049769299,-872.088022366397
279.8247876333225,-870.8993344388122
301.1827105821156,-869.3037299163104
323.32023991345227,-866.9207489513143
344.60980320968105,-865.141980273634
368.38864460533046,-862.6319067399766
390.5808612705762,-860.7811297357389
413.666591519788,-858.81634448839
437.78734499029486,-856.893985896942
462.98529035913396,-854.9354849408629
488.6789701052747,-851.7727773161384
513.1091380975454,-851.3254444692665
540.2315442531086,-849.5265982262719
566.5849433716348,-845.456423740787
593.946149447507,-843.3387347658589
620.2144841174974,-841.046368335817
649.1761458917417,-837.0071051700481
678.6559458023329,-834.341870714362
709.32872426918,-831.894418437014
739.9610013434269,-829.0562580373138
772.0166065569867,-826.5297086011094
807.4258588219725,-823.4373352026249
841.6994505337709,-821.2904693049519
878.8426320460152,-818.1908032961703
917.0058399786907,-814.8716782076648
953.9215320868914,-809.3421468211068
989.2825428144441,-801.5520026689394
1089.7385480638236,-803.6080492775999
1124.2452992304018,-789.0340659048534
1161.1577259536114,-774.4874420920189
1207.7231504414601,-765.7054210907444
1256.6619459378417,-758.3474244247931
1312.4869985934681,-749.6733478810021
1436.2817842205613,-736.2680465130896
560.4785752204706,-119.13339883857641
561.947341484636,-105.41939052351769
562.7845996626268,-91.97041308256136
562.0728347190211,-80.08864445677706
572.2556983202762,-67.73092528507895
3073.007418570301,775.0941671254502
3076.1322580025953,851.280443670507
3085.7160627583366,932.367218447319
3079.5934798899584,1010.8439845590693
3065.409617566463,1086.4593591253329
3049.6010101414113,1162.0524639380467
553.4062949898384,280.6451899919539
517.2886535413827,292.48164287240894
504.22866463801756,302.9711475658394
493.01596196440715,311.63411839577225
457.35133669092596,320.98491760053656
448.3587658812583,330.0681438088732
438.5893339206918,341.3172405124065
430.36674574745746,351.23313362315815
396.83008722312223,357.699516877629
385.27345578604894,365.4112262460958
375.7639852425002,372.7022805064037
366.43229041712107,379.4517446786384
355.57191272554525,387.6140024311522
346.70803875198897,395.5623033919553
332.8988762943624,398.49579754616076
315.8504082369599,398.19163993804005
300.8542343626438,400.6091981795568
289.23868520741775,402.93545758531144
277.5650855793172,406.2285357620096
272.3911822392343,417.2003191972799
262.73233641927243,427.16971966616535
253.20792971902577,432.01424377837924
249.1985319999312,447.5490522267691
292.7465904906362,640.521269167627
272.64249172224527,636.02678536952
804.8967316661586,3209.614034639233
724.5030467972489,3205.9040686959897
646.5556113779995,3209.2685932928116
567.8035492364211,3204.8395735160475
486.7038419116777,3179.46404607261
409.31126573218995,3155.564654343928
335.12423534291634,3147.2077698947405
111.3757146106589,3140.2755472561585
39.18188674771358,3123.254237130063
-35.079705269662,3137.303884592341
-108.12103095017433,1135.8656798522752
-135.12589586812848,1133.7257405412631
-478.8748400704411,2350.463879758102
-523.4289972333165,2298.6579421165134
-321.0672961603582,1006.7950902920995
-337.5896468300176,906.691502305599
-362.5912398939693,906.686187582095
-386.4405220459153,909.0180336059789
-348.11121502543114,476.6693240324135
-709.9009007847953,689.6707718650517
-705.7154352397234,654.50131738936
-225.61815673209847,73.37581245076781
-224.96174273211477,60.673011355377774
-221.7058096465867,57.33702092846706
-218.36099776953716,54.2537985130275
-217.26139804313215,45.62329361569259
-215.60048241436377,36.98640943229841
-212.9460118930461,31.409529108961046
-210.41143597187929,27.876336062182475
-209.29301055390292,20.037420824145237
-207.148062758457,18.806384432377264
-205.20651537090765,10.97889107922301
-203.40844501247898,6.103646254929753
-201.49568420532205,1.3188050004214316
-199.7214487838248,-3.3771875414384667
-198.08854835941534,-7.9993442768494845
-196.3529499548831,-12.493259943472601
-194.511648279817,-16.964114579569504
-192.81795218045707,-21.3831082150133
-191.27242817254086,-25.765446260062987
-189.63616139426645,-30.0354172877243
-188.37840041163838,-34.43585716012369
-226.49932324600476,-68.91022470651103
-226.1737175842578,-73.28377701863081
-224.04801115257553,-79.54590623391874
-221.56247177423467,-85.53549614804056
-217.15764842108885,-95.55165216898537
-215.7143937962088,-98.164659165782
-213.46548271945335,-102.96352600484683
-212.697138624531,-105.10703935006946
-210.6167482193979,-110.29799576368899
-205.56454558388867,-120.93579949249667
-203.34370277416045,-126.06308358156993
-202.659741276799,-128.5652723935235
-198.05436632376652,-137.315213942561
-200.01801379745294,-136.66695524713327
-195.89271630106168,-144.21028465470746
-196.10659851005252,-147.2744530487792
-191.887743885396,-155.12787063120754
-190.4663185563427,-159.99732496386864
-240.31744343455787,-628.0953561212486
-194.36681058896912,-689.8928217060037
-179.58107052974322,-695.954166312259
-162.78643452157806,-701.6128841717147
-148.54493769413048,-708.3420529556655
-48.61733572180791,-427.74595809582235
-36.52211770286281,-421.42037791081987
-14.701911453818868,-417.7413719032436
121.73474418061696,-881.8875861238097
142.58207691392846,-880.0242049755851
164.10647050731015,-880.0804672515084
185.3187865324572,-878.9254859532392
207.6388003465223,-877.5187696514856
227.2478526229215,-875.7459540176426
247.99909241264257,-872.9562991138248
269.3479160261434,-870.7950103970358
292.78503356447436,-868.7390210072583
313.92451252961064,-866.9114144092499
335.83498334245536,-864.2959137143787
358.6456719461373,-861.8558649764493
381.566664921735,-860.1673559374968
403.5993305416195,-860.0044071900775
427.212685906662,-858.0277215803786
451.6489095368339,-855.0451815630497
476.82947920642863,-854.1701881122556
501.87089118531856,-853.0713165268506
528.0321382457586,-850.695964184392
554.4110689448593,-846.6240999590187
579.1151539748655,-843.6031062867585
606.096392136026,-841.4369634379586
635.4714561227303,-839.0067213397381
664.7834755172479,-837.1522744872694
695.0210066681005,-833.8201546437099
724.3369310372825,-830.5058761595194
755.2483075753681,-828.7349132289179
790.0536662312429,-825.3008266532705
823.3915496973328,-822.0453794572566
857.6177770538111,-819.1994558599753
896.6246516997196,-816.2233052078068
933.9759389836386,-814.1314054866272
965.93739908313,-802.0256797961757
1106.8124778852348,-795.8748025270977
1145.3782516272215,-784.4403885569442
1185.8751583397407,-772.8779715017727
1231.3279001110159,-763.8275999256613
1283.8274490578733,-756.2321608114493
1339.6622193807364,-748.3283958021902
1399.0838013856292,-739.5071106489357
559.0470439861833,-113.7394065850111
556.9664328067189,-99.30486003493867
558.3537490936984,-86.40654415616522
565.1640657918545,-74.09169142097093
576.7394965367929,-61.412972037564295
3083.2240320377678,726.8903842841579
3075.974377735485,801.7687883765465
3077.959314383161,878.9581668099685
3081.5656872937416,958.6946737068328
3079.603817010731,1036.9621884393555
3065.787463056901,1112.0695993790728
3041.893912737369,1184.8382478850592
565.3389250472349,272.8771927560765
548.9161042252287,281.1968581300259
512.1181201590852,295.48535243684216
500.3702034741492,306.4395894385034
489.0168090088083,315.1120800712703
453.6658726040589,323.5967220699212
445.1879662404556,334.3325249129779
436.9264391821313,344.2518689036028
404.1328350154679,352.8581891672072
393.65549849104536,359.33876566238513
382.9718830476642,366.0158456883065
373.6310075466433,373.4272789977725
362.9032487936391,381.7941782099644
354.38182598682874,389.7166713270566
342.20930022858226,397.153443063338
One problem is that this line does not behave like you seem to be expecting:
for scan in enumerate(lidar.iter_scans()):
Looking at the source code, this appears to iterate through scans as they come in. In other words, it's a continual stream of incoming data. You need to update your code to have a non-error exit condition. The README in the source repo has this as an example:
for i, scan in enumerate(lidar.iter_scans()):
    print('%d: Got %d measurments' % (i, len(scan)))
    if i > 10:
        break
Another problem is that you've got multiple processes running, which makes debugging significantly more challenging. I would suggest simplifying your __main__ section to this until you've made sure your find_path method is correct:
if __name__ == "__main__":
    nodes = []
    with open(file, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            node = Node(int(float(row[0])), int(float(row[1])))
            nodes.append(node)
    # set start and end nodes
    start = Node(3201, 3201)
    end = Node(23000, 23000)
    # connect the nodes to their neighbors
    for i, node in enumerate(nodes):
        for j in range(i+1, len(nodes)):
            if abs(node.x - nodes[j].x) <= 1 and abs(node.y - nodes[j].y) <= 1:
                node.neighbors.append(nodes[j])
                nodes[j].neighbors.append(node)
    find_path(start, end, nodes)
It would also be helpful for readability if you moved most of this into a separate read_nodes method.
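A minimal sketch of such a helper (the name read_nodes is just a suggestion, not part of the original code; it reuses the csv module and Node class from the question):

def read_nodes(path):
    # read the CSV written by scan() and build one Node per row
    nodes = []
    with open(path, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            nodes.append(Node(int(float(row[0])), int(float(row[1]))))
    return nodes

With that in place, the __main__ block reduces to nodes = read_nodes(file) followed by the neighbor-connection loop and the find_path call.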

Rabin-Karp 2D pattern search runs slower than brute force

I have implemented the Rabin-Karp 2D algorithm for pattern searching in Python. However, my implementation is slower than the brute-force version over a 1000x2000 matrix. Please help me identify the bottleneck. Thanks, and I appreciate your comments.
Note 1: the code correctly finds the position where the pattern matches, but it runs slower: 1.23 s vs. 0.54 s for the brute-force version on my computer.
Note 2: although one can construct a worst case in which Rabin-Karp is as slow as brute force, the given test case is not deliberately designed to make it O(m(n-m+1)).
Disclaimer: Although this problem is an assignment problem in Algorithms, 4th Edition by Sedgewick and Wayne, it is not my homework. I am learning this algorithm.
Here is the code:
'''
Searches for a 2D pattern in a 2D text. Assumes that both the pattern and the
text are rectangles of characters.
O(Mr * Nr * Nc), where Mr is the pattern row length, Nr is the text row length
and Nc is the text column length
'''
MOD = 10**9+7

class RabinKarp2DV3(object):
    def __init__(self, rad, pattern):
        # Radix of the alphabet. Assumes ASCII characters
        self.RADIX = rad
        self.pattern = pattern
        self.height = len(pattern)
        self.width = len(pattern[0])
        self.factors_col = [0]*(self.height)
        self.factors_row = [0]*(self.width)
        self.factors_col[0] = 1
        for i in range(1, len(self.factors_col)):
            self.factors_col[i] = (self.RADIX * self.factors_col[i - 1]) % MOD
        self.factors_row[0] = 1
        for i in range(1, len(self.factors_row)):
            self.factors_row[i] = (self.RADIX * self.factors_row[i - 1]) % MOD
        hash1d_p = [0]*self.width
        self.hash2D(self.pattern, hash1d_p, self.width)
        self.patternHash = self.SingleHash(hash1d_p)

    def hash2D(self, data, hash1d, hei):
        for i in range(hei):
            hash1d[i] = 0
            for j in range(self.height):
                hash1d[i] = (self.RADIX * hash1d[i] + ord(data[j][i])) % MOD

    def rehash2D(self, data, hash1d, hei, j):
        for i in range(hei):
            hash1d[i] = self.RADIX*((hash1d[i] + MOD - self.factors_col[self.height-1]
                                     * ord(data[j][i])%MOD) % MOD) % MOD
            hash1d[i] = (hash1d[i] + ord(data[j+self.height][i])) % MOD

    def SingleHash(self, hash1d):
        res = 0
        for i in range(self.width):
            res = (self.RADIX * res + hash1d[i]) % MOD
        return res

    def SingleReHash(self, hash, hash1d, pos):
        hash = self.RADIX*((hash + MOD - self.factors_row[self.width-1]*hash1d[pos]%MOD) % MOD) % MOD
        hash = (hash + hash1d[pos+self.width]) % MOD
        return hash

    def check(self, text, i, j):
        x, y = i, j
        for a in range(self.height):
            for b in range(self.width):
                if text[x][y] != self.pattern[a][b]:
                    return False
                y += 1
            x += 1
            y = j
        return True

    def search(self, text):
        hash1d = [0]*len(text[0])
        for i in range(len(text)-self.height+1):
            if i == 0:
                self.hash2D(text, hash1d, len(text[0]))
            else:
                self.rehash2D(text, hash1d, len(text[0]), i-1)
            textHash = 0
            for j in range(len(text[0]) - self.width+1):
                if j == 0:
                    textHash = self.SingleHash(hash1d)
                else:
                    textHash = self.SingleReHash(textHash, hash1d, j-1)
                #print(i, j, textHash, patternHash)
                if textHash == self.patternHash and self.check(text, i, j):
                    return [i, j]
        return None

class BruteForce(object):
    def __init__(self, pattern):
        self.pattern = pattern
        self.height = len(pattern)
        self.width = len(pattern[0])

    def check(self, text, i, j):
        x, y = i, j
        for a in range(self.height):
            for b in range(self.width):
                if text[x][y] != self.pattern[a][b]:
                    return False
                y += 1
            x += 1
            y = j
        return True

    def search(self, text):
        for i in range(len(text)-self.height+1):
            for j in range(len(text[0]) - self.width+1):
                if self.check(text, i, j):
                    return [i, j]
        return None

if __name__ == "__main__":
    import random
    import string
    import time
    chars = string.ascii_uppercase
    im, jm = 1000, 2000
    text = []
    for i in range(im):
        s = ''
        for j in range(jm):
            s += random.choice(chars)
        text.append(s)
    pattern = []
    for i in range(20):
        pattern.append(text[357+i][478:478+40])
    start_time = time.time()
    matcher = RabinKarp2DV3(256, pattern)
    print(matcher.search(text))
    print("--- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    matcher = BruteForce(pattern)
    print(matcher.search(text))
    print("--- %s seconds ---" % (time.time() - start_time))
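To see where the time actually goes rather than guessing, one option (not part of the original post) is to run the Rabin-Karp search under the standard-library profiler and sort by cumulative time:

import cProfile
import pstats

# profile only the Rabin-Karp matcher; assumes pattern and text are built as above
profiler = cProfile.Profile()
profiler.enable()
matcher = RabinKarp2DV3(256, pattern)
matcher.search(text)
profiler.disable()
pstats.Stats(profiler).sort_stats("cumulative").print_stats(10)

The report shows which methods dominate the runtime, for example rehash2D, SingleReHash, or the ord() calls inside hash2D.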

Degree, Proximity and Rank Prestige

I want to find these three Prestige measures for an existing graph using python:
Degree Prestige
Proximity Prestige
Rank Prestige
Can I use networkx for this purpose? If not, which library can I use, and how? Any links or references are appreciated.
Yes, you can, but as far as I know you have to implement the measures yourself.
For instance, consider the Degree prestige defined as the number of incoming links to a node divided by the total possible number of incoming links.
In this case you could just calculate it as:
n_nodes = 10
d = nx.gnp_random_graph(n_nodes, 0.5, directed=True)
degree_prestige = dict((v,len(d.in_edges(v))/(n_nodes-1)) for v in d.nodes_iter())
The same goes for the other measures, which can easily be implemented using the functions provided by networkx.
n_nodes = 5
d = nx.gnp_random_graph(n_nodes, 0.5, directed=True)
degree_prestige = dict((v,len(d.in_edges(v))/(n_nodes-1)) for v in d.nodes())

print("DEGREE PRESTIGE :\n")
for i in degree_prestige:
    print(i, " : ", degree_prestige[i])

distance = []
temp_dis = 0
n = 0
for dest in d.nodes:
    temp_dis = 0
    n = 0
    for src in d.nodes:
        if (nx.has_path(d,src,dest) == True):
            temp_dis = temp_dis + nx.shortest_path_length(d,source = src,target = dest)
            n = n + 1
    if temp_dis == 0:
        distance.append([dest, 0])
    else:
        distance.append([dest, temp_dis/(n - 1)])

print("\nPROXIMITY PRESTIGE :\n")
for i in distance:
    print(str(i[0]) + " : " + str(i[1]))

prominance = np.random.randint(1, 4, size=n_nodes)
print("\nASSUME PROMINANCE :\n")
print(prominance)

rank_prestige = np.zeros([n_nodes], dtype = int)
path_matrix = np.zeros([n_nodes, n_nodes], dtype = int)
i = 0
j = 0
for src in d.nodes:
    for dest in d.nodes:
        if d.has_edge(dest, src):
            path_matrix[i][j] = 1
        j = j+1
    j = 0
    i = i+1

for i in range(n_nodes):
    pr_i = 0
    for j in range(n_nodes):
        pr_i = pr_i + path_matrix[i][j] * prominance[j]
    rank_prestige[i] = pr_i

print("\nRANK PRESTIGE :\n")
print(rank_prestige)
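As a side note that is not part of the original answer, the degree prestige line can also be written with DiGraph.in_degree, which avoids building the incoming-edge lists:

# equivalent degree prestige using in_degree; a sketch assuming the DiGraph d defined above
degree_prestige = {v: d.in_degree(v) / (n_nodes - 1) for v in d.nodes()}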

How to give each Category a color?

We have code that draws circles at locations on the map with the name of each category. At the moment the circles and text are all one color. How do we give them different colors by category? Example: category Garden: blue, category Stone: grey.
So far the code:
import json  # needed for json.load below

size(1500,800)
background(1)
nofill()
stroke('#f91')
pen(.2)
fill('#f91', 0.05)
rotate(90)
font("Avenir", "bold", 10)
align('left')

def mapValue(value, fromMin, fromMax, toMin, toMax):
    # Figure out how 'wide' each range is
    fromSpan = fromMax - fromMin
    toSpan = toMax - toMin
    # Convert the from range into a 0-1 range (float)
    valueScaled = float(value - fromMin) / float(fromSpan)
    # Convert the 0-1 range into a value in the to range.
    return toMin + (valueScaled * toSpan)

def xOfDot(lon):
    return mapValue(lon, -100, 100, 0, WIDTH)

def yOfDot(lat):
    return mapValue(lat, -90, 90, HEIGHT, 0)

with open('theft-alerts.json', 'r') as inputFile:
    data = json.load(inputFile)
print len(data)

artworksPerCity = {}
for stolenArt in data:
    if stolenArt.has_key('Category'):
        city = stolenArt['Category']
    if stolenArt.has_key('nItemsStolen'):
        numbersStolen = int(stolenArt['nItemsStolen'])
    if artworksPerCity.has_key(city):
        # Adjust the value stored for this city
        artworksPerCity[city] = artworksPerCity[city] + numbersStolen
    else:
        # Create new key with new value
        artworksPerCity[city] = numbersStolen
    # Draw circle on the map
    radius = artworksPerCity[city] /2
    x = xOfDot(stolenArt['Lon'])
    y = yOfDot(stolenArt['Lat'])
    arc(x, y, radius)
    text(city, x, y)
print artworksPerCity
Here is a sketch of what I intend to include in my pure python data utility.
def hexidecimalDiget(n, deHex = False):
    if(n < 0):
        print "negative values not supported by call to hexidecimalDiget("+str(n)+")"
        return None
    elif(n < 10):
        return str(n)
    elif(n < 16):
        return ["a","b","c","d","e","f"][n-10]
    elif(n in ["a","b","c","d","e","f"]):
        if deHex:
            return ["a","b","c","d","e","f"].index(n)
        return n
    else:
        print "call to hexidecimalDiget("+str(n)+") not supported!"
        return None

def colorFormHexArray(arr):
    if len(arr) != 3 and len(arr) != 6:
        print "invalid length for color on call to colorFormHexArray("+str(arr)+")"
        return None
    elif None in arr:
        print "cannot make color from None arguments in "+str(arr)
        return None
    else:
        ret = "#"
        for k in arr:
            if(type(k) == list):
                for k2 in k:
                    ret += hexidecimalDiget(k2)
            else:
                ret += hexidecimalDiget(k)
        return ret

def arrayFromColor(c):
    c = c.replace("#","")
    col = []
    for n,k in enumerate(c):
        if(len(c) == 3):
            col.append([hexidecimalDiget(k, deHex = True)])
        elif(len(c) == 6):
            col.append([hexidecimalDiget(c[(n+1)*2-2], deHex = True), hexidecimalDiget(c[(n+1)*2-1], deHex = True)])
    return(col)

def intFromHexPair(hp):
    ret = 0
    for n,k in enumerate(hp):
        digBase = 16**(len(hp)-n-1)
        ret += digBase*hexidecimalDiget(k, deHex = True)
    return ret

def hexPairFromInt(I, minDigits = 1, maxDigits = 256):
    if I < 0:
        print "negative numbers not supported by hexPairFromInt"
    k = 0
    while(16**(k+1) <= I):
        k += 1
    if k < minDigits:
        k = minDigits
    if k > maxDigits:
        print("maxDigitsExceeded")
    ret = []
    while k >= 0:
        dig = 16**k
        ret.append(hexidecimalDiget(int(I)%(dig)))
        I -= dig
        k -= 1
    return ret

def specColor(start, end, bottom, top):
    start = arrayFromColor(start)
    end = arrayFromColor(end)
    def ret(v):
        if( v < start or v > end ):
            print("value out of range "+str([start,end]))
            return('#aa0000') #eyo <- error red
        else:
            starts = [intFromHexPair(k) for k in start]
            ends = [intFromHexPair(k) for k in end]
            normalized = (v-bottom)/(top-bottom)
            return colorFormHexArray([hexPairFromInt(int((starts[n]-ends[n])*normalized), minDigits = 1, maxDigits = 256) for n,k in enumerate(starts)])
    return ret
This seems excessive and hasn't even been slightly tested yet (just a sketch at the moment), but I'll be testing and incorporating this code here tonight :: http://krewn.github.io/KPlot/
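If the immediate goal is just a fixed color per category, a much smaller change works too. This is only a sketch: it assumes the same drawing environment as the question's script (stroke, fill, arc, text) and uses the Garden and Stone categories named in the question:

# map each category to a color; fall back to the original orange for unknown categories
categoryColors = {
    'Garden': '#36f',   # blue
    'Stone': '#999',    # grey
}

# ... inside the loop over stolenArt, before drawing:
color = categoryColors.get(city, '#f91')
stroke(color)
fill(color, 0.05)
arc(x, y, radius)
text(city, x, y)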

Using vectorisation with numpy for the Bellman-Ford algorithm

I've been having a go at writing the Bellman-Ford algorithm for finding the shortest path in a graph. I've got a working solution, but it doesn't run very quickly, and I'm led to believe it could be faster if I used numpy instead of my current approach.
This is the solution I have using for loops:
import os

file = open(os.path.dirname(os.path.realpath(__file__)) + "/g_small.txt")
vertices, edges = map(lambda x: int(x), file.readline().replace("\n", "").split(" "))

adjacency_list = [[] for k in xrange(vertices)]
for line in file.readlines():
    tail, head, weight = line.split(" ")
    adjacency_list[int(head)-1].append({"from" : int(tail), "weight" : int(weight)})

n = vertices
shortest_paths = []
s=2

cache = [[0 for k in xrange(vertices)] for j in xrange(vertices)]
cache[0][s] = 0
for v in range(0, vertices):
    if v != s:
        cache[0][v] = float("inf")

# this can be done with numpy I think?
for i in range(1, vertices):
    for v in range(0, vertices):
        adjacent_nodes = adjacency_list[v]
        least_adjacent_cost = float("inf")
        for node in adjacent_nodes:
            adjacent_cost = cache[i-1][node["from"]-1] + node["weight"]
            if adjacent_cost < least_adjacent_cost:
                least_adjacent_cost = adjacent_cost
        cache[i][v] = min(cache[i-1][v], least_adjacent_cost)

shortest_paths.append([s, cache[vertices-1]])

for path in shortest_paths:
    print(str(path[1]))

shortest_path = min(reduce(lambda x, y: x + y, map(lambda x: x[1], shortest_paths)))
print("Shortest Path: " + str(shortest_path))
The input file looks like this -> https://github.com/mneedham/algorithms2/blob/master/shortestpath/g_small.txt
It's mostly uninteresting except for the nested loops about half way down. I've tried to vectorise it using numpy but I'm not really sure how to do it given that the matrix/2D array gets changed on each iteration.
If anyone has any ideas on what I need to do or even something to read that would help me on my way that'd be awesome.
==================
I wrote an updated version to take Jaime's comment into account:
s=0

def initialise_cache(vertices, s):
    cache = [0 for k in xrange(vertices)]
    cache[s] = 0
    for v in range(0, vertices):
        if v != s:
            cache[v] = float("inf")
    return cache

cache = initialise_cache(vertices, s)
for i in range(1, vertices):
    previous_cache = deepcopy(cache)
    cache = initialise_cache(vertices, s)
    for v in range(0, vertices):
        adjacent_nodes = adjacency_list[v]
        least_adjacent_cost = float("inf")
        for node in adjacent_nodes:
            adjacent_cost = previous_cache[node["from"]-1] + node["weight"]
            if adjacent_cost < least_adjacent_cost:
                least_adjacent_cost = adjacent_cost
        cache[v] = min(previous_cache[v], least_adjacent_cost)
================
And another new version this time using vectorisation:
def initialise_cache(vertices, s):
    cache = empty(vertices)
    cache[:] = float("inf")
    cache[s] = 0
    return cache

adjacency_matrix = zeros((vertices, vertices))
adjacency_matrix[:] = float("inf")
for line in file.readlines():
    tail, head, weight = line.split(" ")
    adjacency_matrix[int(head)-1][int(tail)-1] = int(weight)

n = vertices
shortest_paths = []
s=2

cache = initialise_cache(vertices, s)
for i in range(1, vertices):
    previous_cache = cache
    combined = (previous_cache.T + adjacency_matrix).min(axis=1)
    cache = minimum(previous_cache, combined)

shortest_paths.append([s, cache])
I ended up with the following vectorised code after following Jaime's advice:
def initialise_cache(vertices, s):
    cache = empty(vertices)
    cache[:] = float("inf")
    cache[s] = 0
    return cache

adjacency_matrix = zeros((vertices, vertices))
adjacency_matrix[:] = float("inf")
for line in file.readlines():
    tail, head, weight = line.split(" ")
    adjacency_matrix[int(head)-1][int(tail)-1] = int(weight)

n = vertices
shortest_paths = []
s=2

cache = initialise_cache(vertices, s)
for i in range(1, vertices):
    previous_cache = cache
    combined = (previous_cache.T + adjacency_matrix).min(axis=1)
    cache = minimum(previous_cache, combined)

shortest_paths.append([s, cache])
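As a self-contained toy illustration of what that relaxation line does (the three-node graph and weights below are made up, not from the original post): each iteration broadcasts the current costs across the adjacency matrix, takes the row-wise minimum to get the best cost through any incoming edge, and keeps the cheaper of that and the existing cost.

import numpy as np

INF = float("inf")
# adjacency_matrix[v][u] = weight of the edge u -> v, INF where there is no edge
adjacency_matrix = np.array([[INF, INF, INF],
                             [1.0, INF, INF],
                             [4.0, 2.0, INF]])
cache = np.array([0.0, INF, INF])   # shortest known cost from source vertex 0

for _ in range(len(cache) - 1):
    combined = (cache + adjacency_matrix).min(axis=1)   # best cost via one more edge
    cache = np.minimum(cache, combined)

print(cache)   # [0. 1. 3.]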
