Multithreading in Python does not give the expected performance

I am building an application that works with matrices for linear algebra. I have this class:
class Matrix:
    def __init__(self):
        self.elements = []
        self.height = 0
        self.width = 0

    def __add__(self, matrix):
        resultant_matrix = []
        row = []
        for i in range(self.height):
            row.clear()
            for j in range(self.width):
                row.insert(j, self.elements[i][j] + matrix.elements[i][j])
            resultant_matrix.insert(i, row)
        return self.list_to_object(resultant_matrix)

    @staticmethod
    def list_to_object(list_matrix):
        matrix_obj = Matrix()
        matrix_obj.elements = list_matrix
        matrix_obj.height = len(list_matrix)
        matrix_obj.width = len(list_matrix[0])
        return matrix_obj

    @classmethod
    def get_matrix(cls, rows=3, columns=3):
        mat = Matrix()
        for i in range(rows):
            row = []
            for j in range(columns):
                row.append(j)
            mat.elements.append(row)
        mat.height = len(mat.elements)
        mat.width = len(mat.elements[0])
        return mat
My focus is on the __add__ method, which adds two matrices.
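For reference, an equivalent __add__ that builds a fresh row list on every pass, instead of reusing and clearing a single row object, could look like this (a sketch, not the original code):

# inside class Matrix -- an alternative, illustrative __add__
def __add__(self, matrix):
    resultant_matrix = [
        [self.elements[i][j] + matrix.elements[i][j] for j in range(self.width)]
        for i in range(self.height)
    ]
    return self.list_to_object(resultant_matrix)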
The following ThreadingManager class has an addition method that splits the matrices into smaller parts, runs the addition on each part, then reassembles the results and returns the new matrix.
class ThreadingManager:
    threads = []
    maximum_available_threads = 8

    def do_add(self, slice1, slice2, index, _list):
        m1 = Matrix.list_to_object(slice1)
        m2 = Matrix.list_to_object(slice2)
        elements = (m1 + m2).elements
        for i, row in enumerate(elements):
            _list[index+i] = row

    def addition(self, matrix_a, matrix_b):
        m1 = matrix_a.elements
        m2 = matrix_b.elements
        parts_number = self.maximum_available_threads
        sub_matrices_m2 = get_sub_matrix(m1, parts_number)
        sub_matrices_m3 = get_sub_matrix(m2, parts_number)
        new_list = [[] * matrix_a.width] * matrix_a.height
        for i in range(len(sub_matrices_m2)):
            sub_matrices_m2_i = sub_matrices_m2[i]
            sub_matrices_m3_i = sub_matrices_m3[i]
            rows_number_per_thread = i*len(sub_matrices_m2_i)
            thread = threading.Thread(target=self.do_add, args=(sub_matrices_m2_i, sub_matrices_m3_i,
                                                                rows_number_per_thread, new_list))
            thread.start()
            self.threads.append(thread)
        for thread in self.threads:
            thread.join()
            self.threads.remove(thread)
        return Matrix.list_to_object(new_list)
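The helper get_sub_matrix is not shown in the question. A minimal sketch of what such a chunking helper might look like, assuming it only needs to split a list of rows into consecutive, roughly equal slices:

def get_sub_matrix(rows, parts_number):
    # hypothetical helper: split `rows` into at most `parts_number` consecutive chunks
    chunk_size = math.ceil(len(rows) / parts_number)
    return [rows[i:i + chunk_size] for i in range(0, len(rows), chunk_size)]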
Finally, I have this test case (You can change the 50000 to any number, depending on your computer resources)
mat_a = Matrix.get_matrix(50000, 5)
mat_b = Matrix.get_matrix(50000, 5)
start = datetime.now()
mat_c = mat_a + mat_b
end = datetime.now()
print('time needed before multithreading is:', end - start)
start = datetime.now()
result = ThreadingManager().addition(mat_a, mat_b)
end = datetime.now()
print('time needed after multithreading is:', end - start)
My problem is that I cannot see any performance difference between the normal addition and the multithreaded version. How can I use multithreading in a better way?
Note: if I remove the row.clear() line in the Matrix class, I get a wrong answer, but then I can see a difference with multithreading.
Needed imports:
import math
import threading
from datetime import datetime
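For CPU-bound, pure-Python work like this, CPython's global interpreter lock (GIL) prevents threads from running bytecode in parallel, so a thread per chunk cannot beat the plain loop. Processes avoid the GIL; the sketch below is not the asker's code and assumes each chunk is a plain list of rows that can be pickled to a worker process:

from concurrent.futures import ProcessPoolExecutor

def add_chunk(pair):
    rows_a, rows_b = pair  # two equally sized lists of rows
    return [[a + b for a, b in zip(ra, rb)] for ra, rb in zip(rows_a, rows_b)]

def parallel_add(matrix_a, matrix_b, parts=8):
    step = (matrix_a.height + parts - 1) // parts
    pairs = [(matrix_a.elements[i:i + step], matrix_b.elements[i:i + step])
             for i in range(0, matrix_a.height, step)]
    # run under `if __name__ == '__main__':` so worker processes can be started safely
    with ProcessPoolExecutor() as pool:
        chunks = pool.map(add_chunk, pairs)
    return Matrix.list_to_object([row for chunk in chunks for row in chunk])

For an operation as cheap as element-wise addition, the cost of pickling rows to and from the workers can easily outweigh the gain, so it is worth timing both versions.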

Related

How to return a dictionary from a process in Python?

I want to make an inverted index using multiprocessing to speed up its construction. My idea is to split the files into groups, have each process build its own inverted index, and then merge all these indexes into one. But I don't know how to return them to the main process that will do the merging.
import multiprocessing as mp
from pathlib import Path
import re
import time

class InvertedIndex:
    def __init__(self):
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num
        processes = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            currLi = pathList[startIndex:endIndex]
            p = mp.Process(target=self.oneProcessTask, args=(currLi,))
            processes.append(p)
        [x.start() for x in processes]
        [x.join() for x in processes]

    @staticmethod
    def oneProcessTask(listOfDoc):
        # print(f'Start: {list[0]}, end: {list[-1]}')  # temp
        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))

    def getListOfDoc(self, keyWord):
        return self.index[keyWord]

if __name__ == '__main__':
    ii = InvertedIndex()
    start_time = time.time()
    ii.createIndex()
    print("--- %s seconds ---" % (time.time() - start_time))
I used a multiprocessing.Manager to write everything into one dictionary, but that solution was too slow. So I went back to the idea of building a separate inverted index in each process and then merging them. But I don't know how to return all the indexes to one process.
Take a look at concurrent.futures (standard library) with either ThreadPoolExecutor or ProcessPoolExecutor. FYI: I wrote this here without testing it, but it is more or less the gist of what I use all the time.
from concurrent.futures import ThreadPoolExecutor, as_completed

def foo(stuff: int) -> dict:
    return {}

things_to_analyze = [1, 2, 3]
threads = []
results = []

with ThreadPoolExecutor() as executor:
    for thing in things_to_analyze:
        threads.append(executor.submit(foo, thing))
    for job in as_completed(threads):
        results.append(job.result())
I found a solution. I used pool.starmap to return a list of indexes.
My code:
class InvertedIndex:
    def __init__(self):
        self.smallIndexes = None
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))  # recursively walk all text files and collect them into a list
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num  # how many files each process should handle
        processes_args = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            processes_args.append((path, startIndex, endIndex))
        pool = mp.Pool(threads_num)
        self.smallIndexes = pool.starmap(self.oneProcessTask, processes_args)
        self.mergeIndex()

    @staticmethod
    def oneProcessTask(path, startIndex, endIndex):
        pathList = list(Path(path).glob('**/*.txt'))
        listOfDoc = pathList[startIndex:endIndex]
        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))
        return tempDict
Execution time decreased from 200 seconds (when I used shared memory and Manager().dict()) to 0.8 seconds (when I used pool.starmap).
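The mergeIndex method is not shown in this answer. A minimal sketch of what it might look like, assuming it only has to union the per-process sets for each word:

# inside class InvertedIndex -- hypothetical merge step
def mergeIndex(self):
    self.index = dict()
    for small in self.smallIndexes:          # one dict per worker process
        for word, docs in small.items():
            self.index.setdefault(word, set()).update(docs)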

Multithreading a numpy nditerator

For an MCMC implementation, I want to calculate the covariance tensor C in numpy.
Working Single-Threaded Code
The distance between two elements is based on the distance between their indices. For reference, here is the working single-threaded code (with an example distance):
import numpy as np

# set size, dimensions, etc
size = 20
ndim = 2
shape = (size,)*ndim*2

# initialize tensor
C = np.zeros(shape)

# example distance
dist = lambda x, y: np.sqrt(np.sum((x-y)**2))

# this runs as a class method, so please forgive my sloppy coding here
def update_tensor():
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()

update_tensor()
Solution Attempt
Now the issue is that, while applying C to a matrix x is a multithreaded operation:
x = np.random.standard_normal((size,)*ndim)
result = np.tensordot(C, x, axes=ndim)
calculating the entries of C is not. My idea was to split C after initialization along its first axis and process the chunks separately:
import multiprocessing

def _calc_distances(C):
    'Calculate distances of submatrices'
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()
    return C

def update_tensor(C):
    'Updates Covariance Operator'
    # Multicore Processing
    n_processes = multiprocessing.cpu_count()
    Chunks = [
        C[i*C.shape[0]//n_processes:(i+1)*C.shape[0]//n_processes] for i in range(0, n_processes-1)
    ]
    Chunks.append(C[C.shape[0]//n_processes*(n_processes-1):])
    with multiprocessing.Pool(n_processes+1) as p:
        # map and stitch together
        C = np.concatenate(
            p.map(_calc_distances, Chunks)
        )
But this fails because the indices of the submatrices change.
Question
Is there a nicer solution to this? How do I fix the index issue? Probably the nicest way would be to just iterate over parts of the array with threads sharing the data of C. Is that possible?
Q/A
Q: Do you have to use a numpy iterator?
A: No, it’s nice, but I can give up on that.
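For reference, the tensor can also be filled without nditer at all, using index grids and broadcasting; a sketch, assuming the same Euclidean distance as in the single-threaded example above:

import numpy as np

size, ndim = 20, 2
shape = (size,)*ndim*2

grid = np.indices(shape)            # shape (2*ndim,) + shape
first = grid[:ndim]                 # first half of every multi-index
second = grid[ndim:]                # second half of every multi-index

# Euclidean distance between the two index halves, computed for all entries at once
C = np.sqrt(np.sum((first - second)**2, axis=0))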
It worked like this. I am just going to post the class here.
Benchmarks
CPU: Intel Core i5-6300U @ 2.5GHz, boosting to ~2.9GHz
Windows 10 64-bit, Python 3.7.4, Numpy 1.17
Pro: Less compute time
Con: Uses a little more RAM; somewhat complicated code.
Working Multi-Threaded Code
import multiprocessing
import numpy as np

class CovOp(object):
    'F[0,1]^ndim->C[0,1]^ndim'
    def f(self, r):
        return np.exp(-r/self.ro)  # (1 + np.sqrt(3)*r / self.ro) * np.exp(-np.sqrt(3) * r / self.ro)

    def dist(self, x, y):
        return np.sum((x-y)**2)

    def __init__(self, ndim, size, sigma=1, ro=1):
        self.tensor_cached = False
        self.inverse_cached = False
        self.ndim = ndim
        self.size = size
        self.shape = (size,)*ndim*2
        self.C = np.zeros(self.shape)
        self.Inv = np.zeros(self.shape)
        self.ro = ro * size
        self.sigma = sigma

    def __call__(self, x):
        if not self.tensor_cached:
            self.update_tensor()
        if self.ndim == 0:
            return self.sigma * self.C * x
        elif self.ndim == 1:
            return self.sigma * np.dot(self.C, x)
        return self.sigma * np.tensordot(self.C, x, axes=self.ndim)

    def _calc_distances(self, Chunk: tuple):
        'Calculate distances of submatrices'
        C, offset = Chunk
        it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = np.array(it.multi_index)
            idx[0] += offset
            d = self.dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
            it[0] = self.f(d)
            it.iternext()
        return C

    def update_tensor(self):
        'Updates Covariance Operator'
        # Multicore Processing
        n_processes = multiprocessing.cpu_count()
        Chunks = [
            (
                self.C[i*self.C.shape[0]//n_processes:(i+1)*self.C.shape[0]//n_processes],
                i*self.C.shape[0]//n_processes
            ) for i in range(0, n_processes-1)
        ]
        Chunks.append((
            self.C[self.C.shape[0]//n_processes*(n_processes-1):],
            self.C.shape[0]//n_processes*(n_processes-1)
        ))
        with multiprocessing.Pool(n_processes+1) as p:
            self.C = np.concatenate(
                p.map(self._calc_distances, Chunks)
            )
        self.tensor_cached = True
        # missing cholesky decomposition

    def update_inverse(self):
        if self.ndim == 1:
            self.Inv = np.linalg.inv(self.C)
        elif self.ndim > 1:
            self.Inv = np.linalg.tensorinv(self.C)
        else:
            self.Inv = 1/self.C
        self.inverse_cached = True

    def inv(self, x):
        if self.ndim == 0:
            return self.Inv * x / self.sigma
        elif self.ndim == 1:
            return np.dot(self.Inv, x) / self.sigma
        return np.tensordot(self.Inv, x) / self.sigma

if __name__ == '__main__':
    size = 30
    ndim = 2
    depth = 1
    Cov = CovOp(ndim, size, 1, .2)
    import time
    n_tests = 5
    t_start = time.perf_counter()
    for i in range(n_tests):
        Cov.update_tensor()
    t_stop = time.perf_counter()
    dt_new = t_stop - t_start
    print(
        '''Benchmark; NDim: %s, Size: %s NTests: %s
Mean time per test:
Multithreaded %ss''' % (ndim, size, n_tests, dt_new/n_tests)
    )

tkinter execution dies after about 140 iterations with no error message (mem leak?)

My code dies after about 140+ iterations, and I don't know why. I guess a memory leak is a possibility, but I couldn't find it. I also found out that changing some arithmetic constants can prolong the time until the crash.
I have a genetic algorithm that tries to find best (i.e. minimal steps) route from point A (src) to point B (dst).
I create a list of random chromosomes, where each chromosome has:
src + dst [always the same]
list of directions (random)
I then run the algorithm:
find best route and draw it (for visualization purposes)
Given a probability P - replace the chromosomes with cross-overs (i.e. pick 2, and take the "end" of one's directions, and replace the "end" of the second's)
Given probability Q - mutate (replace the next direction with a random direction)
This all goes well, and most of the time I do find a route (usually not the ideal one), but sometimes, when it searches for a long time (say, about 140+ iterations), it just crashes. No warning. No error.
How can I prevent that (a simple iteration limit can work, but I do want the algorithm to run for a long time [~2000+ iterations])?
I think the relevant part of the code is the update function inside the GUI class, which calls cross_over.
When playing with the update_fitness() score values (changing score -= (weight+1)*2000*(shift_x + shift_y) to score -= (weight+1)*2*(shift_x + shift_y)), it runs for a longer time. Could it be some kind of arithmetic overflow?
import tkinter as tk
from enum import Enum
from random import randint, sample
from copy import deepcopy
from time import sleep
from itertools import product

debug_flag = False

class Direction(Enum):
    Up = 0
    Down = 1
    Left = 2
    Right = 3

    def __str__(self):
        return str(self.name)

    def __repr__(self):
        return str(self.name)[0]

# A chromosome is a list of directions that should lead the way from src to dst.
# Each step in the chromosome is a direction (up, down, right, left)
# The chromosome also keeps track of its route
class Chromosome:
    def __init__(self, src = None, dst = None, length = 10, directions = None):
        self.MAX_SCORE = 1000000
        self.route = [src]
        if not directions:
            self.directions = [Direction(randint(0,3)) for i in range(length)]
        else:
            self.directions = directions
        self.src = src
        self.dst = dst
        self.fitness = self.MAX_SCORE

    def __str__(self):
        return str(self.fitness)

    def __repr__(self):
        return self.__str__()

    def set_src(self, pixel):
        self.src = pixel

    def set_dst(self, pixel):
        self.dst = pixel

    def set_directions(self, ls):
        self.directions = ls

    def update_fitness(self):
        # Higher score - a better fitness
        score = self.MAX_SCORE - len(self.route)
        score += 4000*(len(set(self.route)) - len(self.route))  # penalize returning to the same cell
        score += (self.dst in self.route) * 500  # bonus routes that get to dst
        for weight, cell in enumerate(self.route):
            shift_x = abs(cell[0] - self.dst[0])
            shift_y = abs(cell[1] - self.dst[1])
            score -= (weight+1)*2000*(shift_x + shift_y)  # penalize any wrong turn
        self.fitness = max(score, 0)

    def update(self, mutate_chance = 0.9):
        # mutate #
        self.mutate(chance = mutate_chance)
        # move according to direction
        last_cell = self.route[-1]
        try:
            direction = self.directions[len(self.route) - 1]
        except IndexError:
            print('No more directions. Halting')
            return
        if direction == Direction.Down:
            x_shift, y_shift = 0, 1
        elif direction == Direction.Up:
            x_shift, y_shift = 0, -1
        elif direction == Direction.Left:
            x_shift, y_shift = -1, 0
        elif direction == Direction.Right:
            x_shift, y_shift = 1, 0
        new_cell = last_cell[0] + x_shift, last_cell[1] + y_shift
        self.route.append(new_cell)
        self.update_fitness()

    def cross_over(p1, p2, loc = None):
        # find the cross_over point
        if not loc:
            loc = randint(0, len(p1.directions))
        # choose one of the parents randomly
        x = randint(0, 1)
        src_parent = (p1, p2)[x]
        dst_parent = (p1, p2)[1 - x]
        son = deepcopy(src_parent)
        son.directions[loc:] = deepcopy(dst_parent.directions[loc:])
        return son

    def mutate(self, chance = 1):
        if 100*chance > randint(0, 99):
            self.directions[len(self.route) - 1] = Direction(randint(0, 3))

class GUI:
    def __init__(self, rows = 10, cols = 10, iteration_timer = 100, chromosomes = [], cross_over_chance = 0.5, mutation_chance = 0.3, MAX_ITER = 100):
        self.rows = rows
        self.cols = cols
        self.canv_w = 800
        self.canv_h = 800
        self.cell_w = self.canv_w // cols
        self.cell_h = self.canv_h // rows
        self.master = tk.Tk()
        self.canvas = tk.Canvas(self.master, width = self.canv_w, height = self.canv_h)
        self.canvas.pack()
        self.rect_dict = {}
        self.iteration_timer = iteration_timer
        self.iterations = 0
        self.MAX_ITER = MAX_ITER
        self.chromosome_list = chromosomes
        self.src = chromosomes[0].src  # all chromosomes share src + dst
        self.dst = chromosomes[0].dst
        self.prev_best_route = []
        self.cross_over_chance = cross_over_chance
        self.mutation_chance = mutation_chance
        self.no_obstacles = True
        # init grid #
        for r in range(rows):
            for c in range(cols):
                self.rect_dict[(r, c)] = self.canvas.create_rectangle(r *self.cell_h, c *self.cell_w,
                                                                      (1+r)*self.cell_h, (1+c)*self.cell_w,
                                                                      fill="gray")
        # init grid #
        # draw src + dst #
        self.color_src_dst()
        # draw src + dst #
        # after + mainloop #
        self.master.after(iteration_timer, self.start_gui)
        tk.mainloop()
        # after + mainloop #

    def start_gui(self):
        self.start_msg = self.canvas.create_text(self.canv_w // 2, 3*self.canv_h // 4, fill = "black", font = "Times 25 bold underline",
                                                 text="Starting new computation.\nPopulation size = %d\nCross-over chance = %.2f\nMutation chance = %.2f" %
                                                 (len(self.chromosome_list), self.cross_over_chance, self.mutation_chance))
        self.master.after(2000, self.update)

    def end_gui(self, msg="Bye Bye!"):
        self.master.wm_attributes('-alpha', 0.9)  # transparency
        self.canvas.create_text(self.canv_w // 2, 3*self.canv_h // 4, fill = "black", font = "Times 25 bold underline", text=msg)
        cell_ls = []
        for idx, cell in enumerate(self.prev_best_route):
            if cell in cell_ls:
                continue
            cell_ls.append(cell)
            self.canvas.create_text(cell[0]*self.cell_w, cell[1]*self.cell_h, fill = "purple", font = "Times 16 bold italic", text=str(idx+1))
        self.master.after(3000, self.master.destroy)

    def color_src_dst(self):
        r_src = self.rect_dict[self.src]
        r_dst = self.rect_dict[self.dst]
        c_src = 'blue'
        c_dst = 'red'
        self.canvas.itemconfig(r_src, fill=c_src)
        self.canvas.itemconfig(r_dst, fill=c_dst)

    def color_route(self, route, color):
        for cell in route:
            try:
                self.canvas.itemconfig(self.rect_dict[cell], fill=color)
            except KeyError:
                # out of bounds -> ignore
                continue
        # keep the src + dst
        self.color_src_dst()
        # keep the src + dst

    def compute_shortest_route(self):
        if self.no_obstacles:
            return (1 +
                    abs(self.chromosome_list[0].dst[0] - self.chromosome_list[0].src[0]) +
                    abs(self.chromosome_list[0].dst[1] - self.chromosome_list[0].src[1]))
        else:
            return 0

    def create_weighted_chromosome_list(self):
        ls = []
        for ch in self.chromosome_list:
            tmp = [ch] * (ch.fitness // 200000)
            ls.extend(tmp)
        return ls

    def cross_over(self):
        new_chromosome_ls = []
        weighted_ls = self.create_weighted_chromosome_list()
        while len(new_chromosome_ls) < len(self.chromosome_list):
            try:
                p1, p2 = sample(weighted_ls, 2)
                son = Chromosome.cross_over(p1, p2)
                if son in new_chromosome_ls:
                    continue
                else:
                    new_chromosome_ls.append(son)
            except ValueError:
                continue
        return new_chromosome_ls

    def end_successfully(self):
        self.end_gui(msg="Got to destination in %d iterations!\nBest route length = %d" % (len(self.prev_best_route), self.compute_shortest_route()))

    def update(self):
        # first time #
        self.canvas.delete(self.start_msg)
        # first time #
        # end #
        if self.iterations >= self.MAX_ITER:
            self.end_gui()
            return
        # end #
        # clean the previously best chromosome route #
        self.color_route(self.prev_best_route[1:], 'gray')
        # clean the previously best chromosome route #
        # cross over #
        if 100*self.cross_over_chance > randint(0, 99):
            self.chromosome_list = self.cross_over()
        # cross over #
        # update (includes mutations) all chromosomes #
        for ch in self.chromosome_list:
            ch.update(mutate_chance=self.mutation_chance)
        # update (includes mutations) all chromosomes #
        # show all chromosome fitness values #
        if debug_flag:
            fit_ls = [ch.fitness for ch in self.chromosome_list]
            print(self.iterations, sum(fit_ls) / len(fit_ls), fit_ls)
        # show all chromosome fitness values #
        # find and display best chromosome #
        best_ch = max(self.chromosome_list, key=lambda ch: ch.fitness)
        self.prev_best_route = deepcopy(best_ch.route)
        self.color_route(self.prev_best_route[1:], 'gold')
        # find and display best chromosome #
        # check if got to dst #
        if best_ch.dst == best_ch.route[-1]:
            self.end_successfully()
            return
        # check if got to dst #
        # after + update iterations #
        self.master.after(self.iteration_timer, self.update)
        self.iterations += 1
        # after + update iterations #

def main():
    iter_timer, ITER = 10, 350
    r, c = 20, 20
    s, d = (13, 11), (7, 8)
    population_size = [80, 160]
    cross_over_chance = [0.2, 0.4, 0.5]
    for pop_size, CO_chance in product(population_size, cross_over_chance):
        M_chance = 0.7 - CO_chance
        ch_ls = [Chromosome(src=s, dst=d, directions=[Direction(randint(0,3)) for i in range(ITER)]) for i in range(pop_size)]
        g = GUI(rows=r, cols=c, chromosomes = ch_ls, iteration_timer=iter_timer,
                cross_over_chance=CO_chance, mutation_chance=M_chance, MAX_ITER=ITER-1)
        del(ch_ls)
        del(g)

if __name__ == "__main__":
    main()
I do not know if you are familiar with the Python profiling tool in Visual Studio, but it is quite useful in cases like yours (though I usually program with editors such as VS Code).
I have run your program and, as you said, it sometimes crashes. I analyzed the code with the profiling tool, and it seems that the problem is the cross_over function, specifically the random function.
I would strongly suggest reviewing your cross_over and mutation functions. The random function should not be called so many times (2 million calls).
I have previously programmed genetic algorithms, and to me it seems that your program is falling into a local minimum. What is usually suggested in these cases is playing with the mutation percentage. Try to increase it a little so that you can get out of the local minimum.
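One way to reduce both the number of random-number calls and the size of the intermediate lists (a sketch, not taken from this answer) is to pick parents with random.choices, using the fitness values as weights, instead of materialising a weighted list of chromosomes:

from random import choices

def pick_parents(chromosome_list):
    # hypothetical replacement for create_weighted_chromosome_list + sample;
    # assumes at least one chromosome has non-zero fitness
    weights = [ch.fitness for ch in chromosome_list]
    # note: choices samples with replacement, unlike sample()
    p1, p2 = choices(chromosome_list, weights=weights, k=2)
    return p1, p2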

Why is my breadth first search algorithm so slow?

I am implementing a breadth-first search algorithm in Python to solve the 8-puzzle game http://mypuzzle.org/sliding. I was told that this algorithm should not take more than a few minutes to run, but it's been running for over an hour. Is there something wrong with my algorithm or implementation? Or was the person who told me it could run in a few minutes mistaken?
Here is my code:
Edit: You can run this from the command line with python driver.py bfs <nine numbers separated by commas>. I'm using python driver.py bfs 9,3,7,5,0,6,4,1,2
driver.py
import sys
from SearchAlgorithms import bfs  # , dfs, ast, ida

if __name__ == '__main__':
    board = [int(s) for s in sys.argv[2].split(',')]
    if(sys.argv[1]=='bfs'):
        output = bfs(board)
    elif(sys.argv[1]=='dfs'):
        output = bfs(board)
    elif(sys.argv[1]=='ast'):
        output = bfs(board)
    elif(sys.argv[1]=='ida'):
        output = bfs(board)
    else:
        raise IOError
    print(output)
SearchAlgorithms.py
from queue import Queue
import math
import time
import copy

class Board:
    def __init__(self, data, path=[]):
        self.data = data
        self.path = path

    def neighbors(self):
        neighbors = []
        board_width = int(math.sqrt(len(self.data)))
        if self.data.index(0) - board_width >= 0:
            new_data = copy.deepcopy(self.data)
            new_data[self.data.index(0)], new_data[self.data.index(0) - board_width] = self.data[self.data.index(0) - board_width], 0
            neighbors.append(Board(new_data, self.path + ['Up']))
        if self.data.index(0) + board_width < len(self.data):
            new_data = copy.deepcopy(self.data)
            new_data[self.data.index(0)], new_data[self.data.index(0) + board_width] = self.data[self.data.index(0) + board_width], 0
            neighbors.append(Board(new_data, self.path + ['Down']))
        if self.data.index(0) - 1 >= 0:
            new_data = copy.deepcopy(self.data)
            new_data[self.data.index(0)], new_data[self.data.index(0) - 1] = self.data[self.data.index(0) - 1], 0
            neighbors.append(Board(new_data, self.path + ['Left']))
        if self.data.index(0) + 1 < len(self.data):
            new_data = copy.deepcopy(self.data)
            new_data[self.data.index(0)], new_data[self.data.index(0) + 1] = self.data[self.data.index(0) + 1], 0
            neighbors.append(Board(new_data, self.path + ['Right']))
        return neighbors

def bfs(board):
    before = time.time()
    goal = sorted(board)
    graph = Board(board)
    frontier = [graph]
    explored = set()
    count = 0
    while frontier:
        count += 1
        if count % 100 == 0:
            print(count)
        state = frontier.pop(0)
        explored.add(state)
        if state.data == goal:
            return {'path_to_goal': reversed(state.path),
                    'cost_of_path': len(state.path),
                    'nodes_expanded': len(explored),
                    'fringe_size': len(frontier),
                    'running_time': time.time() - before
                    }
        for neighbor in state.neighbors():
            if neighbor not in set().union(frontier, explored):
                frontier.append(neighbor)
    return False
Thank you for your help!
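For reference, two things usually dominate the running time of a BFS written this way: frontier.pop(0) on a plain list is O(n), and the membership test set().union(frontier, explored) rebuilds a set on every expansion while comparing Board objects by identity rather than by their contents. A sketch of the usual structure with a deque and a set of hashable states (not from the question; neighbors_of is a hypothetical helper that yields (state, move) pairs for tuple states):

from collections import deque

def bfs_fast(board):
    goal = tuple(sorted(board))
    start = tuple(board)
    frontier = deque([(start, [])])   # (state, path) pairs; popleft() is O(1)
    seen = {start}                    # tuples hash by value, so membership tests work
    while frontier:
        state, path = frontier.popleft()
        if state == goal:
            return path
        for next_state, move in neighbors_of(state):  # hypothetical neighbor generator
            if next_state not in seen:
                seen.add(next_state)
                frontier.append((next_state, path + [move]))
    return None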

How can I parallel parsing in python?

I have the following code, which converts a graph from an edge list to an adjacency matrix:
for line in open('graph.txt'):
    converted = [sparse_to_dense.get(int(ID)) for ID in line.split()]
    i = converted[0]
    j = converted[1]
    I.append(i)
    J.append(j)
n = max([max(I), max(J)]) + 1
data = [1]*len(I)
return coo_matrix((data, (I,J)), shape=(n,n), dtype='i1')
This code is awfully slow -- on my machine, converting 500k edges takes hours. On the other hand, I/O is obviously not the bottleneck (I can read the full file into memory almost instantaneously), so I think there is room for parallelism. But I'm not sure how to proceed: should I read the file in parallel, or something else?
Use multiprocessing; one way to do it is shown below. I did not check it, and it could be further improved.
import multiprocessing
import Queue  # Python 2 standard-library module, needed here for Queue.Empty

class Worker(multiprocessing.Process):
    def __init__(self, queue, results):
        multiprocessing.Process.__init__(self)
        self.q = queue
        self.results = results

    def run(self):
        while True:
            try:
                lineno, linecontents = self.q.get(block=False)
            except Queue.Empty:
                break
            converted = [sparse_to_dense.get(int(ID)) for ID in linecontents.split()]
            i = converted[0]
            j = converted[1]
            self.results.put((i, j))

def main():
    q = multiprocessing.Queue()
    results = multiprocessing.JoinableQueue()
    for i, l in enumerate(open(fname)):
        q.put((i, l))
    for _ in xrange(4):
        w = Worker(q, results)
        w.start()
    I, J = [], []
    while True:
        try:
            i, j = results.get(block=False)
        except Queue.Empty:
            break
        I.append(i)
        J.append(j)
        results.task_done()
    results.join()
    n = max([max(I), max(J)]) + 1
    data = [1]*len(I)
    coo = coo_matrix((data, (I,J)), shape=(n,n), dtype='i1')
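A simpler alternative (a sketch, not part of the answer above) is to let multiprocessing.Pool.map parse the lines in chunks; it assumes sparse_to_dense is a module-level dict that the worker processes can see (for example via fork on Linux):

import multiprocessing
from scipy.sparse import coo_matrix

def parse_line(line):
    # map both endpoint IDs on a line through the sparse_to_dense lookup
    a, b = line.split()
    return sparse_to_dense[int(a)], sparse_to_dense[int(b)]

def build_matrix(path='graph.txt'):
    with open(path) as f:
        lines = f.readlines()
    with multiprocessing.Pool() as pool:
        pairs = pool.map(parse_line, lines, chunksize=10000)
    I, J = zip(*pairs)
    n = max(max(I), max(J)) + 1
    return coo_matrix(([1]*len(I), (I, J)), shape=(n, n), dtype='i1')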
