How can I arrange a list, with bubble sort, but in descending order?
I searched in other topics, but I couldn't find an answer.
This is my working implementation of Bubblesort code:
from timeit import default_timer as timer
import resource
start = timer()
def bubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]>alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
with open('lista.txt', 'r') as f:
long_string = f.readline()
alist = long_string.split(',')
bubbleSort(alist)
f = open("bubble.txt", "w")
print >>f,(alist)
print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
end = timer()
print(end - start)
f.close()
You need to replace the greater than in the following if statement if alist[i]<alist[i+1]: with a smaller than if alist[i]<alist[i+1]:. You also need to return alist leaving you with the following.
def bubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]<alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
return(alist)
Write like this alist[i]<alist[i+1]
Related
I want to make an inverted index using multiprocessing to speed up its work. My idea is to split the files into groups, and each process will build its own inverted index, and then I want to merge all these indexes into one inverted index. But I don't know how to return them to the main process that will merge them.
import multiprocessing as mp
from pathlib import Path
import re
import time
class InvertedIndex:
def __init__(self):
self.index = dict()
def createIndex(self, path='data', threads_num=4):
pathList = list(Path(path).glob('**/*.txt'))
fileNum = len(pathList)
oneProcessNum = fileNum / threads_num
processes = []
for i in range(threads_num):
startIndex = int(i * oneProcessNum)
endIndex = int((i + 1) * oneProcessNum)
currLi = pathList[startIndex:endIndex]
p = mp.Process(target=self.oneProcessTask, args=(currLi,))
processes.append(p)
[x.start() for x in processes]
[x.join() for x in processes]
#staticmethod
def oneProcessTask(listOfDoc):
#print(f'Start: {list[0]}, end: {list[-1]}') # temp
tempDict = dict()
for name in listOfDoc:
with open(name) as f:
text = f.read()
li = re.findall(r'\b\w+\b', text)
for w in li:
if tempDict.get(w) is None:
tempDict[w] = set()
tempDict[w].add(str(name))
def getListOfDoc(self, keyWord):
return self.index[keyWord]
if __name__ == '__main__':
ii = InvertedIndex()
start_time = time.time()
ii.createIndex()
print("--- %s seconds ---" % (time.time() - start_time))
I used multiprocessing.manager to write everything in one dictionary, but that solution was too slow. So I went back to the idea of creating own inverted index for each process and then merging them. But I don't know how to return all indexes to one process.
Take a look at concurrent.futures (native library) with either ThreadPoolExecutor or ProcessPoolExecutor. FYI: I wrote on that in here and did not test but, this is more or less the jist of what I use all the time.
from concurrent.futures import ThreadPoolExecutor, as_completed
def foo(stuff: int) -> dict:
return {}
things_to_analyze = [1,2,3]
threads = []
results = []
with ThreadPoolExecutor() as executor:
for things in things_to_analyze:
threads.append(executor.submit(foo, thing))
for job in as_completed(threads):
results.append(job.results())
I found a solution. I used pool.starmap to return a list of indexes.
My code:
class InvertedIndex:
def __init__(self):
self.smallIndexes = None
self.index = dict()
def createIndex(self, path='data', threads_num=4):
pathList = list(Path(path).glob('**/*.txt')) # Рекурсивно проходимо по всіх текстових файлах і робимо з них список
fileNum = len(pathList)
oneProcessNum = fileNum / threads_num # Розраховуємо скільки файлів має обробити один процес
processes_args = []
for i in range(threads_num):
startIndex = int(i * oneProcessNum)
endIndex = int((i + 1) * oneProcessNum)
processes_args.append((path, startIndex, endIndex))
pool = mp.Pool(threads_num)
self.smallIndexes = pool.starmap(self.oneProcessTask, processes_args)
self.mergeIndex()
#staticmethod
def oneProcessTask(path, startIndex, endIndex):
pathList = list(Path(path).glob('**/*.txt'))
listOfDoc = pathList[startIndex:endIndex]
tempDict = dict()
for name in listOfDoc:
with open(name) as f:
text = f.read()
li = re.findall(r'\b\w+\b', text)
for w in li:
if tempDict.get(w) is None:
tempDict[w] = set()
tempDict[w].add(str(name))
return tempDict
Execution time decreased from 200 seconds (when I used shared memory and menger.dict ) to 0.8 seconds (when I used pool.starmap).
I have a instance with 16-core processor and I have a while loop like below,
count = 200000
num = 0
pbar = tqdm(total=count)
lst = []
while num <= count:
random_folder = os.path.join(path, np.random.choice(os.listdir(path)))
file_path = os.path.join(path, np.random.choice(os.listdir(random_folder)))
if not os.path.isdir(file_path):
lst.append(file_path)
pbar.update(1)
num += 1
When I tried to run this code on a server, the estimated time is really long
0%| | 138/200000 [02:14<51:25:11, 1.08it/s]
I have tried to use numpy to get random choice but it's still slow. Is there any way I can take advantage of my multi-core cpu and speed up this while loop? It's just collecting random files from sub folders. Really appreciate any help. Thanks
Update:
path = "/home/user12/pdf_files"
def get_random_file(num_of_files):
count = 0
random_files = []
while count < num_of_files:
random_folder = os.path.join(path, random.choice(os.listdir(path)))
file_path = os.path.join(path, random.choice(os.listdir(random_folder)))
if not os.path.isdir(file_path):
resumes_list.append(file_path)
count += 1
return random_files
with Pool(16) as p:
random_files = p.map(get_random_file, (1000/16,))
You can use multi processing and use all cores at the same time.
See https://docs.python.org/3.8/library/multiprocessing.html
Something like this:
from multiprocessing import Pool
def get_random_file(num_of_files):
# your logic goes here
count = 0
random_files = []
while count < num_of_files:
count += 1
pass
#get random file and append to 'random_files'
return random_files
if __name__ == '__main__':
with Pool(16) as p:
num_of_files = [200000/16 for i in range(1,16)]
random_files = p.map(get_random_file,num_of_files)
# random_files is a list of lists - you need to merge them into one list
I keep getting the error when running my code:
TypeError: object of type '_io.TextIOWrapper' has no len() function
How do I get it to open/read the file and run it through the loop?
Here's a link to the file that I am trying to import:
download link of the DNA sequence
def mostCommonSubstring():
dna = open("dna.txt", "r")
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
The problem occurs due to the way you are opening the text file.
You should add dna = dna.read() to your code.
so your end code should look something like this:
def mostCommonSubstring():
dna = open("dna.txt", "r")
dna = dna.read()
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
#tfabiant : I suggest this script to read and process a DNA sequence.
To run this code, in the terminal: python readfasta.py fastafile.fasta
import string, sys
##########I. To Load Fasta File##############
file = open(sys.argv[1])
rfile = file.readline()
seqs = {}
##########II. To Make fasta dictionary####
tnv = ""#temporal name value
while rfile != "":
if ">" in rfile:
tnv = string.strip(rfile)
seqs[tnv] = ""
else:
seqs[tnv] += string.strip(rfile)
rfile = file.readline()
##############III. To Make Counts########
count_what = ["A", "T", "C", "G", "ATG"]
for s in seqs:
name = s
seq = seqs[s]
print s # to print seq name if you have a multifasta file
for cw in count_what:
print cw, seq.count(cw)# to print counts by seq
Having issues with code. was given a file called "racing.csv" that stored the variables found in the "Drive" class. Concept behind the problem is that the program should sort the racetimes (lowest to highest) and assign points to the top 3 racers then export this data to a new file. All code is working fine aside from when I'm calling the shortBubbleSort on Drive and isn't sorting the racetimes correctly. Help is appreciated.
import csv
class Drive(object):
driver = ""
team = ""
racetime = 0.0
points = 0
def __init__(self,driver,team,racetime,points):
self.driver = driver
self.team = team
self.racetime = racetime
self.points = points
f = open('racing.csv', 'r')
csv_f = list(csv.reader(f))
driverclasses = []
for i in range(len(csv_f)):
d = Drive(csv_f[i][0],csv_f[i][1],csv_f[i][2],csv_f[i][3])
driverclasses.append(d)
for row in csv_f:
print (row)
for x in range(0, 6):
csv_f[x][2]=(input("Enter Racetime"))
def shortBubbleSort(alist):
exchanges = True
passnum = len(alist)-1
while passnum > 0 and exchanges:
exchanges = False
for i in range(passnum):
if alist[i]>alist[i+1]:
exchanges = True
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
passnum = passnum-1
shortBubbleSort(Drive)
print(csv_f)
csv_f[0][3] = 25
csv_f[1][3] = 18
csv_f[2][3] = 15
f = open('RacingResults.csv', 'w')
for row in csv_f:
print (row)
Does this help?
**range function sintax*: range([start], stop[, step])
start: Starting number of the sequence.
stop: Generate numbers up to, but not including this number.
step: Difference between each number in the sequence.
def shortBubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]>alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
def selectionSort(lst):
with lst as f:
nums = [int(line) for line in f]
for i in range(len(nums) - 1, 0, -1):
maxPos = 0
for position in range(1, i + 1):
if nums[position] > nums[maxPos]:
maxPos = position
value = nums[i]
nums[i] = nums[maxPos]
nums[maxPos] = value
def main():
textFileName = input("Enter the Filename: ")
lst = open(textFileName)
selectionSort(lst)
print(lst)
main()
Okay, thanks to hcwhsa for helping me out with the reading file and putting them all in one line.
When I run that code, i get this following error:
<_io.TextIOWrapper name='numbers.txt' mode='r' encoding='UTF-8'>
textfile:
67
7
2
34
42
Any help? Thanks.
You should return the list from the function and assign it to a variable and then print it.
def selectionSort(lst):
with lst as f:
nums = [int(line) for line in f]
...
...
return nums
sorted_lst = selectionSort(lst)
print(sorted_lst)
Your code didn't work because instead of passing the list you passed the file object to the function. This version of your code passes the list to the function, so no return value is required as you're modifying the same list object:
def selectionSort(nums):
for i in range(len(nums) - 1, 0, -1):
maxPos = 0
for position in range(1, i + 1):
if nums[position] > nums[maxPos]:
maxPos = position
value = nums[i]
nums[i] = nums[maxPos]
nums[maxPos] = value
def main():
textFileName = input("Enter the Filename: ")
with open(textFileName) as f:
lst = [int(line) for line in f]
selectionSort(lst)
print(lst)
main()