this is my code
ok i know my code have o^234234324 complexity algorithmically but
its work for all sequences except sequences/15.txt and sequences/16.txt
import sys
import csv
if len(sys.argv) != 3:
print("useage: filenameofdata.cvs filenameofsequence.txt")
sys.exit(1)
with open(sys.argv[1], "r") as datafile:
readdata = list(csv.reader(datafile))
with open(sys.argv[2], "r") as sequencefile:
readsequence = list(csv.reader(sequencefile))
strs = list(readdata[0][1:])
conlist = []
dnanum = 0
for move in (strs):
sequence = list(readsequence[0][0])
consecutively = 0
l = len(move)
cursor = [None] * 2
temp = [None] * l
x = 0
counter = 0
while counter == 0:
if sequence == []:
conlist.append(consecutively)
break
for oneletter in (sequence):
if x < 2:
cursor[x] = oneletter
temp[x] = oneletter
x += 1
if x == l:
asstring = ''.join(map(str, temp))
if asstring == move:
dnanum += 1
move
temp = [None] * l
x = 0
continue
else:
if consecutively < dnanum:
consecutively = dnanum
oneletter = sequence.remove(cursor[0])
temp = [None] * l
x = 0
dnanum = 0
break
# this print was for check if i got the right str consecutively
print(conlist)
conlist = ''.join(map(str, conlist))
for y in readdata:
x = ''.join(map(str, y[1:]))
if conlist == x:
print(y[0])
sys.exit(1)
print("No match")
when i try debug it in sequences/15.txt and sequences/16.txt or if i try to run them i got no output
the massage error when debug
~/pset6/dna/ $ debug50 python dna.py databases/large.csv sequences/15.txt
Traceback (most recent call last):
File "/usr/local/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/local/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/site-packages/ikp3db.py", line 2105, in <module>
ikp3db.main()
File "/usr/local/lib/python3.7/site-packages/ikp3db.py", line 2011, in main
debug_socket.bind((cmd_line_args.IKPDB_ADDRESS, cmd_line_args.IKPDB_PORT,))
OSError: [Errno 98] Address already in use
That's too complicated. To get maximum number of consecutive STRs for each STR, I only write 6 lines of code:
for i in range(1, len(data[0])): # loop through all STR
count = 1
string = data[0][i] # assign each STR to a string
while string * count in dna: # if find 1 string, then try to find string*2, and so on
count += 1
counts.append(str(count - 1)) # should be decreased by 1 as initialized to 1
Related
Basically i want to parallize the output from function _spectrum_generator. I think _specturm_generator yield iterator. In gather_data i am using multiprocessing. For n_process=1 it is working fine but for n_process>1 it is giving TypeError: cannot pickle '_io.BufferedReader' object. gather_data function is called _save_lcms_data_feather. I am using spec_function to process the output of each _specturm_generator.
from utils import _spectrum_generator
from utils import _get_scan_polarity
from multiprocessing import Pool
from functools import partial
# Enum for polarity
POLARITY_POS = 1
POLARITY_NEG = 2
all_mz = []
all_rt = []
all_polarity = []
all_i = []
all_scan = []
all_index = []
spectrum_index = 0
number_spectra = 0
all_msn_mz = []
all_msn_rt = []
all_msn_polarity = []
all_msn_scan = []
all_msn_level = []
def spec_function(spec, min_rt, max_rt, min_mz, max_mz, polarity_filter="None", top_spectrum_peaks=100, include_polarity=False):
global spectrum_index
global number_spectra
global all_mz
global all_rt
global all_polarity
global all_i
global all_scan
global all_index
global all_msn_mz
global all_msn_rt
global all_msn_polarity
global all_msn_scan
global all_msn_level
rt = spec.scan_time_in_minutes()
try:
# Still waiting for the window
if rt < min_rt:
return None
# We've passed the window
if rt > max_rt:
return None
except:
pass
if polarity_filter == "None":
pass
else:
scan_polarity = _get_scan_polarity(spec)
if polarity_filter != scan_polarity:
return None
if spec.ms_level == 1:
spectrum_index += 1
number_spectra += 1
try:
# Filtering peaks by mz
if min_mz <= 0 and max_mz >= 2000:
peaks = spec.peaks("raw")
else:
peaks = spec.reduce(mz_range=(min_mz, max_mz))
# Filtering out zero rows
peaks = peaks[~np.any(peaks < 1.0, axis=1)]
# Sorting by intensity
peaks = peaks[peaks[:,1].argsort()]
peaks = peaks[-1 * top_spectrum_peaks:]
mz, intensity = zip(*peaks)
all_mz += list(mz)
all_i += list(intensity)
all_rt += len(mz) * [rt]
all_scan += len(mz) * [spec.ID]
all_index += len(mz) * [number_spectra]
# Adding polarity
if include_polarity is True:
scan_polarity = _get_scan_polarity(spec)
if scan_polarity == "Positive":
all_polarity += len(mz) * [POLARITY_POS]
else:
all_polarity += len(mz) * [POLARITY_NEG]
except:
pass
elif spec.ms_level > 1:
try:
msn_mz = spec.selected_precursors[0]["mz"]
if msn_mz < min_mz or msn_mz > max_mz:
return None
all_msn_mz.append(msn_mz)
all_msn_rt.append(rt)
all_msn_scan.append(spec.ID)
all_msn_level.append(spec.ms_level)
# Adding polarity
if include_polarity is True:
scan_polarity = _get_scan_polarity(spec)
if scan_polarity == "Positive":
all_msn_polarity.append(POLARITY_POS)
else:
all_msn_polarity.append(POLARITY_NEG)
except:
pass
def gather_data(filename, min_rt, max_rt, min_mz, max_mz, polarity_filter="None", top_spectrum_peaks=100, include_polarity=False, n_processes=1):
# Iterating through all data with a custom scan iterator
# It handles custom bounds on RT
if n_processes == 1:
for spec in _spectrum_generator(filename, min_rt, max_rt):
spec_function(spec, min_rt, max_rt, min_mz, max_mz, polarity_filter, top_spectrum_peaks, include_polarity)
else:
pool = Pool(n_processes)
pool.map(partial(spec_function, min_rt=min_rt, max_rt=max_rt, min_mz=min_mz, max_mz=max_mz, polarity_filter=polarity_filter, top_spectrum_peaks=top_spectrum_peaks, include_polarity=include_polarity),_spectrum_generator(filename, min_rt, max_rt))
pool.close()
pool.join()
return all_mz, all_rt
def _save_lcms_data_feather(filename):
all_mz_,all_rt_ =gather_data(filename,0,1000000,0,10000,polarity_filter="None",top_spectrum_peaks=100000,include_polarity=True,n_processes=2)
print("Number of spectra: " + str(len(all_mz_)))
print(all_mz_[1:10])
_save_lcms_data_feather("/home/ashish/GNPS_LCMSDashboard/QC_0.mzML")
Below is the code of _spectrum_generator function:
def _spectrum_generator(filename, min_rt, max_rt):
run = pymzml.run.Reader(filename, MS_precisions=MS_precisions)
# Don't do this if the min_rt and max_rt are not reasonable values
if min_rt <= 0 and max_rt > 1000:
for spec in run:
yield spec
else:
try:
min_rt_index = _find_lcms_rt(run, min_rt) # These are inclusive on left
max_rt_index = _find_lcms_rt(run, max_rt) + 1 # Exclusive on the right
for spec_index in tqdm(range(min_rt_index, max_rt_index)):
spec = run[spec_index]
yield spec
print("USED INDEX")
except:
run = pymzml.run.Reader(filename, MS_precisions=MS_precisions)
for spec in run:
yield spec
print("USED BRUTEFORCE")
Here is ERROR log:
raceback (most recent call last):
File "/home/ashish/GNPS_LCMSDashboard/lcms_map.py", line 314, in <module>
_save_lcms_data_feather("/home/ashish/GNPS_LCMSDashboard/QC_0.mzML")
File "/home/ashish/GNPS_LCMSDashboard/lcms_map.py", line 270, in _save_lcms_data_feather
all_mz_,all_rt_ =gather_data(filename,0,1000000,0,10000,polarity_filter="None",top_spectrum_peaks=100000,include_polarity=True,n_processes=2)
File "/home/ashish/GNPS_LCMSDashboard/lcms_map.py", line 147, in gather_data
pool.map(partial(spec_function, min_rt=min_rt, max_rt=max_rt, min_mz=min_mz, max_mz=max_mz, polarity_filter=polarity_filter, top_spectrum_peaks=top_spectrum_peaks, include_polarity=include_polarity),_spectrum_generator(filename, min_rt, max_rt))
File "/home/ashish/miniconda3/lib/python3.9/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/home/ashish/miniconda3/lib/python3.9/multiprocessing/pool.py", line 771, in get
raise self._value
File "/home/ashish/miniconda3/lib/python3.9/multiprocessing/pool.py", line 537, in _handle_tasks
put(task)
File "/home/ashish/miniconda3/lib/python3.9/multiprocessing/connection.py", line 211, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/ashish/miniconda3/lib/python3.9/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle '_io.BufferedReader' object
I want to know how can i get rid of this error means how can i correct it. And i want to know reason behind this error.
I have also tried TypeError("cannot pickle '_io.BufferedReader' object") but it seem it is not working in my case.
So i have been working on this code for a while and i cant fin a solution to this problem and i was wondering if anyone in here could help me solve this? The problem is supposed to be in the method "hent_celle" where it takes in a coordinate from a grid and returns the object that is in that position.
The error is:
Traceback (most recent call last):
File "/Users/cc/Documents/Python/Oblig8/rutenettto.py", line 105, in <module>
print(testobjekt._sett_naboer(2,1))
File "/Users/cc/Documents/Python/Oblig8/rutenettto.py", line 64, in
_sett_naboer nabo_u_kol = self.hent_celle(rad+1,kol)
File "/Users/cc/Documents/Python/Oblig8/rutenettto.py", line 43, in hent_celle
return self._rutenett[rad][kol] IndexError: list index out of range
And the code is:
from random import randint
from celle import Celle
class Rutenett:
def init(self,rader,kolonner):
self._ant_rader = int(rader)
self._ant_kolonner = int(kolonner) self._rutenett = []
def _lag_tom_rad(self):
liste = []
for x in range(self._ant_kolonner):
liste.append(None)
return liste
def _lag_tomt_rutenett(self):
liste2 = []
for x in range(self._ant_rader):
liste_ = self._lag_tom_rad()
liste2.append(liste_)
self._rutenett = liste2
def lag_celle(self,rad,kol):
celle = Celle()
tilfeldig_tall = randint(0,100)
if tilfeldig_tall <= 33:
celle.sett_levende()
return celle
else:
return celle
def fyll_med_tilfeldige_celler(self):
for x in self._rutenett:
for y in x:
rad = int(self._rutenett.index(x))
kol = int(x.index(y))
self._rutenett[rad][kol] = self.lag_celle(rad,kol)
def hent_celle(self,rad,kol):
if rad > self._ant_rader or kol > self._ant_kolonner or rad < 0 or kol < 0:
return None
else:
return self._rutenett[rad][kol]
def tegn_rutenett(self):
for x in self._rutenett:
for y in x:
print(y.hent_status_tegn(), end="")
def hent_alle_celler(self):
liste = []
for x in self._rutenett:
for y in x:
liste.append(y)
return liste
def _sett_naboer(self,rad,kol):
cellen = self.hent_celle(rad,kol)
# lik linje
nabo_v_rad = self.hent_celle(rad,kol-1)
nabo_h_rad = self.hent_celle(rad,kol+1)
# under
nabo_u_kol = self.hent_celle(rad+1,kol)
nabo_u_kol_h = self.hent_celle(rad+1,kol+1)
nabo_u_kol_v = self.hent_celle(rad+1,kol-1)
# over
nabo_o_kol = self.hent_celle(rad-1,kol)
nabo_o_kol_h = self.hent_celle(rad-1,kol+1)
nabo_o_kol_v = self.hent_celle(rad-1,kol-1)
liste = [nabo_v_rad,nabo_h_rad,nabo_u_kol_h,nabo_u_kol_v,nabo_o_kol,nabo_o_kol_h,nabo_o_kol_v]
#print(liste)
#print(nabo_o_kol_h)
for x in liste:
if x == None:
pass
else:
cellen._naboer.append(x)
return cellen._naboer
def antall_levende(self):
teller = 0
for x in self._rutenett:
for y in x:
if y._status == "doed":
pass
else:
teller +=1
return teller
testobjekt = Rutenett(3,3)
testobjekt._lag_tomt_rutenett()
testobjekt.fyll_med_tilfeldige_celler()
print(testobjekt._sett_naboer(2,1))
I just cant figure out why the list index is out of range
Pyhton list indexes start at 0, which means a list with 10 elements will use indices 0-9. Assuming self._ant_rader and self._ant_kolonner are the number of rows and columns, then rad and kol would need to be less than those values and cannot be the same value, or you get an index out of bounds error.
Fixed version of the method:
def hent_celle(self,rad,kol):
if rad >= self._ant_rader or kol >= self._ant_kolonner or rad < 0 or kol < 0:
return None
else:
return self._rutenett[rad][kol]
As you can see, the > has been replaced with >= instead. This means indices which are out of bounds will return None.
I get an index -1 is out of bounds for axis 0 with size 0 error from scipy when trying to implement a text generator with ngrams.
Traceback (most recent call last):
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 125, in <module>
generate()
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 118, in generate
singleverse(int(c))
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 80, in singleverse
result = stats.multinomial.rvs(1, word_probabilities)
File "C:\Users\hp\PycharmProjects\N-gram poems\venv\lib\site-packages\scipy\stats\_multivariate.py", line 3242, in rvs
n, p, npcond = self._process_parameters(n, p)
File "C:\Users\hp\PycharmProjects\N-gram poems\venv\lib\site-packages\scipy\stats\_multivariate.py", line 3036, in _process_parameters
p[..., -1] = 1. - p[..., :-1].sum(axis=-1)
IndexError: index -1 is out of bounds for axis 0 with size 0
It's in a for loop and when the error occurs changes each time. Some times it does not occur at all. It mostly occur close to the end of the program.
This is the code where the error occurs:
def singleverse(num):
TrTrigrams = [((filtered_tokens[i], filtered_tokens[i + 1]), filtered_tokens[i + 2]) for i in
range(len(filtered_tokens) - 2)]
TrTrigramCFD = nltk.ConditionalFreqDist(TrTrigrams)
TrTrigramPbs = nltk.ConditionalProbDist(TrTrigramCFD, nltk.MLEProbDist)
rand = random.choice(random_choice_list)
start_word = ('<s>', rand)
data = []
for i in range(10):
probable_words = list(TrTrigramPbs[start_word].samples())
word_probabilities = [TrTrigramPbs[start_word].prob(word) for word in probable_words]
result = stats.multinomial.rvs(1, word_probabilities)
index_of_probable_word = list(result).index(1)
start_word = (start_word[1], (probable_words[index_of_probable_word]))
data.append(start_word[1])
line = []
for i in data:
if i != "<s>" and i != "</s>":
line.append(i)
poem_line = ' '.join([str(i) for i in line]).capitalize()
print(poem_line)
def generate():
"""Generates the final poem with user input of structure."""
print("What structure do you want?(e.g., 3 x 4, 2 x 4, 2 x 5): ")
while True:
try:
x, y, z = input().split()
except:
print("Enter the structure as shown above.")
continue
break
while True:
try:
for stanza in range(1):
for first_verse in range(1):
b = random.randint(7, 12)
firstverse(int(b))
for verse in range(int(z) - 1):
a = random.randint(7, 12)
singleverse(int(a))
print('\n')
for stanza in range(int(x) - 1):
for verse in range(int(z)):
c = random.randint(7, 12)
singleverse(int(c))
print('\n')
except KeyError:
print("This was not a valid seed word please try again.")
continue
break
generate()
def cipherText():
text = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
key = int(input("Enter numerical key--"))
word = str(input("Type word to be ciphered--"))
i = 0
k = 0
n = len(word)
print(n)
while n >= 0:
letter = word[i]
i = i + 1
while k <= 25:
textLetter = text[k]
if textLetter == letter:
givenLetter = letter
if k < (25 - key):
cipherLength = k + key
else:
cipherLength = k + key - 25
print(text[cipherLength])
k = k + 1
n = n - 1
cipherText()
WHEN I RUN THIS FOLLOWING MESSAGE POPS OUT:
Traceback (most recent call last): File "main.py", line 23, in
cipherText() File "main.py", line 10, in cipherText
letter=word[i] IndexError: string index out of range
You need to modify condition while n>=0:, as list starts with 0th index.
this line,
while n>=0:
should be,
while n-1>=0:
I don't know why I keep getting the error "string index out of range" and another error on line 47 print_data(data. Can someone please explain why? Thank you
def open_file():
user_input = input('Enter a file name: ')
try:
file = open(user_input, 'r')
return file
except FileNotFoundError:
return open_file()
def read_data(file):
counter = [0 for _ in range(9)]
for line in file.readlines():
num = line.strip()
if num.isdigit():
i = 0
digit = int(num[i])
while digit == 0 and i < len(num):
i += 1
digit = int(num[i])
if digit != 0:
counter[digit - 1] += 1
return counter
def print_data(data):
benford = [30.1, 17.6, 12.5, 9.7, 7.9, 6.7, 5.8, 4.1, 4.6]
header_str = "{:5s} {:7s}{:8s}"
data_str = "{:d}:{:6.1f}% ({:4.1f}%)"
total_count = sum(data)
print(header_str.format("Digit", "Percent", "Benford"))
for index, count in enumerate(data):
digit = index + 1
percent = 100 * count / total_count
print(data_str.format(digit, percent, benford[index]))
def main():
file = open_file()
data = read_data(file)
print_data(data)
file.close()
if __name__ == "__main__":
main()
This is the exact error I'm given
Traceback (most recent call last):
File "./lab08.py", line 52, in <module>
main()
File "./lab08.py", line 47, in main
data = read_data(file)
File "./lab08.py", line 26, in read_data
digit = int(num[i])
I believe the error stems from this:
while digit == 0 and i < len(num):
i += 1
digit = int(num[i])
If you swap the second two lines, you will properly index, i.e.:
while digit == 0 and i < len(num):
digit = int(num[i])
i += 1
If, for example, your string num is of length 10, then the final element is at index 9 (indexing from 0). for the first iteration of that loop, you will have digit be num[1], for the tenth iteration you would have it be num[10].
An alternative method would be to use list comprehension like this:
for n in num:
if digit != 0:
break
digit = int(n)