Writing a standard deviation function - python

I have a dictionary of words as keys and ints as value. It outputs as such:
print (word_ratings_dict)
{'hate': [1, 2, 2, 1, 1, 3, 0, 2, 3, 2, 0, 4, 1, 1], 'joy': [3, 4, 3, 3, 2, 4, 1]}
For each key word in the dictionary, I need to calculate its standard deviation WITHOUT using the statistics module.
Heres what I have so far:
def menu_validate(prompt, min_val, max_val):
""" produces a prompt, gets input, validates the input and returns a value. """
while True:
try:
menu = int(input(prompt))
if menu >= min_val and menu <= max_val:
return menu
break
elif menu.lower == "quit" or menu.lower == "q":
quit()
print("You must enter a number value from {} to {}.".format(min_val, max_val))
except ValueError:
print("You must enter a number value from {} to {}.".format(min_val, max_val))
def open_file(prompt):
""" opens a file """
while True:
try:
file_name = str(input(prompt))
if ".txt" in file_name:
input_file = open(file_name, 'r')
return input_file
else:
input_file = open(file_name+".txt", 'r')
return input_file
except FileNotFoundError:
print("You must enter a valid file name. Make sure the file you would like to open is in this programs root folder.")
def make_list(file):
lst = []
for line in file:
lst2 = line.split(' ')
del lst2[-1]
lst.append(lst2)
return lst
def rating_list(lst):
'''iterates through a list of lists and appends the first value in each list to a second list'''
rating_list = []
for list in lst:
rating_list.append(list[0])
return rating_list
def word_cnt(lst, word : str):
cnt = 0
for list in lst:
for word in list:
cnt += 1
return cnt
def words_list(file):
lst = []
for word in file:
lst.append(word)
return lst
def word_rating(word, ratings_lst):
'''finds ratings for a word and appends them to a dictionary of words'''
lst = []
for line in ratings_lst:
line = line.split()
if word in line:
rating = line[0]
lst.append(int(rating))
return lst
cnt_list = []
while True:
menu = menu_validate("1. Get sentiment for all words in a file? \nQ. Quit \n", 1, 1)
if menu == True:
ratings_file = open("sample.txt")
ratings_list = make_list(ratings_file)
word_ratings_dict = {}
word_avg_dict = {}
std_dev_dict = {}
word_file = open_file("Enter the name of the file with words to score \n")
word_list = words_list(word_file)
for word in word_list:
#counts the words
cnt = word_cnt(ratings_list, word)
cnt_dict[word] = cnt
word_ratings_dict[word] = word_rating(word, ratings_list)
total_rating = 0
for i in range (0, cnt):
total_rating += word_ratings_dict[word][i]
word_avg_dict[word] = total_rating/cnt
std_dev_dict[word] =

These will do the job nicely:
def mean(data):
return float(sum(data) / len(data))
def variance(data):
mu = mean(data)
return mean([(x - mu) ** 2 for x in data])
def stddev(data):
return sqrt(variance(data))

or you can do it in one function :
data = []
def StdDev( data ):
suma = 0
for x in data: suma += ((x - sum(data)/len(data))**2) # every iteration subtracks the mean value of a list [sum(data)/len(data)] from every [x] element of a list and then raises to the second power [**2]
print((suma/(len(data)-1))**0.5) # then divide by n-1 [len(data)-1] and extract the root of 2 [**0.5]

Related

Need some help properly sorting a BFS tree into two separate sublists

I have the code
from collections import deque
def bubbles(physical_contact_info):
"""takes physical contact info about a group of people and determines the bubbles."""
result = []
contacted = []
non_contacted = []
adj = adjacency_list(physical_contact_info)
j = 0
print(j)
print(adj)
for x in range(len(adj)-1, -1, -1):
info = dfs_tree(adj, j)
print(info)
i = len(info)-1
while info[i] != None:
contacted.insert(0, i)
i = info[i]
#print(i)
#print(contacted)
if info[i] == None:
non_contacted.insert(0, i)
if len(non_contacted) != 0:
result.append(non_contacted)
if len(contacted) != 0:
result.append(contacted)
return result
def adjacency_list(graph_str):
""">"""
splitted = graph_str.splitlines()
header = splitted[0]
number_of_vertices = int(header.split()[1])
adjacent = [[] for vertex in range(number_of_vertices)]
#check if weighted, directed
if header[-1] == "W":
is_weighted = True
else:
is_weighted = False
if is_weighted:
if header[0] == "D":
for line in splitted[1:]:
new_line = line.split()
adjacent[int(new_line[0])].append((int(new_line[1]), int(new_line[2])))
else:
for line in splitted[1:]:
new_line = line.split()
adjacent[int(new_line[0])].append((int(new_line[1]), \
int(new_line[2])))
adjacent[int(new_line[1])].append((int(new_line[0]),\
int(new_line[2])))
else:
if header[0] == "D":
for line in splitted[1:]:
new_line = line.split()
adjacent[int(new_line[0])].append((int(new_line[1]), None))
else:
for line in splitted[1:]:
new_line = line.split()
adjacent[int(new_line[0])].append((int(new_line[1]), None))
adjacent[int(new_line[1])].append((int(new_line[0]),None))
return adjacent
def dfs_tree(adj_list, start):
"""."""
number_of_vertices = len(adj_list)
state = ["U" for i in range(number_of_vertices)]
parent = [None for i in range(number_of_vertices)]
state[start] = "D"
new = dfs_loop(adj_list, start, state, parent)
return parent
def dfs_loop(adj_list, u, state, parent):
"""."""
for v, weight in adj_list[u]:
if state[v] == "U":
state[v] = "D"
parent[v] = u
dfs_loop(adj_list, v, state, parent)
state[u] = "P"
physical_contact_info = """\
U 2
0 1
"""
print(sorted(sorted(bubble) for bubble in bubbles(physical_contact_info)))
#Should be returning [[0, 1]], currently getting [[0, 1], [1]]
I am trying to get everything so that at the end it will return a list consisting of two sublists, one of every vertex that is not connected to another ("Uncontacted" if you will) and another that contains every vertex that does have a connection to another ("Contacted"). I am having trouble getting the contents of the tree to insert into the proper lists, as I do not know what to do to get these vertices to append to the correct list. Help would be greatly appreciated!
Other test cases
Case 1:
physical_contact_info = """
U 2
"""
print(sorted(sorted(bubble) for bubble in bubbles(physical_contact_info)))
Output should be:
[[0], [1]]
Case 2:
physical_contact_info = """
U 7
1 2
1 5
1 6
2 3
2 5
3 4
4 5
"""
print(sorted(sorted(bubble) for bubble in bubbles(physical_contact_info)))
Output should be:
[[0], [1, 2, 3, 4, 5, 6]]
Case 3:
physical_contact_info = """
U 0
"""
print(sorted(sorted(bubble) for bubble in bubbles(physical_contact_info)))
Output should be:
[]
Case 4:
physical_contact_info = """
U 1
"""
print(sorted(sorted(bubble) for bubble in bubbles(physical_contact_info)))
Output should be:
[[0]]
Have been thinking of a for loop at the moment, but am still unsure if that would help or if it would be necessary.

Python print with integer string mix and group matching problem

import re
import string
line = "F1,F2,F3,F6,R1,R3,R341,C1,C10,C2,FL1,FL5"
line_no_digits= re.findall("\D",line)#produce line with refernces F,F,F,F,R,R,R,C,C,C,FL,FL
line_no_digits_string= ''.join(map(str,line_no_digits))
#############################################################
res = line_no_digits_string.split(",")
alpha_char_unique=[]
for i in res: #get unique reference prefix
if i not in alpha_char_unique:
alpha_char_unique.append(i)
#############################################################
line2 = line.split(",")
preLine = ""
result = ""
for j in alpha_char_unique:
p = re.compile(j)
preLine = [s for s in line2 if p.match(s)]
numbers_only = [int(re.search('\d+', l).group(0)) for l in preLine if re.search('\d+', l) is not None]#pull numbers for matches or prefix, not working correctly
numbers_only.sort()#sort the numbers to work in code below
#print(numbers_only)
c = 0#counter
result = ""#output result
current = None
run = False
x = len(numbers_only)
while c < x:#number sorter
if current == None:
result += f'{numbers_only[c]}'
elif numbers_only[c] - current == 1:
run = True
if c == len(numbers_only) - 1:
result += f'-{numbers_only[c]}'
elif run:
result += f'-{current},{numbers_only[c]}'
run = False
else:
result += f',{numbers_only[c]}'
current = numbers_only[c]
c += 1
print(result)
The result is
1,1-3,5-6
1,3,341
1-2,10
1,5
I would like it to be
F1-F3,F6
R1,R3,R341
C1-C2,C10
FL1,FL5
matching may not be the best for this. I am looking for suggestions on how to make sure that F1-F3 and FL1,FL5 are not grouped together. I am open to suggestions.
This does it.
import re
import string
from collections import OrderedDict
line = "F1,F2,F3,F6,R1,R3,R341,C1,C10,C2,FL1,FL5"
result = []
def add_result(result,last_letters,starting_num,latest_num):
if last_letters is None:
return
if latest_num > starting_num:
result.append('%s%d-%s%d' % (last_letters,starting_num,last_letters,latest_num))
else:
result.append('%s%d' % (last_letters,starting_num))
letter_map = OrderedDict()
for letters,number in re.findall('([A-Z]+)(\\d+)',line):
number = int(number)
if not letters in letter_map:
letter_map[letters] = []
letter_map[letters].append(number)
last_letters = starting_num = latest_num = None
for letters,numbers in letter_map.items():
for number in sorted(numbers):
if last_letters is None:
last_letters = letters
starting_num = latest_num = number
elif letters == last_letters:
if number == latest_num + 1:
latest_num = number
else:
add_result(result,last_letters,starting_num,latest_num)
starting_num = latest_num = number
else:
add_result(result,last_letters,starting_num,latest_num)
last_letters = letters
starting_num = latest_num = number
add_result(result,last_letters,starting_num,latest_num)
final_result = ','.join(result)
print(final_result) #F1-F3,F6,R1,R3,R341,C1-C2,C10,FL1,FL5
Edit:
For your modified version from your comment, I think you have a couple of typos: "R1,2" should be "R1.2" and "F2.," should be "F2,". Let me know if those are not typos. Assuming they are, the minimal update to get it working is to accept more than just letters as the prefix, but also allow trailing numbers if they end with a dot. Also allow an underscore. The only thing that needs to change is the regular expression in the re.findall line.
import re
import string
from collections import OrderedDict
line = "F1.1,F1.2,F1.3.F2,F3,F6,R1.1,R1.2,R1.3,R2,R3,R341,C1,C10,C2,FL_1,FL_5"
result = []
def add_result(result,last_letters,starting_num,latest_num):
if last_letters is None:
return
if latest_num > starting_num:
result.append('%s%d-%s%d' % (last_letters,starting_num,last_letters,latest_num))
else:
result.append('%s%d' % (last_letters,starting_num))
letter_map = OrderedDict()
for letters,number in re.findall('([A-Z]+_{0,1}(?:[0-9]+\\.){0,1})(\\d+)',line):
number = int(number)
if not letters in letter_map:
letter_map[letters] = []
letter_map[letters].append(number)
last_letters = starting_num = latest_num = None
for letters,numbers in letter_map.items():
for number in sorted(numbers):
if last_letters is None:
last_letters = letters
starting_num = latest_num = number
elif letters == last_letters:
if number == latest_num + 1:
latest_num = number
else:
add_result(result,last_letters,starting_num,latest_num)
starting_num = latest_num = number
else:
add_result(result,last_letters,starting_num,latest_num)
last_letters = letters
starting_num = latest_num = number
add_result(result,last_letters,starting_num,latest_num)
final_result = ','.join(result)
print(final_result) #F1.1-F1.3,F2-F3,F6,R1.1-R1.3,R2-R3,R341,C1-C2,C10,FL_1,FL_5```

CS50 DNA - it works for small.csv but not for large

https://cs50.harvard.edu/x/2020/psets/6/dna/#:~:text=python%20dna.py%20databases/large.csv%20sequences/5.txt
I'm trying to solve this problem from CS50 but it just works for the small database, when I try it for the large one the program overcounts.
import csv
if len(argv) != 3:
print("DIGITA DIREITO, IMBECIL")
exit()
with open(argv[1], "r") as source:
reader = list(csv.reader(source))
reader[0].remove("name")
i = reader[0]
with open(argv[2], "r") as sequence:
seq = sequence.read()
values = []
for j in range(len(i)):
value = 0
counter = 0
pos = 0
prevpos = 0
while pos < len(seq):
pos = seq.find(i[j], pos)
if pos == -1:
counter = 0
break
elif (pos != 1):
counter += 1
prevpos = pos
pos += len(i[j])
if value < counter:
value = counter
values.append(value)
for row in range(len(reader)):
print(reader[row])
print(values)
values = list(map(str, values))
search = list(reader)
search.pop(0)
for result in search:
if result[1:] == values:
print(f"{result[0]}")
break
elif result == search[-1]:
print("No match")
I think you are just counting the STRs repetitions in the sequence, not the maximum consecutive STR repetitions. This is what the problem asks

How to sort a list that has strings and ints by size then output either int or string in python 3

I have this code, which will sort the inputs, but the only way it works is by the inputs being sorted before they are entered into the list. I want to re write the code to take any list and output the word or number, whichever happens to be second largest.
def secound_largest(values: {}):
sorted_values = sorted(values.items(), key=lambda kv: kv[1], reverse=True)
second_maximum = list(sorted_values)[1][0]
print(str(second_maximum)+ 'is the second largest item on the list')
if __name__ == '__main__':
list_input_amount = int(input('How many items are in your list? '))
dictonary_values = {}
for amount in range(list_input_amount):
list_input = input('Please enter your list item: ')
if list_input.isnumeric():
dictonary_values[int(list_input)] = int(list_input)
else:
dictonary_values[list_input] = len(list_input)
secound_largest(dictonary_values)
The following code will do what you want:
def argmax(subscriptable):
_max = subscriptable[0]
_max_inv = 0
for idx in range(1, len(subscriptable)):
elem = subscriptable[idx]
if elem > _max:
_max = elem
_max_inv = idx
return _max_inv
def second_large_arg(subscriptable):
big1 = subscriptable[0]
big2 = subscriptable[1]
big1_inv = 0
big2_inv = 1
if big1 < subscriptable[1]:
big1 = subscriptable[1]
big2 = subscriptable[0]
big1_inv = 1
big2_inv = 0
for idx in range(2, len(subscriptable)):
elem = subscriptable[idx]
if elem > big1:
big2_inv = big1_inv
big1 = elem
big1_inv = idx
elif elem > big2:
big2 = elem
big2_inv = idx
return big2_inv
d = {
0:23,
1:83,
2:999999999999999999999999999999,
3:87,
4:91,
5:32111111,
6:21
}
print(argmax(d)) # prints 2
print(second_large_arg(d)) # prints 5
L = {
0:"apple",
1:1,
2:"SUBDERMATOGLYPHIC",
3:"banana",
4:99999999999999,
5:2
}
L = [len(x) if hasattr(x, "__len__") else x for x in L.values()]
print(L)
print(argmax(L)) # prints 4
print(second_large_arg(L)) # prints 2
list2= []
list= [3, 5, 'python', 2, -1, 7]
for i in range(len(list)):
try:
item=[len(list[i]),list.index(list[i])]
list2.append(item)
except TypeError:
item= [list[i],list.index(list[i])]
list2.append(item)
list2.sort(key=lambda x:x[0],reverse=True)
print(list[list2[1][1]])

python: taking the return of a function and using it in another function

Hi guys i got my code to work by putting everything in one function which is this
spam = ''
def enterList (names):
newList = []
while True:
names = raw_input('list a series of items and press blank when finished: ')
if names == '':
break
newList = newList + [names]
a = ''
finalText = ''
listOfStuff = []
item = 0
for i in newList:
if item < len(newList)-2:
a = (i + ', ')
listOfStuff.append(a)
item +=1
elif item == len(newList)-2:
a = (i + ' and ')
listOfStuff.append(a)
item +=1
else:
a = i
listOfStuff.append(a)
break
finalText = finalText.join(listOfStuff)
return finalText
print enterList(spam)
So the above code works as i want it to. However i was trying to do the same thing by having two separate functions, the issue that i was having was that i couldn't take the return value of one function and use it in the next function.
This is the old code
spam = ''
def enterList (names):
newList = []
while True:
names = raw_input('list a series of items and press blank when finished: ')
if names == '':
break
newList = newList + [names]
return newList
print enterList(spam)
def newFunc(Addand):
a = ''
finalText = ''
listOfStuff = []
item = 0
for i in spam:
if item < len(spam)-2:
a = (i + ', ')
listOfStuff.append(a)
item +=1
elif item == len(spam)-2:
a = (i + ' and ')
listOfStuff.append(a)
item +=1
else:
a = i
listOfStuff.append(a)
break
finalText = finalText.join(listOfStuff)
return finalText
newFunc(spam)
print newFunc (spam)
I'm not sure what I was doing wrong doing it this way.
Thanks for any help to get my head around the error with this approach.
In your first function make the return statement
return newFunc(newlist)
It's not working because the second function is never actually called.

Categories

Resources