No module named 'msvcrt' - python

I am stuck in here (picture below) and need to pass this in order for it to read my text. Also I have text.txt file. The program itself won't run with python 2. With python 2 it gives me an error in here: print(last_suggestion, end=' ', flush=True).
train_data = 'text.txt'
first_possible_words = {}
second_possible_words = {}
transitions = {}
def expandDict(dictionary, key, value):
if key not in dictionary:
dictionary[key] = []
dictionary[key].append(value)
def get_next_probability(given_list): #returns dictionary
probability_dict = {}
given_list_length = len(given_list)
for item in given_list:
probability_dict[item] = probability_dict.get(item, 0) + 1
for key, value in probability_dict.items():
probability_dict[key] = value / given_list_length
return probability_dict
def trainMarkovModel():
for line in open(train_data):
tokens = line.rstrip().lower().split()
tokens_length = len(tokens)
for i in range(tokens_length):
token = tokens[i]
if i == 0:
first_possible_words[token] = first_possible_words.get(token, 0) + 1
else:
prev_token = tokens[i - 1]
if i == tokens_length - 1:
expandDict(transitions, (prev_token, token), 'END')
if i == 1:
expandDict(second_possible_words, prev_token, token)
else:
prev_prev_token = tokens[i - 2]
expandDict(transitions, (prev_prev_token, prev_token), token)
first_possible_words_total = sum(first_possible_words.values())
for key, value in first_possible_words.items():
first_possible_words[key] = value / first_possible_words_total
for prev_word, next_word_list in second_possible_words.items():
second_possible_words[prev_word] = get_next_probability(next_word_list)
for word_pair, next_word_list in transitions.items():
transitions[word_pair] = get_next_probability(next_word_list)
def next_word(tpl):
#print(transitions)
if(type(tpl) == str): #it is first word of string.. return from second word
d = second_possible_words.get(tpl)
if (d is not None):
return list(d.keys())
if(type(tpl) == tuple): #incoming words are combination of two words.. find next word now based on transitions
d = transitions.get(tpl)
if(d == None):
return []
return list(d.keys())
return None #wrong input.. return nothing
trainMarkovModel() #generate first, second words list and transitions
########## demo code below ################
print("Usage: start typing.. program will suggest words. Press tab to chose the first suggestion or keep typing\n")
import msvcrt #use of mscvrt to get character from user on real time without pressing enter
c=''
sent=''
last_suggestion=[]
while(c != b'\r'): #stop when user preses enter
if(c != b'\t'): #if previous character was tab, then after autocompletion dont wait for user inpput.. just show suggestions
c=msvcrt.getch()
else:
c = b' '
if(c != b'\t'): #dont print tab etc
print(str(c.decode('utf-8')), end=' ', flush=True)
sent = sent + str(c.decode('utf-8')) #create word on space
if(c == b' '):
tkns = sent.split()
if(len(tkns) < 2): #only first space encountered yet
last_suggestion = next_word(tkns[0].lower())
print(last_suggestion, end=' ', flush=True)
else: #send a tuple
last_suggestion = next_word((tkns[-2].lower(), tkns[-1].lower()))
print(last_suggestion, end=' ', flush=True)
if (c == b'\t' and len(last_suggestion) > 0): #print last suggestion on tab
print(last_suggestion[0], end=' ', flush=True)
sent = sent + " " + last_suggestion[0]
I am using Mac and running this in Visual Code and this is the error I get:
baylarbayramov#Baylars-MacBook-Pro markov-predict-next-word-master % python3 -u "markov_nextwordpred.py"
Usage: start typing.. program will suggest words. Press tab to chose the first suggestion or keep typing
Traceback (most recent call last):
File "markov_nextwordpred.py", line 69, in <module>
import msvcrt
ModuleNotFoundError: No module named 'msvcrt'

Related

How to turn a linear string into a trie?

I am using the make me a hanzi open-source chinese character dataset. As part of this dataset there are strings which provide the decomposition of chinese characters into their individual units (called radicals). I want to turn the strings describing the decomposition of characters into tries (so that I can use networkx to render the decompositions).
For example for this database entry:
{"character":"⺳","definition":"net, network","pinyin":[],"decomposition":"⿱冖八","radical":"⺳","matches":[[0],[0],[1],[1]]}
The decomposition for this character would be.
- Node(1, char='⿱')
- Node(2, char='冖') # an edge connects '⿱' to '冖'
- Node(3, char='八') # an edge connects '⿱' to '八'
So far, I have come up with a script to turn the string decompositions into dictionaries (but not graphs).
decomposition_types = {
'top-bottom': '⿱',
'left-right': '⿰',
'diagonal-corners': '⿻',
'over-under': '⿺',
'under-over': '⿹',
'over-under-reversed': '⿸',
'top-bottom-middle': '⿳',
'left-right-middle': '⿲',
'inside-outside': '⿴',
'outside-inside': '⿵',
'outside-inside2': '⿷',
'inside-outside2': '⿶'
# 'unknown': '?'
}
decomposition_types_reversed = dict(((value, key) for key, value in decomposition_types.items()))
file = []
if not os.path.isfile('data/dictionary.json'):
with open('data/dictionary.txt') as d:
for line in d:
file.append(json.loads(line))
for i, item in enumerate(file):
item['id'] = i + 1
json.dump(file, open('data/dictionary.json', 'w+'))
else:
file = json.load(open('data/dictionary.json'))
def is_parsed(blocks):
for block in blocks:
if not block['is_unit']:
return False
return True
def search(character, dictionary=file):
for hanzi in dictionary:
if hanzi['character'] == character:
return hanzi
return False
def parse(decomp):
if len(decomp) == 1:
return {"spacing": '?'}
blocks = []
n_loops = 0
for item in decomp:
blocks.append({"char": item, "is_spacing": item in decomposition_types_reversed, "is_unit": False})
while not is_parsed(blocks):
for i, item in enumerate(blocks):
if "is_spacing" in item:
if item['is_spacing']:
next_items = decomposition_types_reversed[item['char']].count('-') + 1
can_match = True
for x in blocks[i + 1:i + 1 + next_items]:
try:
if x['char'] in decomposition_types_reversed:
can_match = False
except KeyError:
pass
if can_match:
blocks[i] = {"spacing": item['char'],
"chars": [l['char'] if 'char' in l else l for l in
blocks[i + 1:i + 1 + next_items]],
"is_unit": True}
del blocks[i + 1:i + 1 + next_items]
n_loops += 1
if n_loops > 10:
print(decomp)
sys.exit()
return blocks

Windows/Python Error WindowsError: [Error 3] The system cannot find the path specified

Hi I am new to python and i need some help. I trying to run a file on Windows 10 OS with python 2.7.
import os
import re
import codecs
import numpy as np
import theano
models_path = "./models"
eval_path = "./evaluation"
eval_temp = os.path.join(eval_path, "temp")
eval_script = os.path.join(eval_path, "conlleval")
def get_name(parameters):
"""
Generate a model name from its parameters.
"""
l = []
for k, v in parameters.items():
if type(v) is str and "/" in v:
l.append((k, v[::-1][:v[::-1].index('/')][::-1]))
else:
l.append((k, v))
name = ",".join(["%s=%s" % (k, str(v).replace(',', '')) for k, v in l])
return "".join(i for i in name if i not in "\/:*?<>|")
def set_values(name, param, pretrained):
"""
Initialize a network parameter with pretrained values.
We check that sizes are compatible.
"""
param_value = param.get_value()
if pretrained.size != param_value.size:
raise Exception(
"Size mismatch for parameter %s. Expected %i, found %i."
% (name, param_value.size, pretrained.size)
)
param.set_value(np.reshape(
pretrained, param_value.shape
).astype(np.float32))
def shared(shape, name):
"""
Create a shared object of a numpy array.
"""
if len(shape) == 1:
value = np.zeros(shape) # bias are initialized with zeros
else:
drange = np.sqrt(6. / (np.sum(shape)))
value = drange * np.random.uniform(low=-1.0, high=1.0, size=shape)
return theano.shared(value=value.astype(theano.config.floatX), name=name)
def create_dico(item_list):
"""
Create a dictionary of items from a list of list of items.
"""
assert type(item_list) is list
dico = {}
for items in item_list:
for item in items:
if item not in dico:
dico[item] = 1
else:
dico[item] += 1
return dico
def create_mapping(dico):
"""
Create a mapping (item to ID / ID to item) from a dictionary.
Items are ordered by decreasing frequency.
"""
sorted_items = sorted(dico.items(), key=lambda x: (-x[1], x[0]))
id_to_item = {i: v[0] for i, v in enumerate(sorted_items)}
item_to_id = {v: k for k, v in id_to_item.items()}
return item_to_id, id_to_item
def zero_digits(s):
"""
Replace every digit in a string by a zero.
"""
return re.sub('\d', '0', s)
def iob2(tags):
"""
Check that tags have a valid IOB format.
Tags in IOB1 format are converted to IOB2.
"""
for i, tag in enumerate(tags):
if tag == 'O':
continue
split = tag.split('-')
if len(split) != 2 or split[0] not in ['I', 'B']:
return False
if split[0] == 'B':
continue
elif i == 0 or tags[i - 1] == 'O': # conversion IOB1 to IOB2
tags[i] = 'B' + tag[1:]
elif tags[i - 1][1:] == tag[1:]:
continue
else: # conversion IOB1 to IOB2
tags[i] = 'B' + tag[1:]
return True
def iob_iobes(tags):
"""
IOB -> IOBES
"""
new_tags = []
for i, tag in enumerate(tags):
if tag == 'O':
new_tags.append(tag)
elif tag.split('-')[0] == 'B':
if i + 1 != len(tags) and \
tags[i + 1].split('-')[0] == 'I':
new_tags.append(tag)
else:
new_tags.append(tag.replace('B-', 'S-'))
elif tag.split('-')[0] == 'I':
if i + 1 < len(tags) and \
tags[i + 1].split('-')[0] == 'I':
new_tags.append(tag)
else:
new_tags.append(tag.replace('I-', 'E-'))
else:
raise Exception('Invalid IOB format!')
return new_tags
def iobes_iob(tags):
"""
IOBES -> IOB
"""
new_tags = []
for i, tag in enumerate(tags):
if tag.split('-')[0] == 'B':
new_tags.append(tag)
elif tag.split('-')[0] == 'I':
new_tags.append(tag)
elif tag.split('-')[0] == 'S':
new_tags.append(tag.replace('S-', 'B-'))
elif tag.split('-')[0] == 'E':
new_tags.append(tag.replace('E-', 'I-'))
elif tag.split('-')[0] == 'O':
new_tags.append(tag)
else:
raise Exception('Invalid format!')
return new_tags
def insert_singletons(words, singletons, p=0.5):
"""
Replace singletons by the unknown word with a probability p.
"""
new_words = []
for word in words:
if word in singletons and np.random.uniform() < p:
new_words.append(0)
else:
new_words.append(word)
return new_words
def pad_word_chars(words):
"""
Pad the characters of the words in a sentence.
Input:
- list of lists of ints (list of words, a word being a list of char indexes)
Output:
- padded list of lists of ints
- padded list of lists of ints (where chars are reversed)
- list of ints corresponding to the index of the last character of each word
"""
max_length = max([len(word) for word in words])
char_for = []
char_rev = []
char_pos = []
for word in words:
padding = [0] * (max_length - len(word))
char_for.append(word + padding)
char_rev.append(word[::-1] + padding)
char_pos.append(len(word) - 1)
return char_for, char_rev, char_pos
def create_input(data, parameters, add_label, singletons=None):
"""
Take sentence data and return an input for
the training or the evaluation function.
"""
words = data['words']
chars = data['chars']
if singletons is not None:
words = insert_singletons(words, singletons)
if parameters['cap_dim']:
caps = data['caps']
char_for, char_rev, char_pos = pad_word_chars(chars)
input = []
if parameters['word_dim']:
input.append(words)
if parameters['char_dim']:
input.append(char_for)
if parameters['char_bidirect']:
input.append(char_rev)
input.append(char_pos)
if parameters['cap_dim']:
input.append(caps)
if add_label:
input.append(data['tags'])
return input
def evaluate(parameters, f_eval, raw_sentences, parsed_sentences,
id_to_tag, dictionary_tags, eval_id):
"""
Evaluate current model using CoNLL script.
"""
n_tags = len(id_to_tag)
predictions = []
count = np.zeros((n_tags, n_tags), dtype=np.int32)
for raw_sentence, data in zip(raw_sentences, parsed_sentences):
input = create_input(data, parameters, False)
if parameters['crf']:
y_preds = np.array(f_eval(*input))[1:-1]
else:
y_preds = f_eval(*input).argmax(axis=1)
y_reals = np.array(data['tags']).astype(np.int32)
assert len(y_preds) == len(y_reals)
p_tags = [id_to_tag[y_pred] for y_pred in y_preds]
r_tags = [id_to_tag[y_real] for y_real in y_reals]
if parameters['tag_scheme'] == 'iobes':
p_tags = iobes_iob(p_tags)
r_tags = iobes_iob(r_tags)
for i, (y_pred, y_real) in enumerate(zip(y_preds, y_reals)):
new_line = " ".join(raw_sentence[i][:-1] + [r_tags[i], p_tags[i]])
predictions.append(new_line)
count[y_real, y_pred] += 1
predictions.append("")
# Write predictions to disk and run CoNLL script externally
#eval_id = np.random.randint(1000000, 2000000)
output_path = os.path.join(eval_temp, "eval.%i.output" % eval_id)
scores_path = os.path.join(eval_temp, "eval.%i.scores" % eval_id)
with codecs.open(output_path, 'w', 'utf8') as f:
f.write("\n".join(predictions))
os.system("%s < %s > %s" % (eval_script, output_path, scores_path))
# CoNLL evaluation results
eval_lines = [l.rstrip() for l in codecs.open(scores_path, 'r', 'utf8')]
#trainLog = open('train.log', 'w')
for line in eval_lines:
print line
#trainLog.write("%s\n" % line)
# Remove temp files
# os.remove(output_path)
# os.remove(scores_path)
# Confusion matrix with accuracy for each tag
print ("{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * n_tags)).format(
"ID", "NE", "Total",
*([id_to_tag[i] for i in xrange(n_tags)] + ["Percent"])
)
for i in xrange(n_tags):
print ("{: >2}{: >7}{: >7}%s{: >9}" % ("{: >7}" * n_tags)).format(
str(i), id_to_tag[i], str(count[i].sum()),
*([count[i][j] for j in xrange(n_tags)] +
["%.3f" % (count[i][i] * 100. / max(1, count[i].sum()))])
)
# Global accuracy
print "%i/%i (%.5f%%)" % (
count.trace(), count.sum(), 100. * count.trace() / max(1, count.sum())
)
# F1 on all entities
return float(eval_lines[1].strip().split()[-1])
When i compile the code as it is i always get the error.I think its either because of restriction on path length in windows or it needs or slashes. I dont know what to add to subtract in order to resolve the problem.
run train.py --train lstm/fold1/train --dev lstm/fold1/dev --test lstm/fold1/test
WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10). Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29
Using gpu device 0: GeForce GT 620M (CNMeM is enabled with initial size: 85.0% of memory, cuDNN not available)
Traceback (most recent call last):
File "E:\New-Code\tagger-master\tagger-master\train.py", line 135, in
model = Model(parameters=parameters, models_path=models_path)
File "model.py", line 36, in init
os.makedirs(self.model_path)
File "C:\Users\Acer\Anaconda2\envs\env_name27\lib\os.py", line 157, in makedirs
mkdir(name, mode)
WindowsError: [Error 3] The system cannot find the path specified: './models\tag_scheme=iob,lower=False,zeros=False,char_dim=25,char_lstm_dim=25,char_bidirect=True,word_dim=100,word_lstm_dim=100,word_bidirect=True,pre_emb=,all_emb=False,cap_dim=0,crf=True,dropout=0.3,lr_method=sgd-lr_.005'
In windows pathe is given by back slash \ instead of forward slash / which is used in linux/unix.
Try it like blow if file is 1 folder back:
models_path = "..\models"
eval_path = "..\evaluation"

SHA256 doesn't yield same result

I'm following on this tutorial and in part 2 (picture below) it shows that the SHA256 yields a result different than what I get when I ran my python code:
the string is: 0450863AD64A87AE8A2FE83C1AF1A8403CB53F53E486D8511DAD8A04887E5B23522CD470243453A299FA9E77237716103ABC11A1DF38855ED6F2EE187E9C582BA6
While the tutorial SHA256 comes to: 600FFE422B4E00731A59557A5CCA46CC183944191006324A447BDB2D98D4B408
My short python shows:
sha_result = sha256(bitconin_addresss).hexdigest().upper()
print sha_result
32511E82D56DCEA68EB774094E25BAB0F8BDD9BC1ECA1CEEDA38C7A43ACEDDCE
in fact, any online sha256 shows the same python result; so am I missing here something?
You're hashing the string when you're supposed to be hashing the bytes represented by that string.
>>> hashlib.sha256('0450863AD64A87AE8A2FE83C1AF1A8403CB53F53E486D8511DAD8A04887E5B23522CD470243453A299FA9E77237716103ABC11A1DF38855ED6F2EE187E9C582BA6'.decode('hex')).hexdigest().upper()
'600FFE422B4E00731A59557A5CCA46CC183944191006324A447BDB2D98D4B408'
You could use Gavin's "base58.py", which I believe he no longer shares it on his github page. However you probably could easily google and find different versions of it from github.
Here is one version edited a little by me:
#!/usr/bin/env python
"""encode/decode base58 in the same way that Bitcoin does"""
import math
import sys
__b58chars = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
__b58base = len(__b58chars)
def b58encode(v):
""" encode v, which is a string of bytes, to base58.
"""
long_value = 0L
for (i, c) in enumerate(v[::-1]):
long_value += ord(c) << (8*i) # 2x speedup vs. exponentiation
result = ''
while long_value >= __b58base:
div, mod = divmod(long_value, __b58base)
result = __b58chars[mod] + result
long_value = div
result = __b58chars[long_value] + result
# Bitcoin does a little leading-zero-compression:
# leading 0-bytes in the input become leading-1s
nPad = 0
for c in v:
if c == '\0': nPad += 1
else: break
return (__b58chars[0]*nPad) + result
def b58decode(v):
""" decode v into a string of len bytes
"""
long_value = 0L
for (i, c) in enumerate(v[::-1]):
long_value += __b58chars.find(c) * (__b58base**i)
result = ''
while long_value >= 256:
div, mod = divmod(long_value, 256)
result = chr(mod) + result
long_value = div
result = chr(long_value) + result
nPad = 0
for c in v:
if c == __b58chars[0]: nPad += 1
else: break
result = chr(0)*nPad + result
return result
try:
import hashlib
hashlib.new('ripemd160')
have_crypto = True
except ImportError:
have_crypto = False
def hash_160(public_key):
if not have_crypto:
return ''
h1 = hashlib.sha256(public_key).digest()
r160 = hashlib.new('ripemd160')
r160.update(h1)
h2 = r160.digest()
return h2
def hash_160_to_bc_address(h160, version="\x00"):
if not have_crypto:
return ''
vh160 = version+h160
h3=hashlib.sha256(hashlib.sha256(vh160).digest()).digest()
addr=vh160+h3[0:4]
return b58encode(addr)
def public_key_to_bc_address(public_key, version="\x00"):
if not have_crypto or public_key is None:
return ''
h160 = hash_160(public_key)
return hash_160_to_bc_address(h160, version=version)
def sec_to_bc_key(sec, version="\x80"):
if not have_crypto or sec is None:
return ''
vsec = version+sec +"\x01"
hvsec=hashlib.sha256(hashlib.sha256(vsec).digest()).digest()
return b58encode(vsec+hvsec[0:4])
def bc_key_to_sec(prv):
return b58decode(prv)[1:33]
def bc_address_to_hash_160(addr):
bytes = b58decode(addr)
return bytes[1:21]
if __name__ == '__main__':
if len(sys.argv) > 1:
if sys.argv[1] == '-en':
print b58encode(sys.argv[2].decode('hex_codec'))
if sys.argv[1] == '-de':
print b58decode(sys.argv[2]).encode('hex_codec')
if sys.argv[1] == '-pub':
print public_key_to_bc_address(sys.argv[2].decode('hex_codec'))
if sys.argv[1] == '-adr':
print bc_address_to_hash_160(sys.argv[2]).encode('hex_codec')
if sys.argv[1] == '-sec':
print sec_to_bc_key(sys.argv[2].decode('hex_codec'))
if sys.argv[1] == '-prv':
print bc_key_to_sec(sys.argv[2]).encode('hex_codec')
else:
print ''
print 'Usage: ./base58.py [options]'
print ''
print ' -en converts hex to base58'
print ' -de converts base58 to hex'
print
print ' -pub public_key_to_bc_address'
print ' -adr bc_address_to_hash_160'
print
print ' -sec sec_to_bc_key'
print ' -prv bc_key_to_sec'
print
To answer your specific question, based on above code you could use this command:
hashlib.sha256('0450863AD64A87AE8A2FE83C1AF1A8403CB53F53E486D8511DAD8A04887E5B23522CD470243453A299FA9E77237716103ABC11A1DF38855ED6F2EE187E9C582BA6'.decode('hex_codec')).digest().encode('hex_codec').upper()

Python - WindowName for Compare with List

I am currently blocked on a point of a program in Python.
I wish to compare in a list, the WindowName event to launch directives.
Example:
import win32api
import pyHook
liste = ["Google", "Task"]
if event.WindowName == liste:
Screenshot ()
return True
else:
return False
Complete code, he work:
def OnMouseEvent(event):
global interval
data = '\n[' + str(time.ctime().split(' ')[3]) + ']' \
+ ' WindowName : ' + str(event.WindowName)
data += '\n\tButton:' + str(event.MessageName)
data += '\n\tClicked in (Position):' + str(event.Position)
data += '\n===================='
global t, start_time, pics_names
"""
Code Edit
"""
t = t + data
if len(t) > 300:
ScreenShot()
"""
Finish
"""
if len(t) > 500:
f = open('Logfile.txt', 'a')
f.write(t)
f.close()
t = ''
if int(time.time() - start_time) == int(interval):
Mail_it(t, pics_names)
start_time = time.time()
t = ''
return True
else:
return False
When i edit the code in """ doesn't work :
t = t + data
liste = ["Google", "Task"]
if event.WindowName == liste:
ScreenShot()
He return :
File "C:\Python26\lib\site-packages\pyHook\HookManager.py", line 324, in MouseSwitch func = self.mouse_funcs.get(msg) TypeError: an integer is required
I test this :
HookManager: func = self.keyboard_funcs.get(msg) to: func=self.keyboard_funcs.get( int(str(msg)) )
But is don't work, i think i note all problem.
Thanks for you help in advance :)

(Python) List index out of range when trying to pull data out of a .CSV?

This program pulls data out of two .CSV files, which are linked here:
https://drive.google.com/folderview?id=0B1SjPejhqNU-bVkzYlVHM2oxdGs&usp=sharing
It's supposed to look for anything after a comma in each of the two files, but my range logic is somehow wrong. I'm running a traceback error to line 101:
"line 101, in calc_corr: sum_smokers_value = sum_smokers_value + float(s_percent_smokers_data[r][1])
IndexError: list index out of range"
I assume it would do the same for the other times [k][1] shows up.
many thanks in advance if there's a way to fix this.
the program so far is:
# this program opens two files containing data and runs a corralation calculation
import math
def main():
try:
print('does smoking directly lead to lung cancer?')
print('''let's find out, shall we?''''')
print('to do so, this program will find correlation between the instances of smokers, and the number of people with lung cancer.')
percent_smokers, percent_cancer = retrieve_csv()
s_percent_smokers_data, c_percent_cancer_data = read_csv(percent_smokers, percent_cancer)
correlation = calc_corr(s_percent_smokers_data, c_percent_cancer_data,)
print('r_value =', corretation)
except IOError as e:
print(str(e))
print('this program has been cancelled. run it again.')
def retrieve_csv():
num_times_failed = 0
percent_smokers_opened = False
percent_cancer_opened = False
while((not percent_smokers_opened) or (not percent_cancer_opened)) and (num_times_failed < 5):
try:
if not percent_smokers_opened:
percent_smokers_input = input('what is the name of the file containing the percentage of smokers per state?')
percent_smokers = open(percent_smokers_input, 'r')
percent_smokers_opened = True
if not percent_cancer_opened:
percent_cancer_input = input('what is the name of the file containing the number of cases of lung cancer contracted?')
percent_cancer = open(percent_cancer_input, 'r')
percent_cancer_opened = True
except IOError:
print('a file was not located. try again.')
num_times_failed = num_times_failed + 1
if not percent_smokers_opened or not percent_cancer_opened:
raise IOError('you have failed too many times.')
else:
return(percent_smokers, percent_cancer)
def read_csv(percent_smokers, percent_cancer):
s_percent_smokers_data = []
c_percent_cancer_data = []
empty_list = ''
percent_smokers.readline()
percent_cancer.readline()
eof = False
while not eof:
smoker_list = percent_smokers.readline()
cancer_list = percent_cancer.readline()
if smoker_list == empty_list and cancer_list == empty_list:
eof = True
elif smoker_list == empty_list:
raise IOError('smokers file error')
elif cancer_list == empty_list:
raise IOError('cancer file error')
else:
s_percent_smokers_data.append(smoker_list.strip().split(','))
c_percent_cancer_data.append(cancer_list.strip().split(','))
return (s_percent_smokers_data, c_percent_cancer_data)
def calc_corr(s_percent_smokers_data, c_percent_cancer_data):
sum_smokers_value = sum_cancer_cases_values = 0
sum_smokers_sq = sum_cancer_cases_sq = 0
sum_value_porducts = 0
numbers = len(s_percent_smokers_data)
for k in range(0, numbers):
sum_smokers_value = sum_smokers_value + float(s_percent_smokers_data[k][1])
sum_cancer_cases_values = sum_cancer_cases_values + float(c_percent_cancer_data[k][1])
sum_smokers_sq = sum_smokers_sq + float(s_percent_smokers_data[k][1]) ** 2
sum_cancer_cases_sq = sum_cancer_cases_sq + float(c_percent_cancer_data[k][1]) ** 2
sum_value_products = sum_value_products + float(percent_smokers[k][1]) ** float(percent_cancer[k][1])
numerator_value = (numbers * sum_value_products) - (sum_smokers_value * sum_cancer_cases_values)
denominator_value = math.sqrt(abs((numbers * sum_smokers_sq) - (sum_smokers_value ** 2)) * ((numbers * sum_cancer_cases_sq) - (sum_cancer_cases_values ** 2)))
return numerator_value / denominator_value
main()
The values in each row of your data files are not comma separated, but rather tab separated. You need to change the ',' delimiter character you're splitting on for '\t'. Or perhaps use the csv module and tell it that your delimiter is '\t'. You can read more about the csv module in the documentation.

Categories

Resources