A python 3.x KeyError with already 'dict' maybe..?

A python 3.x KeyError with already 'dict' maybe..? - python

def get_data_list(file_object,column_number):
contents = []
for string in file_object:
contents.append(tuple(string.split(',')))
list = []
for i in range(len(contents) - 1):
list.append((contents[i + 1][0], float(contents[i + 1][column_number])))
list.sort()
return list
def average_data(list_of_tuples):
dict = {'01':'January','02':'Februday','03':'March','04':'April','05':'May','06':'June','07':'July','08':'August','09':'September','10':'October','11':'November','12':'December'}
current_month = 0
total = 0
count = 1
average_data = []
for k in list_of_tuples:
for data in k:
data = str(data)
month = data[4:6]
if month == current_month:
total += k[1]
if count != 1:
count += 1
else:
current_month = month
average = float(total/count)
average_data.append((float(average),dict[data[4:6]]+data[0:4]))
total = 0
average_data = sorted(average_data)
return average_data
These are my code but when I try to run it returns an error:
KeyError:'28'or some other keyError with numbers
But I thought I already set all the numbers in dict...
And also, the data[4:6] comes from the date number, such as 20160407, 20141105.

Related

CS50 DNA - it works for small.csv but not for large

https://cs50.harvard.edu/x/2020/psets/6/dna/#:~:text=python%20dna.py%20databases/large.csv%20sequences/5.txt
I'm trying to solve this problem from CS50 but it just works for the small database, when I try it for the large one the program overcounts.
import csv
if len(argv) != 3:
print("DIGITA DIREITO, IMBECIL")
exit()
with open(argv[1], "r") as source:
reader = list(csv.reader(source))
reader[0].remove("name")
i = reader[0]
with open(argv[2], "r") as sequence:
seq = sequence.read()
values = []
for j in range(len(i)):
value = 0
counter = 0
pos = 0
prevpos = 0
while pos < len(seq):
pos = seq.find(i[j], pos)
if pos == -1:
counter = 0
break
elif (pos != 1):
counter += 1
prevpos = pos
pos += len(i[j])
if value < counter:
value = counter
values.append(value)
for row in range(len(reader)):
print(reader[row])
print(values)
values = list(map(str, values))
search = list(reader)
search.pop(0)
for result in search:
if result[1:] == values:
print(f"{result[0]}")
break
elif result == search[-1]:
print("No match")

I think you are just counting the STRs repetitions in the sequence, not the maximum consecutive STR repetitions. This is what the problem asks

Kick start RE (Test set skipped) error using python

I was practicing on google kick start Round A 2016 ( Country Leader ) but iam getting error that says Runtime error and can't figure what is wrong.
here is my code
First one :
T = int(input().strip())
tries = []
for i in range(1, T + 1):
N = int(input().strip())
persons = list()
for t in range(1, N + 1):
persons.append(input().strip())
tries.append(persons)
winners = []
for t in tries:
points = 0
temp = ''
for per in t:
per_ltr = per.replace(' ','')
if len(set(per_ltr)) > points:
points = len(set(per_ltr))
temp = per
winners.append(temp)
num = 1
for one in winners:
print(f'Case #{num}: {one}')
num += 1
another one :
T = int(input())
winner = []
for i in range(T):
N = int(input())
persons = []
for j in range(N):
persons.append(input())
points = len(set(persons[0]))
temp = persons[0]
for per in persons:
if len(set(per)) > points:
points = len(set(per))
temp = per
winner.append(temp)
num = 1
for one in winner:
print(f'Case #{num}: {one}')
num += 1

python type 'list' doesn't have expected attribute 'tolist'

i am coding python in PyCharm and it is giving me this warning:
type 'list' doesn't have expected attribute 'tolist'
however I have declared my variable list, and here is my code:
...
my_list = []
big_list = []
i= 0
count = 0
while i < len(data):
if data[i][3] < 0:
i += 1
continue
my_list.append([data[i][0], data[i][1], data[i][2], data[i][3]])
if i == len(df) - 1:
count += 1
self.myfancyfunction(my_list, count)
big_list.append(my_list)
elif i < len(data) - 1 and data[i][3] != data[i + 1][3]:
count += 1
self.myfancyfunction(my_list, count)
big_list.append(my_list)
my_list = []
cluster += 1
i += 1
in the two instances of self.myfancyfunction(my_list, count), the my_list variable is underlined and shows the above error.
Could you please help to fix this?
update
here is my fancy function:
def myfancyfunction(self, array_ls, count):
dict1 = {}
dict2 = {}
array_ls = np.asarray(array_ls)
array_ls = array_ls[:, 2].astype(int)
self.info_ids.append(array_ls.tolist())
sys.stdout.flush()
with open('myfile.txt', "r") as myfile:
for line in myfile:
if int(line.split()[0]) in array_ls:
for element in line.split()[1:]:
key = element.split(":")[0]
value = float(element.split(":")[1])
if key in self.reference:
if not str(key) in dict1:
dict1[str(key)] = 1
dict2[str(key)] = value
else:
dict1[str(key)] += 1
dict2[str(key)] += value
self.info_freq.append(dict1)
self.info_vals.append(dict2)
I am inside a class and have a couple of functions.

Writing Dictionary to CSV

I have iterated over a database and created a dictionary. The key is records in field 1 and the values are the averages of in each column for their corresponding record in field 1. The questions I have is what is the best way to output my dictionary to a table?
myDict = {}
def Calculate(key, fields, dt):
results = {}
for rec in arcpy.da.SearchCursor(table, "*"):
header[names] = row[1]
if results.has_key(key):
result = results[key]
i = 0
while i < len(fields):
result[i] += rec[fields[i]]
i += 1
result[len(fields)] += 1
else:
temp = []
i = 0
while i < len(fields):
temp.append(rec[fields[i]])
i += 1
temp.append (1)
results[rec[key]] = temp
endResults = {}
for k in results:
j = 0
tempEndResults = []
while j < len(results[k]) - 1:
tempEndResults.append(results[k][j] / results[k][len(results[k])-1])
j += 1
endResults[k] = tempEndResults
i += 1
return endResults
Calculate(1, [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23], myDict)

The questions I have is what is the best way
to output my dictionary to a table?
To create a CSV, you can use a nested loop to print the inner values in tabular form:
for k, seq in myDict.items():
for elem in seq:
print '%s,%s' % (k, elem)
This should give you a nice looking CSV table.

Python trying to Refactor (DRY out) a long Control Flow

I am grabbing a lot of data from and SQL query that takes a long time to run. Since the SQL query takes so long to run, I am grabbing the data from the database in its most granular form. I then cycle through this data once and aggregate it in the forms that are useful to me.
My problem is that I am repeating myself over and over again. However, I am not sure of the best way to refactor this control flow. Thanks in advance!
def processClickOutData(cls, raw_data):
singles = {}
total={}
absolute_total = 0
channels = {}
singles_true = {}
total_true={}
channels_true = {}
absolute_total_true = 0
list_channels = set([])
list_tids = set([])
total_position = {}
total_position_true = {}
tid_position = {}
channel_position = {}
channel_position_true = {}
tid_position_true = {}
for row in raw_data:
gap=row[0]
count=row[1]
tid=row[2]
prefered=row[3]
channel=row[4]
position=row[5]
list_channels.add(channel)
list_tids.add(tid)
absolute_total += int(count)
if total.has_key(gap):
total[gap] += count
else:
total[gap] = count
if singles.has_key(gap) and singles[gap].has_key(tid):
singles[gap][tid] += count
elif singles.has_key(gap):
singles[gap][tid] = count
else:
singles[gap] = {}
singles[gap][tid] = count
if channels.has_key(gap) and channels[gap].has_key(channel):
channels[gap][channel] += count
elif channels.has_key(gap):
channels[gap][channel] = count
else:
channels[gap] = {}
channels[gap][channel] = count
if total_position.has_key(position):
total_position[position] += count
else:
total_position[position] = count
if tid_position.has_key(position) and tid_position[position].has_key(tid):
tid_position[position][tid] += count
elif tid_position.has_key(position):
tid_position[position][tid] = count
else:
tid_position[position] = {}
tid_position[position][tid] = count
if channel_position.has_key(position) and channel_position[position].has_key(channel):
channel_position[position][channel] += count
elif channel_position.has_key(position):
channel_position[position][channel] = count
else:
channel_position[position] = {}
channel_position[position][channel] = count
if prefered == 0:
absolute_total_true += count
if total_true.has_key(gap):
total_true[gap] += count
else:
total_true[gap] = count
if singles_true.has_key(gap) and singles_true[gap].has_key(tid):
singles_true[gap][tid] += count
elif singles_true.has_key(gap):
singles_true[gap][tid] = count
else:
singles_true[gap] = {}
singles_true[gap][tid] = count
if channels_true.has_key(gap) and channels_true[gap].has_key(channel):
channels_true[gap][channel] += count
elif channels_true.has_key(gap):
channels_true[gap][channel] = count
else:
channels_true[gap] = {}
channels_true[gap][channel] = count
if total_position_true.has_key(position):
total_position_true[position] += count
else:
total_position_true[position] = count
if tid_position_true.has_key(position) and tid_position_true[position].has_key(tid):
tid_position_true[position][tid] += count
elif tid_position_true.has_key(position):
tid_position_true[position][tid] = count
else:
tid_position_true[position] = {}
tid_position_true[position][tid] = count
if channel_position_true.has_key(position) and channel_position_true[position].has_key(channel):
channel_position_true[position][channel] += count
elif channel_position_true.has_key(position):
channel_position_true[position][channel] = count
else:
channel_position_true[position] = {}
channel_position_true[position][channel] = count
final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
"total_position" : total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
return final_values

The entire structure you're using to store the data is probably wrong, but since I don't know how you're using it, I can't help you with that.
You can get rid of all of those has_key() calls by using collections.defaultdict. Note thedict.has_key(key) is deprecated anyway, you should just use key in thedict instead.
Look at how I change the for loop too -- you can assign to names right in the for statement, no need to do it separately.
from collections import defaultdict
def processClickOutData(cls, raw_data):
absolute_total = 0
absolute_total_true = 0
list_channels = set()
list_tids = set()
total = defaultdict(int)
total_true = defaultdict(int)
total_position = defaultdict(int)
total_position_true = defaultdict(int)
def defaultdict_int():
return defaultdict(int)
singles = defaultdict(defaultdict_int)
singles_true = defaultdict(defaultdict_int)
channels = defaultdict(defaultdict_int)
channels_true = defaultdict(defaultdict_int)
tid_position = defaultdict(defaultdict_int)
tid_position_true = defaultdict(defaultdict_int)
channel_position = defaultdict(defaultdict_int)
channel_position_true = defaultdict(defaultdict_int)
for gap, count, prefered, channel, position in raw_data:
list_channels.add(channel)
list_tids.add(tid)
absolute_total += count
total[gap] += count
singles[gap][tid] += count
channels[gap][channel] += count
total_position[position] += count
tid_position[position][tid] += count
channel_position[position][channel] += count
if prefered == 0:
absolute_total_true += count
total_true[gap] += count
singles_true[gap][tid] += count
channels_true[gap][channel] += count
total_position_true[position] += count
tid_position_true[position][tid] += count
channel_position_true[position][channel] += count
final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
"total_position" : total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
return final_values
What this does is automatically fill in the correct default values if the keys don't exist. You've got two kinds here. Where you're adding ints, you want to start with 0 if it doesn't exist -- that's what int returns, hence defaultdict(int). Where you're adding a dictionary that adds ints, you need to use a function that returns a defaultdict(int) which is what defaultdict_int does.
Edit: Suggested alternate dictionary structure:
position = defaultdict(lambda: defaultdict(defaultdict_int))
gap = defaultdict(lambda: defaultdict(defaultdict_int))
absolute_total = 0
for gap, count, prefered, channel, position in raw_data:
absolute_total += count
posd = position[position]
posd.setdefault('total', 0)
posd['total'] += count
posd['tid'][tid] += count
posd['channel'][channel] += count
gapd = gap[gap]
gapd.setdefault('total', 0)
gapd['total'] += count
gapd['tid'][tid] += count
gapd['channel'][channel] += count
Do the same with the _true versions as well, and you've gone from 12 dicts to 4.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

A python 3.x KeyError with already 'dict' maybe..? - python

Related

CS50 DNA - it works for small.csv but not for large

Kick start RE (Test set skipped) error using python

python type 'list' doesn't have expected attribute 'tolist'

Writing Dictionary to CSV

Python trying to Refactor (DRY out) a long Control Flow

Categories

Resources