I'm trying to use this dictionary:
student_data_dict = {'Student_1': 'bbbeaddacddcddaaadbaabdad', 'Student_2': 'acbccaddcadaaacdadbcabcad', 'Student_3': 'babcabdccadcDdbccdbaadbad', 'Student_4': 'bcbcabddcadcdabccdbaadcbd', 'Student_5': 'DCBCCADDCADBDACCDBBACBCAD', 'Student_6': 'acbeccddcadbaaccabbacdcad', 'Student_7': 'BCBCBCDABADCADCCDABAACCAD', 'Student_8': 'dcbccbddcadaabcbcacabbcad', 'Student_9': 'DDBDBBCDDCCBABCCBACADAAAC', 'Student_10': 'cbbdacdacadcbadbabaabcaTa', 'Student_11': 'BDBECADCAADCAAAAACBACACAD', 'Student_12': 'DBBCCBDCCADCDABABCBAABCAD', 'Student_13': 'BCBCBCDDCADCAAACCABACACAD', 'Student_14': 'DBBECBDACADAAACBCBAAABCBD', 'Student_15': 'acbebbddcadbaacccbcaddcad', 'Student_16': 'ACBEBCDDCADBAACCAACADBCAD', 'Student_17': 'DBBCACDDCADCAABCADBABDDAD', 'Student_18': 'dcbcdcdbbddccabbdacacccbd', 'Student_19': 'dbbccbddcadaaaccbdcaaacad', 'Student_20': 'abbdaaddcadcaaccbdcaaccbd', 'Student_21': 'DCDCABDBCADAAACDCCDAACAAD', 'Student_22': 'dabdaddabddbaacdacbaaaaad', 'Student_23': 'BCBCDDDACCDCAABDDABACACAD', 'Student_24': 'ACBDCBDBBCDAACCCCBDAADCBD', 'Student_25': 'DCBCACDAADDCADCBAABACBCAD', 'Student_26': 'dcbaabdccadcdadcccbaabdbd', 'Student_27': 'abbadbddcadacbcacccacbdad'}
and store the first letter for all students as a dictionary entry and then do the same for the next letter ect... to result in:
{'question_1': 'babbDaBdDcBDBDaADddaDdBADda', 'question_2': 'bcacCcCcDbDBCBcCBcbbCaCCCcb', 'question_3': 'bbbbBbBbBbBBBBbBBbbbDbBBBbb', 'question_4': 'ecccCeCcDdECCEeECccdCdCDCaa', 'question_5': 'acaaCcBcBaCCBCbBAdcaAaDCAad', 'question_6': 'dabbAcCbBcABCBbCCcbaBdDBCbb', 'question_7': 'ddddDdDdCdDDDDdDDdddDdDDDdd', 'question_8': 'adcdDdAdDaCCDAdDDbddBaABAcd', 'question_9': 'ccccCcBcDcACCCcCCbccCbCBAcc', 'question_10': 'daaaAaAaCaAAAAaAAdaaAdCCDaa', 'question_11': 'ddddDdDdCdDDDDdDDdddDdDDDdd', 'question_12': 'caccBbCaBcCCCAbBCcacAbCACca', 'question_13': 'daDdDaAaAbADAAaAAcaaAaAAAdc', 'question_14': 'dadaAaDbBaAAAAaAAaaaAaACDab', 'question_15': 'acbbCcCcCdABACcCBbccCcBCCdc', 'question_16': 'adccCcCbCbAACBcCCbccDdDCBca', 'question_17': 'aaccDaDcBaABCCcAAdbbCaDCAcc', 'question_18': 'ddddBbAaAbCCABbADaddCcABAcc', 'question_19': 'bbbbBbBcCaBBBAcCBcccDbBDBbc', 'question_20': 'acaaAaAaAaAAAAaAAaaaAaAAAaa', 'question_21': 'aaaaCcAbDbCACAdDBcaaAaCACac', 'question_22': 'bbddBdCbAcABABdBDcacCaADBbb', 'question_23': 'dcbcCcCcAaCCCCcCDcccAaCCCdd', 'question_24': 'aaabAaAaATAAABaAAbabAaABAba', 'question_25': 'ddddDdDdCaDDDDdDDdddDdDDDdd'}
x = 1
all_letters = ''
letter = ''
y = 1
i = 0
z = 0
for start in student_data_dict:
student = student_data_dict.get('Student_' + str(y))
letter = student[z]
all_letters = all_letters + letter
y = y + 1
i = i + 1
question_data_dict["question " + str(x)] = all_letters
if i == 27:
z = z + 1
x = x + 1
i = 0
print(question_data_dict)
data_file.close()
{'question 1': 'babbDaBdDcBDBDaADddaDdBADda'}
is what I get but I can't get the answers for the other 25 questions.
I tried changing for start in student_data_dict: into while z<26: but at the line "letter = student[z]" I get the error "'NoneType' object is not subscriptable"
num_questions = 25
answers_dict = {}
for i in range(num_questions):
answers_dict['question' + str(i)] = ''.join(c[i] for c in student_data_dict.values())
print(answers_dict)
Will give you the result you want.
Edit
Fixed code. Extracted number of questions to a variable so it can be used as index
Edit2
I created an OrderedDict from your original dictionary to maintain answer order when iterating. Now the answers_dict contains valid data.
from collections import OrderedDict
ordered_data = OrderedDict()
for i in range(len(student_data_dict.items())):
ordered_data['Student_' + str(i + 1)] = student_data_dict.get('Student_' + str(i + 1))
num_questions = 25
answers_dict = {}
for i in range(num_questions):
answers_dict['question' + str(i + 1)] = ''.join(c[i] for c in ordered_data.values())
You need to reset y when you move to the next question.
Here's an alternative a way to get what you're looking for with Pandas:
import pandas as pd
sdd = {k:[x for x in v] for k,v in student_data_dict}
df = pd.DataFrame(sdd)
df = df.reindex_axis(sorted(df.columns,
key = lambda col: int(col.split("_")[-1])), axis=1)
df.index = [f"Question {i+1}" for i in df.index]
{k:''.join(v) for k,v in zip(df.index, df.values)}
Related
I wrote a webscraper which is downloading table tennis data. There is info about players, match score etc. I would like to display players which lost the most matches per day. I've created data frame and I would like to sum p1_status and p2_status, then I would like to display Surname and number of loses next to player.
https://gyazo.com/19c70e071db78071e83045bfcea0e772
Here is my code:
s = Service("D:/setka/chromedriver.exe")
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=s)
hall = 10
num =1
filename = "C:/Users/filip/result2.csv"
f=open(filename,"w")
headers = "p1_surname, p1_name, p1_score, p2_surname, p2_name, p2_score, p1_status, p2_status \n"
f.write(headers)
while hall <= 10:
for period in [1]:
url = 'https://tabletennis.setkacup.com/en/schedule?date=2021-12-04&hall=' + \
str(hall) + '&' + 'period=' + str(period)
driver.get(url)
time.sleep(5)
divs = driver.find_elements(By.CSS_SELECTOR, "div.score-result")
for div in divs:
data = div.text.split()
#print(data)
if(num % 2) == 0:
f.write(str(data[0]) + "," + str(data[1]) + "," + str(data[2] + "," + "\n"))
else:
f.write(str(data[0]) + "," + str(data[1]) + "," + str(data[2] + ","))
num = num +1
hall =hall + 1
f.close()
df_results=pd.read_csv('C:/Users/filip/result2.csv', sep = r',',
skipinitialspace = True)
df_results.reset_index(drop=True, inplace=True)
df_results.loc[df_results['p1_score'] > df_results['p2_score'], ['p1_status','p2_status']] = ['won','lost']
df_results.loc[df_results['p1_score'] < df_results['p2_score'], ['p1_status','p2_status']] = ['lost','won']
df_results.loc[df_results['p1_score'] == df_results['p2_score'], ['p1_status','p2_status']] = ['not played','not played']
df_results.loc[((df_results['p1_score'] < 3) & (df_results['p1_score']!=0) & (df_results['p2_score'] <3) & (df_results['p2_score']!=0)), ['p1_status','p2_status']] = ['inplay','inplays']
df_results.loc[df_results['p1_status'] != df_results['p2_status'], ['match_status']] = ['finished']
df_results.loc[df_results['p1_status'] == df_results['p2_status'], ['match_status']] = ['not played']
df_results.loc[((df_results['p1_status'] =='inplay') & (df_results['p2_status']=='inplays')), ['match_status']] = ['inplay']
df_results = df_results.dropna(axis=1)
df_results.head(30)
Split your dataframe in 2 parts (p1_, p2_) to count defeats of each player then merge them:
Setup a MRE:
df = pd.DataFrame({'p1_surname': list('AABB'), 'p2_surname': list('CDCD'),
'p1_status': list('LWWW'), 'p2_status': list('WLLL')})
print(df)
# Output:
p1_surname p2_surname p1_status p2_status
0 A C L W
1 A D W L
2 B C W L
3 B D W L
>>> pd.concat([
df.filter(like='p1_').set_index('p1_surname')['p1_status'].eq('L').rename('loses'),
df.filter(like='p2_').set_index('p2_surname')['p2_status'].eq('L').rename('loses')]) \
.groupby(level=0).sum().rename_axis('surname').reset_index()
surname loses
0 A 1
1 B 0
2 C 1
3 D 2
I have 2 separate pandas dataframes, one of which tracks whether the platform of a train station is free or not and another which tracks the movement of the trains (note I am only at proof-of-concept stage, I appreciate my code is not tidy).
Code is as below:
L = []
M = []
x = 0
for i in range(0,k*2):
L.append(0)
M.append(x)
if (i == k):
x = 1
list_of_tuples = list(zip(M, L))
blocks_df = pd.DataFrame(list_of_tuples, columns = ['Direction', 'BlockTaken'])
L = ["London Depot", "London Platform", "Birmingham Platform", "Crossover"]
M = [0,0,0,0]
list_of_tuples = list(zip(L, M))
stations_control = pd.DataFrame(list_of_tuples, columns = ['Location', 'BlockTaken'])
for i in range (0,3600):
if (i%300==0): #Every 5 minutes, a new train enters service
print("Train " + str(TrainNumber) + " leaving depot for " + str(train_df.loc[train_df['Train_Number'] == TrainNumber, 'Start_Station'].iloc[0]) + " at " + str(t.time()) )
train_df.loc[train_df['Train_Number'] == TrainNumber, 'Dep'] = 'N'
train_df.loc[train_df['Train_Number'] == TrainNumber, 'Dep_Time'] = t.time()
train_df.loc[train_df['Train_Number'] == TrainNumber, 'From'] = L[0]
train_df.loc[train_df['Train_Number'] == TrainNumber, 'To'] = L[1]
if(stations_control[train_df.loc[train_df['Train_Number'] == TrainNumber, 'To']]['BlockTaken'] ==0):
print("Platform is free!!!")
t = t + datetime.timedelta(0,300)
#TrainNumber+=1
I know I am doing the if statement 4 lines from the end wrong, but I can't figure out how to do it properly. What I want to check is where the train is going and if the platform is free, print. What is the correct syntax here?
I think it is
b = train_df.loc[train_df['Train_Number'] == TrainNumber, 'To'].iloc[0]
if(stations_control.loc[stations_control['Location']==b]['BlockTaken'] == 0):
Answering my own question here. Thanks for the help guys.
if(stations_control.loc[stations_control['Location']==b].BlockTaken.iloc[0]== 0):
print("Platform is free!!!")
I am trying to store the values obtained from excel sheet cells to a list. The code provided basically collects data from different continuous rows and columns and creates a string of those values. I could work upt o storing the string value but I don't really know how to store the strings in a list, Can anyone help me with this?
for i in range(NR):
print("This TC checks the output for")
for j in range(NC):
inputVariable = str(ws[get_column_letter(ColumnStart+j) + str(rowStart-1)].value)
c = str((ws.cell(row = (rowStart + i),column = (ColumnStart +j)).value))
if (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MaxValRow),column = (ColumnStart+j)).value):
b = '(maximum)'
elif (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MinValRow),column = (ColumnStart+j)).value):
b = '(minimum)'
else:
b ='(intermediate)'
Commentstr = str(j+1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
# need to create a list here to store the commentstr for each iteration
NR = no. of rows, NC = no. of columns
my_list=[]
for i in range(NR):
x=0
print("This TC checks the output for")
for j in range(NC):
inputVariable = str(ws[get_column_letter(ColumnStart+j) + str(rowStart-1)].value)
c = str((ws.cell(row = (rowStart + i),column = (ColumnStart +j)).value))
if (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MaxValRow),column = (ColumnStart+j)).value):
b = '(maximum)'
elif (ws.cell(row = (rowStart + i),column = (ColumnStart+j)).value) == (ws.cell(row = (MinValRow),column = (ColumnStart+j)).value):
b = '(minimum)'
else:
b ='(intermediate)'
Commentstr = str(j+1) + '. The value of input ' + inputVariable + ' =' + " " + c + b
my_list[x]=Commentstr
x+=1
I´m having some troubles on my code, that implements a function that verifies the lenght of a string and return that string with or without spaces.
If String b >= 15 return b
If strinf < 15 return "number os spaces until len(b)2 + string b
But I get a IndexError: list index out of range Error. I cannot figure out where or why.
Traceback:
============= RESTART: C:\Users\pbo\Desktop\Trab_04.py =============
ascii_letters = abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
Traceback (most recent call last):
File "C:\Users\pbo\Desktop\Trab_04.py", line 30, in
o2.append(a(o1[e]))
IndexError: list index out of range
============= ============= ============= ============= =============
Thanks a lot for any possible help.
from random import seed
from random import randint
from random import choice
from string import ascii_letters
seed(930)
def a(b):
if(len(b)<15) :
s = "";
l = len(b)
while(len(s) + len(b) < 15) :
s += " "
s += b
return s
else:
return b
print("ascii_letters = " + ascii_letters)
o1 = []
for e in range(9622):
k = randint(1, 25)
s = ""
for l in range(k):
s = s + choice(ascii_letters)
o1.append(s)
o2 = []
for e in range(9622):
o2.append(a(o1[e]))
print("000000000111111111122222222223333333333")
print("123456789012345678901234567890123456789")
for e in range(9):
print(o1[e] + "|")
print("000000000111111111122222222223333333333")
print("123456789012345678901234567890123456789")
for e in range(9):
print(o2[e] + "|")
x1 = ""
x2 = "a"
x3 = "abc"
x4 = "abcdefghijklmnopqrstuvwxyz"
print("000000000111111111122222222223333333333")
print("123456789012345678901234567890123456789")
print(x1 + "|")
print(x2 + "|")
print(x3 + "|")
print(x4 + "|")
print("000000000111111111122222222223333333333")
print("123456789012345678901234567890123456789")
print(a(x1) + "|")
print(a(x2) + "|")
print(a(x3) + "|")
print(a(x4) + "|")
It looks like your indentation is incorrect (see also What is Python Whitespace and how does it work?).
If you change the following:
o1 = []
for e in range(9622):
k = randint(1, 25)
s = ""
for l in range(k):
s = s + choice(ascii_letters)
o1.append(s)
to:
o1 = []
for e in range(9622):
k = randint(1, 25)
s = ""
for l in range(k):
s = s + choice(ascii_letters)
o1.append(s)
Then, your code appears to work.
The problem is that your list o1 only has one element in it. Snipping out all of the rest of the code, you do this:
s = "some string"
o1 = []
o2 = []
# now has one element
o1.append(s)
for i in range(9622):
# when i > 0, IndexError will be raised
state = o1[i]
o2.append(a(state))
This happens because you are iterating over a range that has no relationship to the size/contents of o1. To fix this, you might consider:
s = "some string"
o1 = []
o2 = []
o1.append(s)
for element in o1:
o2.append(a(element))
The reason being is that now you are iterating over o1 directly, rather than assuming it will always have 9622 elements in it, which it doesn't in this case.
Also, upon further inspection, it looks like your a function is just trying to pad a string with spaces out to 15 characters. Fortunately, there's a builtin for this:
x = "some string"
x.rjust(15)
' some string'
Where rjust will left-insert spaces to create a string of the given length
This is my code. The problem is that the output looks like this
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1}{'negative': -2}{'2015-01-15-21:28:23.mp3': 1}
i want to be like a dictionary.... like this one below so i can read it back as a dictionary and remove the keys from the first subset sum and go on output a second on and so on until no other subset sum exists...
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1, 'negative': -2, '2015-01-15-21:28:23.mp3': 1}
Any ideas?
thanx in advanced
import os, sys,re,gzip, pickle
from itertools import combinations
import json
from datetime import datetime
mp3folder = raw_input('Please copy paste the mp3s path:')
lowerin = input('Please enter your total playlist time in NEGATIVE seconds and hit ENTER:')
r = {}
drk = os.listdir(mp3folder)
drifiles = list(drk)
r = dict.fromkeys(drifiles, 0)
for key in r.keys():
print ('Please enter the duration of...')
print(key)
r[key] = input('in seconds and hit ENTER:')
r['negative'] = lowerin
d = {}
neg = 0
pos = 0
dates = datetime.now()
dates = str(dates)
f = open("dict.txt",'ab')
f.write('\n'+dates+'\n')
f.close()
for (w,v) in r.iteritems():
if v > 0: pos += v
else: neg += v
sums = [0] * (pos - neg + 1)
for (w,v) in r.iteritems():
s = sums[:]
if not s[v - neg]: s[v - neg] = (w,)
for (i, w2) in enumerate(sums):
if w2 and not s[i + v]:
s[i + v] = w2 + (w,)
sums = s
if s[-neg]:
for x in s[-neg]:
d = dict([(x, r[x])])
file('dict.txt','a'+'\n').write(repr(d))
break
f = open('dict.txt','r')
filedata = f.read()
f.close()
newdata = filedata.replace("}{",", ")
f = open('lexiko.txt','w')
f.write(newdata)
f.close()
di = eval(open("lexiko.txt").read())
print di
this will do it