How to convert "index" to a "string"? - python

Here is a working example code:
# Sample data for the question.  Assumes `np` (numpy) and `pd` (pandas) are
# already imported -- the snippet does not show the import lines.
data = {'name': ['Joe', 'Mike', 'Jack', 'Hack', 'David', 'Marry', 'Wansi', 'Sidy', 'Jason', 'Even'],
'age': [25, 32, 18, np.nan, 15, 20, 41, np.nan, 37, 32],
'gender': [1, 0, 1, 1, 0, 1, 0, 0, 1, 0],
'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
# Use the letters 'a'..'j' as row labels instead of the default integer index.
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(data, index=labels)
print(df)
print("---------------------------")
# Keep the rows whose age is above 40, then render their index labels.
# Index.format() returns a list of strings, which is why type(obj) is list.
obj = df[df["age"]>40].index.format()
print("obj is",type(obj))
I want obj to be a string (str), but the code above gives a list.
How can I fix this?

You can simply add obj = obj[0]; obj will then be a string:
import numpy as np
import pandas as pd

# Sample frame: ten people labelled 'a'..'j'; two of the ages are missing (NaN).
data = {
    'name': ['Joe', 'Mike', 'Jack', 'Hack', 'David', 'Marry', 'Wansi', 'Sidy', 'Jason', 'Even'],
    'age': [25, 32, 18, np.nan, 15, 20, 41, np.nan, 37, 32],
    'gender': [1, 0, 1, 1, 0, 1, 0, 0, 1, 0],
    'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no'],
}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(data, index=labels)
print(df)
print("---------------------------")
# Index labels of the rows with age > 40, as a plain list of strings.
# Index.astype(str).tolist() stands in for the deprecated Index.format().
obj = df.index[df["age"] > 40].astype(str).tolist()
# The list holds one label per matching row; pick the first to get a str.
# Raises IndexError if no row matches.
obj = obj[0]
print("obj is", type(obj))

# One-expression variant: the label of the first row with age > 40, as a str.
# Indexing the Index directly avoids the deprecated Index.format() call.
# Raises IndexError if no row matches.
obj = str(df.index[df["age"] > 40][0])
print("obj is", obj, type(obj))
obj is g <class 'str'>

Related

remove the key value if a column is repeated and taking only the first one in nested dictionary

Input: I have a dictionary in this form with a lot more data
d = {
'ag': pd.DataFrame({'ID': ['id1', 'id1', 'id1'], 'name': ['a', 's', 'd'], 'num': [10, 7, 2]}),
'jk': pd.DataFrame({'ID': ['id2', 'id2', 'id2'], 'name': ['w', 'r', 'y'], 'num': [15, 8, 1]}),
'rp': pd.DataFrame({'ID': ['id1', 'id1'], 'name': ['f', 'n'], 'num': [13, 11]})
}
Expected Output: I want to remove a key and its value from the dictionary (d) if its ID has already appeared under an earlier key; here the key rp repeats id1, so it is removed.
d = {
'ag': pd.DataFrame({'ID': ['id1', 'id1', 'id1'], 'name': ['a', 's', 'd'], 'num': [10, 7, 2]}),
'jk': pd.DataFrame({'ID': ['id2', 'id2', 'id2'], 'name': ['w', 'r', 'y'], 'num': [15, 8, 1]})
}
code I tried:
# Keep, in sorted key order, only the entries whose IDs have not been seen yet.
new_d = {}
unique_ids = set()  # IDs that belong to an entry already kept
for key in sorted(d):
    key_ids = set(d[key]['ID'].tolist())
    # Keep the entry only when it shares no ID with the entries kept so far.
    if key_ids.isdisjoint(unique_ids):
        new_d[key] = d[key]
        # NOTE(review): the original indentation was lost; recording IDs only
        # for kept entries is assumed here -- confirm against the intent.
        unique_ids |= key_ids
print(new_d)
I need a different approach, this is not giving me good results for a large dataset.
Came up with a function to do the task
def remove_duplicate_key(d):
    """Drop the entries of ``d`` whose dominant ID does not match their position.

    The entry at position ``i`` (0-based, in dict order) is expected to have
    ``'id<i+1>'`` as the most frequent value of its ``'ID'`` column.  Every
    entry that does not is removed from ``d`` in place, and the dict is
    printed after each removal.

    NOTE(review): this relies on the IDs increasing together with the key
    position, as in the example data; it is not a general duplicate check.

    Args:
        d: dict mapping keys to DataFrames that have an ``'ID'`` column.

    Returns:
        The same dict object ``d``, after the removals.
    """
    # Walk a snapshot of the items so that popping from ``d`` is safe.
    for i, (key, temp_df) in enumerate(list(d.items())):
        var = 'id' + str(i + 1)
        # value_counts() sorts by frequency, so index[0] is the most common ID.
        if temp_df['ID'].value_counts().index[0] != var:
            d.pop(key, None)
            print(d)
    return d
It builds the variable var = 'id' + str(i + 1), since the IDs are assumed to increase along with the key position. Then call the function: remove_duplicate_key(d)

Group by column , get mean of another column, return max

Say I have a pandas dataframe like this:
exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
I want to write a function to group by one column, get the average for each category, and then return the highest average
The current way I did this
# NOTE(review): `df` is never built in this snippet; presumably
# df = pd.DataFrame(exam_data, index=labels) -- confirm.
# Mean score for each value of 'attempts', then the largest of those means.
avg = df.groupby('attempts')['score'].mean()
print(avg.max())
I want to write something like this
def return_max_average(df, category_column, numerical_column):
    """Return the highest per-category mean of a numeric column.

    Args:
        df: DataFrame that holds both columns.
        category_column: name of the column to group by.
        numerical_column: name of the column to average within each group.

    Returns:
        The largest of the group means.
    """
    # Pass the argument values as column names.  Quoting them would look up
    # columns literally called 'category_column' / 'numerical_column'.
    avg = df.groupby(category_column)[numerical_column].mean()
    return avg.max()
What would be the best way to write this function?

iterating on next item in sublist with condition in python

I have a list which is sorted and grouped based on 2 element of sublist like below
[[[2178393, 'a', 'online', 0, 20], [2178394, 'a', 'away', 0, 30], [2178395, 'a', 'away', 0, 40]],[[2178389, 'b', 'online', 0, 10], [2178390, 'b', 'online', 0, 15], [2178392, 'b', 'online', 1, 25], [2178391, 'b', 'online', 1, 30], [2178397, 'b', 'away', 1, 40]], [[2178388, 'c', 'online', 0, 15], [2178396, 'c', 'away', 0, 20], [2178402, 'c', 'online', 0,25], [2178408, 'c', 'online', 1, 50]]]
In the above there are 3 sublists, each containing several lists. Within each sublist, I want to append the 5th element (index 4) of the next list to the current list; the last list of each sublist has no successor and stays unchanged.
the output should be
[[[2178393, 'a', 'online', 0, 20,30], [2178394, 'a', 'away', 0, 30,40], [2178395, 'a', 'away', 0, 40]],[[2178389, 'b', 'online', 0, 10,15], [2178390, 'b', 'online', 0, 15,25], [2178392, 'b', 'online', 1, 25,30], [2178391, 'b', 'online', 1, 30,40], [2178397, 'b', 'away', 1, 40]], [[2178388, 'c', 'online', 0, 15,20], [2178396, 'c', 'away', 0, 20,25], [2178402, 'c', 'online', 0,25,50], [2178408, 'c', 'online', 1, 50]]]
Please help me.
Here is the code to achieve that
# `data` is the nested list from the question (groups -> rows).  The snippet
# originally called it `list`, which shadows the built-in type.
for group in data:
    # Pair every row with its successor; the last row of a group has none
    # and is therefore left unchanged.
    for row, next_row in zip(group, group[1:]):
        row.append(next_row[4])
Desired Output
Use a nested list comprehension along with zip_longest. This takes advantage of the fact that each of the innermost lists just needs the last element of the next list to be appended to it, with the last innermost list being unchanged.
from itertools import zip_longest
# Input: groups of rows; each row is [id, group, status, flag, value].
data = [[[2178393, 'a', 'online', 0, 20], [2178394, 'a', 'away', 0, 30], [2178395, 'a', 'away', 0, 40]],[[2178389, 'b', 'online', 0, 10], [2178390, 'b', 'online', 0, 15], [2178392, 'b', 'online', 1, 25], [2178391, 'b', 'online', 1, 30], [2178397, 'b', 'away', 1, 40]], [[2178388, 'c', 'online', 0, 15], [2178396, 'c', 'away', 0, 20], [2178402, 'c', 'online', 0,25], [2178408, 'c', 'online', 1, 50]]]
# What the transformation should produce.
expected = [[[2178393, 'a', 'online', 0, 20,30], [2178394, 'a', 'away', 0, 30,40], [2178395, 'a', 'away', 0, 40]],[[2178389, 'b', 'online', 0, 10,15], [2178390, 'b', 'online', 0, 15,25], [2178392, 'b', 'online', 1, 25,30], [2178391, 'b', 'online', 1, 30,40], [2178397, 'b', 'away', 1, 40]], [[2178388, 'c', 'online', 0, 15,20], [2178396, 'c', 'away', 0, 20,25], [2178402, 'c', 'online', 0,25,50], [2178408, 'c', 'online', 1, 50]]]

result = []
for group in data:
    extended_rows = []
    # zip_longest pads the shifted copy with [] so the last row is paired
    # with an empty list and gains nothing.
    for current_row, following_row in zip_longest(group, group[1:], fillvalue=[]):
        # following_row[-1:] is a one-element list (or [] for the padding),
        # so concatenation builds a new row and leaves `data` untouched.
        extended_rows.append(current_row + following_row[-1:])
    result.append(extended_rows)
print(result == expected)
Output:
True

Searching for elements within Python list using conditional statements

main_col = ['Name', 'Age', 'Gender']
main_row = [['Peter', 18, 'M'], ['Sam', 20, 'M'], ['Carol', 19, 'F'], ['Malcom', 21, 'M'], ['Oliver', 25, 'M'], ['Mellisa', 21, 'F'], ['Minreva', 18, 'F'], ['Bruce', 23, 'M'], ['Clarke', 24, 'M'], ['Zuck', 22, 'M'], ['Slade', 23, 'M'], ['Wade', 21, 'M'], ['Felicity', 22, 'F'], ['Selena', 23, 'F'], ['Ra\'s Al Gul',700, 'M']]
I am trying to make a program where main_col are column names and main_row have row information for each column (in a 2d list).
How can I write a piece of code for a search query which can search row where:
Name = 'Carol' and Age = 19.
Name = 'Carol' and Gender = 'F'
Age= 22 or Gender = 'M'
The following code is giving result for the 3rd part:-
search = {'Age': 22, 'Gender': 'M'}
# Resolve every searched column name to its position once, up front.
criteria = [(main_col.index(column), wanted) for column, wanted in search.items()]
# OR query: print each row that satisfies at least one criterion.  Testing
# row by row prints a row once even when it satisfies several criteria.
for row in main_row:
    if any(row[idx] == wanted for idx, wanted in criteria):
        print(row)
You could give this a try; it's somewhat complicated but should get the job done:
AND = 'and'
OR = 'or'


def is_found(value, aggregator, search_terms):
    """Check whether one row matches the search terms.

    Args:
        value: the row to test, indexable by column position.
        aggregator: ``AND`` requires every term to match; any other value
            (e.g. ``OR``) requires at least one term to match.
        search_terms: mapping of column name to
            ``{'idx': <column index>, 'val': <search value>}``.

    Returns:
        True if the row matches under the given aggregator, else False.
        With no search terms, ``AND`` yields True and anything else False.
    """
    # Lazy per-term comparison: all()/any() stop at the first decisive term,
    # just like an explicit loop with ``break``.
    matches = (
        value[term['idx']] == term['val'] for term in search_terms.values()
    )
    if aggregator == AND:
        return all(matches)
    return any(matches)
# Perform the search
def search(columns, values, aggregator, search_filters):
    """Return the rows of ``values`` that match ``search_filters``.

    Args:
        columns: list of column names, in the same order as a row's fields.
        values: iterable of rows, each indexable by column position.
        aggregator: passed through to ``is_found`` to combine the filters.
        search_filters: mapping of column name to the value searched for.

    Returns:
        List of the matching rows, in their original order.

    Raises:
        ValueError: if a filter names a column that is not in ``columns``.
    """
    # Format the search values into something we can use:
    # {'col': {'idx': <column index>, 'val': <search value>}}
    search_terms = {
        col: {'idx': columns.index(col), 'val': val}
        for col, val in search_filters.items()
    }
    return [row for row in values if is_found(row, aggregator, search_terms)]
if __name__ == "__main__":
    # Demo data: the column names and one row per person.
    main_col = ['Name', 'Age', 'Gender']
    main_row = [
        ['Peter', 18, 'M'], ['Sam', 20, 'M'], ['Carol', 19, 'F'],
        ['Malcom', 21, 'M'], ['Oliver', 25, 'M'], ['Mellisa', 21, 'F'],
        ['Minreva', 18, 'F'], ['Bruce', 23, 'M'], ['Clarke', 24, 'M'],
        ['Zuck', 22, 'M'], ['Slade', 23, 'M'], ['Wade', 21, 'M'],
        ['Felicity', 22, 'F'], ['Selena', 23, 'F'], ['Ra\'s Al Gul', 700, 'M'],
    ]

    # Rows where Age is 22 OR Gender is 'M'.
    search_filter = {'Age': 22, 'Gender': 'M'}
    print(search(main_col, main_row, OR, search_filter))

    # Rows where Name is 'Carol' AND Age is 19.
    search_filter = {'Name': 'Carol', 'Age': 19}
    print(search(main_col, main_row, AND, search_filter))
If you want to stick with your pattern, this is an option:
search = {'Age' : 21, 'Gender' : 'M'}
# Resolve each searched column name to its position: [(column index, wanted value), ...]
idxs = [ (main_col.index(key), val) for key, val in search.items()]
# One set of matching rows per criterion; rows become tuples so they are hashable.
tmp = [ set(tuple(person) for person in main_row if person[i] == v) for i, v in idxs ]
# Rows present in every set, i.e. the rows that satisfy all criteria.
res = set.intersection(*tmp)
#=> {('Wade', 21, 'M'), ('Malcom', 21, 'M')}
NOTE: I used intersection to return AND, but you can customise to any of the operation available on set (https://docs.python.org/3.7/library/stdtypes.html#set): union, intersection, difference, ...
You can convert to a handy method:
def lookup(search, main_row, main_col):
    """Return the rows that satisfy every criterion in ``search`` (AND).

    Args:
        search: mapping of column name to the value that column must equal.
        main_row: list of rows, each ordered like ``main_col``.
        main_col: list of column names.

    Returns:
        Set of the matching rows, each converted to a tuple.  An empty
        ``search`` places no constraint and returns every row.

    Raises:
        ValueError: if ``search`` names a column that is not in ``main_col``.
    """
    idxs = [(main_col.index(key), val) for key, val in search.items()]
    if not idxs:
        # set.intersection() needs at least one set; with no criteria every
        # row qualifies.
        return {tuple(person) for person in main_row}
    # One set of matching rows per criterion (tuples, so they are hashable).
    tmp = [
        {tuple(person) for person in main_row if person[i] == v}
        for i, v in idxs
    ]
    return set.intersection(*tmp)
lookup({'Age' : 21}, main_row, main_col)
#=> {('Wade', 21, 'M'), ('Mellisa', 21, 'F'), ('Malcom', 21, 'M')}
lookup({'Age' : 21, 'Gender' : 'M'}, main_row, main_col)
#=> {('Malcom', 21, 'M'), ('Wade', 21, 'M')}
lookup({'Age' : 21, 'Gender' : 'M', 'Name': 'Malcom'}, main_row, main_col)
#=> {('Malcom', 21, 'M')}
Anyway, I'd suggest to use a dict from main_row:
main_row = [['Peter', 18, 'M'], ['Sam', 20, 'M'], ['Carol', 19, 'F'], ['Malcom', 21, 'M'], ['Oliver', 25, 'M'], ['Mellisa', 21, 'F'], ['Minreva', 18, 'F'], ['Bruce', 23, 'M'], ['Clarke', 24, 'M'], ['Zuck', 22, 'M'], ['Slade', 23, 'M'], ['Wade', 21, 'M'], ['Felicity', 22, 'F'], ['Selena', 23, 'F'], ['Ra\'s Al Gul',700, 'M'], ['Oliver', 31, 'M']]
This builds people, a list of dictionaries (one per row), leaving out the list of headers:
people = [ {'name':name, 'age':age, 'gender':gender} for name, age, gender in main_row]
#=> [{'name': 'Peter', 'age': 18, 'gender': 'M'}, {'name': 'Sam', 'age': 20, 'gender': 'M'}, ....
Then you can query for example in this way:
next(person for person in people if person['name'] == "Oliver" and person['age'] == 31 )
#=> {'name': 'Oliver', 'age': 31, 'gender': 'M'}
the_21_years_old = [ person for person in people if person['age'] == 21 ]
#=> [{'name': 'Malcom', 'age': 21, 'gender': 'M'}, {'name': 'Mellisa', 'age': 21, 'gender': 'F'}, {'name': 'Wade', 'age': 21, 'gender': 'M'}]
You can then do whatever you need with the returned "records":
# Print the name and age of every selected record.
for person in the_21_years_old:
    print(person['name'], person['age'])
# Malcom 21
# Mellisa 21
# Wade 21

Looping through a dictionary

I'm trying to loop through a dictionary: starting from the first key, the code looks at the key's
value, loops through each element in that list and doubles it (repeats it twice). Once it's done with the list, it adds the key and the new value to a new dictionary, then moves on to the next key and repeats the process. The value attached to each key will always be a list. Preferably without importing any modules.
Some inputs and outputs to better understand what the code is supposed to do (the key order is not always the same, so sometimes 'b' comes first and sometimes 'a'):
>>> create_dict({'a': [1, 2], 'b': [3, 4]})
{'a': ['1', '1', '2', '2'], 'b': ['3', '3', '4', '4']}
>>> create_dict({'a': ['c', 'd'], 'b': ['d', 'e']})
{'a': ['c', 'c', 'd', 'd'], 'b': ['d', 'd', 'e', 'e']}
>>> create_dict({'a': ['e', 'f'], 'b': ['g', 'h']})
{'a': ['e', 'e', 'f', 'f'], 'b': ['g', 'g', 'h', 'h']}
What I've written so far:
# NOTE(review): the indentation of this snippet was lost; the statements
# below belong to the function body and to the nested loops.
def create_dict(sample_dict):
'''(dict) -> dict
Given a dictionary, loop through the value in the first key and double
each element in the list and add the result to a new dictionary, move on
to the next key and continue the process.
>>> create_dict({'a': [1, 2], 'b': [3, 4]})
{'a': ['1', '1', '2', '2'], 'b': ['3', '3', '4', '4']}
>>> create_dict({'a': ['c', 'd'], 'b': ['d', 'e']})
{'a': ['c', 'c', 'd', 'd'], 'b': ['d', 'd', 'e', 'e']}
>>> create_dict({'name': ['bob', 'smith'], 'last': ['jones', 'li']})
{'name': ['bob', 'bob', 'smith', 'smith'], 'last': ['jones', 'jones', 'li', 'li']}
'''
new_dict = {}
# NOTE(review): new_list is created once, before the loops, so the doubled
# elements of every key accumulate in the same list.
new_list = []
# Despite its name, `index` is each value (a list) of sample_dict.
for index in sample_dict.values():
for element in index:
# [element] * 2 is a two-item list, so each element is appended twice.
new_list.extend([element] * 2)
# NOTE(review): nothing is stored in new_dict in the code shown here; the
# outputs quoted in the question presumably come from a version that also
# assigns new_list to each key -- confirm against the original post.
return new_dict
However the result I'm getting does not quite match what I had in mind:
>>> create_dict({'name': ['bob', 'smith'], 'last': ['jones', 'li']})
{'last': ['jones', 'jones', 'li', 'li', 'bob', 'bob', 'smith', 'smith'], 'name': ['jones', 'jones', 'li', 'li', 'bob', 'bob', 'smith', 'smith']}
>>> create_dict({'a': [1, 2], 'b': [3, 4]})
{'b': [3, 3, 4, 4, 1, 1, 2, 2], 'a': [3, 3, 4, 4, 1, 1, 2, 2]}
Thank you for those who help :)
I think you're initializing your new_list too soon. It's grabbing too much data
So, try this:
def create_dict(sample_dict):
    """Return a new dict in which every list element appears twice in a row.

    Args:
        sample_dict: mapping of keys to lists.

    Returns:
        New dict with the same keys; each value is a new list in which every
        element of the original list is repeated twice, order preserved.
        ``sample_dict`` itself is not modified.
    """
    new_dict = {}
    for key in sample_dict:
        # Start a fresh list for every key so values never leak between keys.
        new_list = []
        for val in sample_dict[key]:
            new_list.extend([val] * 2)
        new_dict[key] = new_list
    return new_dict


# print() call form: the Python 2 print statement is a SyntaxError in Python 3.
print(create_dict({'a': [1, 2], 'b': [3, 4]}))
It returns {'a': [1, 1, 2, 2], 'b': [3, 3, 4, 4]}
This can be a lot simpler with dictionary comprehensions
d = {'a': ['c', 'd'], 'b': ['d', 'e']}
{key:[y for z in zip(value, value) for y in z] for (key, value) in d.items()}
{'a': ['c', 'c', 'd', 'd'], 'b': ['d', 'd', 'e', 'e']}
def create_dict(sample_dict):
    """Build a dict whose list values have every element duplicated in place.

    Args:
        sample_dict: mapping of keys to lists.

    Returns:
        New dict with the same keys, where each value list contains every
        original element twice in a row (``[x, y]`` becomes ``[x, x, y, y]``).
    """
    new_dict = {}  # build dict straight
    for key, value in sample_dict.items():  # .items() returns tuples: (key, val)
        new_list = []  # start with a new list for each pair in the dict
        for element in value:  # go over each element in 'val'
            new_list.extend([element, element])
        new_dict[key] = new_list
    return new_dict


# print() call form: the Python 2 print statement is a SyntaxError in Python 3.
print(create_dict({'name': ['bob', 'smith'], 'last': ['jones', 'li']}))
Outputs:
>>>
{'last': ['jones', 'jones', 'li', 'li'], 'name': ['bob', 'bob', 'smith', 'smith']}

Categories

Resources