I cannot seem to offset iterator in loop - python

I am trying to loop through iterator with offset [i+1].
I have enumerated my dict. Now I want to access dfc with function offsetting second dataframe.
dfc = dict()
mydict = {1: 6, 2: 4,3: 10, 4: 7, 5: 3}
for i, (k, v) in enumerate(mydict.items()):
dfc[(i)] = v
print("index: {}, key: {}, value: {}".format(i, k, v))
for i in range(0,5,1):
result[i] = dfc[i] * dfc[i+1]
But I got this error:
File "<ipython-input-139-b31501e8f8af>", line 2, in <module>
result[i] = dfc[i] * dfc[i+1]
KeyError: 5

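You are accessing a key that does not exist, which is why you get the error: dfc only has the keys 0 to 4, so on the last iteration (i = 4) the lookup dfc[i+1] asks for key 5. You can fix it like this: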
mydict = {1: 6, 2: 4, 3: 10, 4: 7, 5: 3}

# There is no point in creating dfc - if you need a duplicate, use
# dfc = dict(mydict).
# Sort the keys explicitly: dicts only preserve insertion order from
# Python 3.7 on, and were unordered before that.
sorted_keys = sorted(mydict)

for k in sorted_keys:
    print(f"index: {k}, key: {k}, value: {mydict[k]}")

# Multiply every value by the value stored under the next key. The last key
# has no successor, so .get() falls back to 1, which leaves the value
# unchanged instead of raising a KeyError.
result = {i: mydict[i] * mydict.get(i + 1, 1) for i in sorted_keys}

for k in sorted_keys:
    print(f"index: {k}, key: {k}, value: {result[k]}")
Output:
index: 1, key: 1, value: 6
index: 2, key: 2, value: 4
index: 3, key: 3, value: 10
index: 4, key: 4, value: 7
index: 5, key: 5, value: 3
index: 1, key: 1, value: 24
index: 2, key: 2, value: 40
index: 3, key: 3, value: 70
index: 4, key: 4, value: 21
index: 5, key: 5, value: 3
Using dict.get(key, default) tries to look up the key and returns the default (here 1) if the key is not present. Multiplying by 1 does not change the result.
See Why dict.get(key) instead of dict[key]?

Related

Finding the length of keys of dictionary based on values

input_dict = {'ab':12, 'cd':4, 'ef':1, 'gh':8, 'kl':9}
out_dict = 2
Is there any way to find the length of keys of the dictionary, if the values in dictionary are greater than 2 and less than 9?
Try this,
In [61]: len([v for v in d.values() if 2 < v < 9])
Out[61]: 2
I think you want to find number of items in dictionary where value is 2 < v < 9:
input_dict = {"ab": 12, "cd": 4, "ef": 1, "gh": 8, "kl": 9}
out = sum(2 < v < 9 for v in input_dict.values())
print(out)
Prints:
2
Just returning the relevant lengths:
[len(k) for k, v in input_dict.items() if 2 < v < 9]
Returns:
[2, 2]

Python - Is there more efficient way to convert the string values in Dict to get unique numbers for each str

I have data consisting of 2 million records. I am trying to convert the values to numbers and save the mapping in a dictionary, then use that dictionary as a lookup table. All of this is to reduce the size of the file.
the data looks like this
[{
'a' : ['one','two'],
'b' : 'fine',
'c' : ['help']},
{
'a' : ['four','hen'],
'b' : 'happy',
'c' : ['mouse']},
{
'a' : ['two','hen'],
'b' : 'fine'}.......]
# Replace every string value in the records of `newdata` with a numeric ID.
# Returns (dataRD, newdata): the string -> ID table and the same list of
# records, whose values have been overwritten in place with lists of IDs.
# NOTE(review): the indentation of this paste was lost; the nesting described
# in the comments below is inferred from the expected output - confirm.
def convertDataToNumber(newdata):
# Strings seen so far, in first-seen order.
dataR = []
# Lookup table: string -> numeric ID.
dataRD = {}
# NOTE(review): `result` is never read or returned below.
result=[]
# Index into dataR of the most recently registered string.
cin = 0
# Next ID to hand out; IDs start at 1.
ctr = 1
# all_keys = {k for d in newdata for k in d.keys()}
for d in newdata:
for key,val in d.items():
# Case 1: the value is a list of strings.
if isinstance(val,type([])):
for l in val:
# Membership test on a list is a linear scan, so this check gets slower
# as dataR grows.
if l not in dataR:
dataR.append(l)
dataRD[(dataR[cin])] = ctr
ctr = ctr + 1
cin = cin + 1
# Swap each string for its ID; anything missing from the table is kept as is.
d[key] = [dataRD.get(x,x) for x in d[key]]
# Case 2: the value is a single string.
if isinstance(val,str):
if val not in dataR:
dataR.append(val)
dataRD[(dataR[cin])] = ctr
ctr = ctr + 1
cin = cin + 1
# Wrap the single string in a list so the stored result is a list of IDs too.
d[key] = [dataRD.get(x,x) for x in [d[key]]]
return dataRD,newdata
Is there a better way to convert the values to numbers.
currently it is taking around 1 hour to execute this operation.
output:
[{'a' : [1,2],
'b':[3],
'c':[4]},
{'a' : [5,6],
'b':[7],
'c':[8]},
{'a':[2,6],
'b':[3]}]
You can keep a dict that maps each string to a unique ID: the first time a string is seen it is stored with the next free number, and every later occurrence reuses the stored number. With this approach the data is traversed only once, and each string costs a single dictionary lookup instead of a scan through a list.
def cnvrt_num(v, mem_cat):
    """Translate a value, or a list of values, into a list of numeric IDs.

    Args:
        v: A single hashable value (e.g. a string) or a list of such values.
        mem_cat: Dict mapping already-seen values to their IDs. It is updated
            in place: every unseen value is stored with the next free ID,
            ``len(mem_cat) + 1``, so IDs start at 1 and grow without gaps.

    Returns:
        A list with the ID of each value, in input order. A single
        (non-list) value yields a one-element list.
    """
    items = v if isinstance(v, list) else [v]
    # setdefault() returns the stored ID when the value is known and registers
    # the next free ID otherwise - one dictionary operation per value.
    return [mem_cat.setdefault(item, len(mem_cat) + 1) for item in items]
mem_cat = {}
for dct in lst:
for k,v in dct.items():
dct[k] = cnvrt_num(v, mem_cat)
print(mem_cat)
# {'one': 1, 'two': 2, 'fine': 3, 'help': 4, 'four': 5, 'hen': 6, 'happy': 7, 'mouse': 8}
print(lst)
[
{'a': [1, 2], 'b': [3], 'c': [4]},
{'a': [5, 6], 'b': [7], 'c': [8]},
{'a': [2, 6], 'b': [3]}
]
Input:
lst = [
{'a' : ['one','two'],'b' : 'fine','c' : ['help']},
{'a' : ['four','hen'],'b' : 'happy','c' : ['mouse']},
{'a' : ['two','hen'],'b' : 'fine'}]

Return dictionary list in for loop statement

so, I have this dataframe
I need to replace that categorical column into ordinal/numerical
So if you processing it one by one it would look like:
labels = df_main_correlation['job_level'].astype('category').cat.categories.tolist()
replace_map_comp = {'job_level' : {k: v for k,v in zip(labels,list(range(1,len(labels)+1)))}}
print(replace_map_comp)
It will return
{'job_level': {'JG03': 1, 'JG04': 2, 'JG05': 3, 'JG06': 4}}
but you can do this using for loop in order to process all the columns right?
I tried this one
columns_categorical =list(df_main_correlation.select_dtypes(['object']).columns) #take the columns I want to process
replace_map_comp_list = []
for i, column in enumerate(columns_categorical):
labels = df_main_correlation[column].astype('category').cat.categories.tolist()
replace_map_comp = {column : {k: v for k,v in zip(labels,list(range(1,len(labels)+1)))}} # Return dictionary
print(replace_map_comp)
replace_map_comp_list.append(replace_map_comp[i])
replace_map_comp_list
But it only returns
{'job_level': {'JG03': 1, 'JG04': 2, 'JG05': 3, 'JG06': 4}}
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-202-acc2ad8defaa> in <module>()
8 #df_main_correlation.replace(replace_map_comp, inplace=True)
9
---> 10 replace_map_comp_list.append(replace_map_comp[i])
11 replace_map_comp_list
KeyError: 0
My expected result would be
{'job_level': {'JG03': 1, 'JG04': 2, 'JG05': 3, 'JG06': 4}}
{'person_level': {'PG01': 1, 'PG02': 2, 'PG03': 3, 'PG04': 4, 'PG05': 5, 'PG06': 6, 'PG07': 7, 'PG08': 8}}
{'Employee_type': {'RM_type_A': 1, 'RM_type_B': 2, 'RM_type_C': 3}}
any advices?
Consider df:
In [1543]: df
Out[1543]:
job_level person_level Employee_type
0 JG05 PG06 RM_type_A
1 JG04 PG04 RM_type_A
2 JG04 PG05 RM_type_B
3 JG03 PG03 RM_type_C
Use collections.Counter with Dictionary Comprehension:
In [1539]: from collections import Counter
In [1537]: x = df.to_dict('list')
In [1544]: res = {k: Counter(v) for k,v in x.items()}
In [1545]: res
Out[1545]:
{'job_level': Counter({'JG05': 1, 'JG04': 2, 'JG03': 1}),
'person_level': Counter({'PG06': 1, 'PG04': 1, 'PG05': 1, 'PG03': 1}),
'Employee_type': Counter({'RM_type_A': 2, 'RM_type_B': 1, 'RM_type_C': 1})}
A Counter is itself a dict subclass, so the result can be used like a regular dictionary.
try this, not sure
replace_map_comp_list.append(replace_map_comp['job_level'][column])

Separating values into dictionaries and counting them in Python

I’m trying to take a list of numbers, separate the values into dictionaries based on the int and float types, and then count the number of their occurrences. I’m having issues with the logic.
With the ideal output looking like so:
'int' : [1 : 3, 2 : 4, 5 : 1, 6 : 2],
'float' : [1.0 : 2, 2.3 : 4, 3.4 : 4]
This is what I have so far, and I keep pounding my head:
values = [1, 2.0, 3.0, 1, 1, 3, 4.0, 2, 1.0]
types = {
'int': [],
'float': []
}
for obj in values:
if isinstance(obj, int):
types['int'].append(obj)
elif isinstance(obj, float):
types['float'].append(obj)
for v in types:
if v not in types['int']:
counts = 0
counts[v] += 1
elif v not in types['float']:
counts = 0
counts[v] += 1
print(counts)
With the ideal output being:
'int' : [1 : 3, 2 : 4, 5 : 1, 6 : 2],
'float' : [1.0 : 2, 2.3 : 4, 3.4 : 4]
First half seems fine, but second half can improve.
try:
for k,v in types.items():
print(k, len(v))
Sample input:
values = [1.1,2,3.1]
Gives this output:
int 1
float 2
How about something like this:
This leverages the type function to give you the keys of the resulting dictionary ('int' and 'float') without having to specify those strings.
numbers = [1,1,1,2.1,2.1,2.2, 3,3.1]
def tally_by_type(numbers):
    """Count how often each value occurs, grouped by the value's type name.

    Args:
        numbers: Iterable of hashable values (ints, floats, strings, ...).

    Returns:
        A dict keyed by type name (``type(v).__name__``, e.g. ``'int'`` or
        ``'float'``); each entry maps a value of that type to the number of
        times it appears in ``numbers``.
    """
    result = {}
    for v in numbers:
        # Fetch (or create on first sight) the tally for this value's type.
        counts = result.setdefault(type(v).__name__, {})
        counts[v] = counts.get(v, 0) + 1
    return result
tally_by_type(numbers)
{'int': {1: 3, 3: 1}, 'float': {2.1: 2, 2.2: 1, 3.1: 1}}
Interestingly, this also works if you have strings in there
tally_by_type([1,2,3,3,3,'a','a','b'])
{'int': {1: 1, 2: 1, 3: 3}, 'str': {'a': 2, 'b': 1}}
you can try something like this:
# Split the numbers into two dictionaries by type, keyed by their string form.
ints = {}
floats = {}
# Named `values` rather than `list` so the builtin list() is not shadowed.
values = [3.5, 27, 87.8, 1.02, 66]
for val in values:
    if isinstance(val, int):
        ints[str(val)] = val
    elif isinstance(val, float):
        floats[str(val)] = val
print("ints dictionary\n", ints, "number of instances", len(ints))
print("floats dictionary\n", floats, "number of instances", len(floats))
which prints:
ints dictionary
{'27': 27, '66': 66} number of instances 2
floats dictionary
{'3.5': 3.5, '87.8': 87.8, '1.02': 1.02} number of instances 3
I did not quite get what dictionary keys you want to use though, assumed you don't really need them.

Return key with highest value

I have the following graph:
graph = {0 : {5:6, 4:8},
1 : {4:11},
2 : {3: 9, 0:12},
3 : {},
4 : {5:3},
5 : {2: 7, 3:4}}
I am trying to return the key that has the highest value in this graph. The expected output in this case would be 2 as key 2 has the highest value of 12.
Any help on how I can achieve this would be greatly appreciated.
Find the key whose maximum value is maximal:
max((k for k in graph), key=lambda k: max(graph[k].values(), default=float("-inf")))
Empty inner dicts are disqualified by the default of negative infinity, which can never be the maximum. Alternatively, you can just pre-filter such keys:
max((k for k in graph if graph[k]), key=lambda k: max(graph[k].values()))
Assuming it's all positive numbers
graph = {
    0: {5: 6, 4: 8},
    1: {4: 11},
    2: {3: 9, 0: 12},
    3: {},
    4: {5: 3},
    5: {2: 7, 3: 4},
}

# Track the largest weight seen so far and the outer key it belongs to.
# Starting from 0 is what makes this rely on all weights being positive.
highestKey = 0
# Named `highestValue` rather than `max` so the builtin max() is not shadowed.
highestValue = 0
for key, edges in graph.items():
    for weight in edges.values():
        if highestValue < weight:
            highestValue = weight
            highestKey = key
print(highestKey)
You can also create (max_weight, key) tuples for each key and get the max of those:
max_val = max((max(e.values()), k) for k, e in graph.items() if e)
# (12, 2)
print(max_val[1])
# 2
Note that we don't need a custom key function for max here because the first value in the tuple is the one we want max to consider.
A recursive solution is below. It makes no assumptions about the depth of your tree; it only assumes that every value is an int, a float, or a dict.
# No import is needed: isinstance() and dict are builtins, and there is no
# module called `type`.
def getLargest(d):
    """Return the largest number found anywhere inside a nested dict.

    Args:
        d: A number, or a dict whose values are numbers or further dicts,
            nested to any depth. Keys are ignored; only values are compared.

    Returns:
        The largest number found, or None when ``d`` contains no numbers at
        all (for example an empty dict).
    """
    largest = None

    def getLargestRecursive(node):
        # `largest` lives in the enclosing function; without nonlocal the
        # assignment below would create a new local variable instead.
        nonlocal largest
        if isinstance(node, dict):
            # Descend into the values, not into the dict itself again.
            for child in node.values():
                getLargestRecursive(child)
        # Compare against None explicitly so that 0 counts as a real value.
        elif largest is None or node > largest:
            largest = node

    getLargestRecursive(d)
    return largest
# Largest weight below each top-level key (None for keys with no values).
largestValues = {k: getLargest(v) for k, v in graph.items()}
# Skip the empty entries, then pick the key whose largest weight is maximal.
answer = max(
    (k for k, v in largestValues.items() if v is not None),
    key=largestValues.get,
)
You can also use a dict comprehension to flatten the dictionary and then print the key with the maximum value:
graph = {
    0: {5: 6, 4: 8},
    1: {4: 11},
    2: {3: 9, 0: 12},
    3: {},
    4: {5: 3},
    5: {2: 7, 3: 4},
}

# Map every key to the largest of its values. Keys without values are left
# out. Taking max() explicitly matters: a plain nested comprehension would
# keep whichever value happens to come last, not the largest one.
flat_dcit = {k: max(v.values()) for k, v in graph.items() if v}
print(max(flat_dcit, key=flat_dcit.get))
# output,
2
You can also try flattening your dictionary into a list of tuples then take the max of the tuple with the highest second value:
from operator import itemgetter
graph = {
0: {5: 6, 4: 8},
1: {4: 11},
2: {3: 9, 0: 12},
3: {},
4: {5: 3},
5: {2: 7, 3: 4},
}
result = max(((k, v) for k in graph for v in graph[k].values()), key=itemgetter(1))
print(result)
# (2, 12)
print(result[0])
# 2

Categories

Resources