Working with python dictionaries - python

I am writing a function that takes in an argument. From that argument, I want to compare it to a dictionary's set of keys and return the key's value for any matches. So far I have been able to only return the argument matches for the keys.
def func(str):
    # Asker's original attempt: collects the characters of the argument that
    # are keys of b, rather than the values stored under those keys.
    a = []
    b = {'a':'b','c':'d','e':'f'}
    for i in str:
        if i in b.keys():
            a.append(i)  # appends the matching key itself, not b[i]
    return a
Output sample:
func('abcdefghiabcdefghi')
['a','c','e','a','c','e']
Wanted output:
['b','d','f','b','d','f']

Best not to use str as a variable name. I think your function can be written more simply like this
def func(mystr):
    """Return the mapped value for each character of mystr that is a key.

    Characters that are not keys of the lookup table are ignored; the
    results keep the order in which the characters appear in mystr.
    """
    lookup = {'a': 'b', 'c': 'd', 'e': 'f'}
    matching = (ch for ch in mystr if ch in lookup)
    return [lookup[ch] for ch in matching]
If you don't want to use a list comprehension, then you can fix it like this
def func(mystr):
    """Return the mapped value for each character of mystr that is a key.

    Same result as the list-comprehension version, written as an explicit loop.
    """
    table = {'a': 'b', 'c': 'd', 'e': 'f'}
    found = []
    for ch in mystr:
        # Membership on the dict itself tests its keys (same as table.keys()).
        if ch not in table:
            continue
        # Store the value looked up under the key, not the key itself.
        found.append(table[ch])
    return found

Related

Run function in a loop with length defined by a list of strings

How can I run a function passing values from a list?
I have this list
keywords = ['car','water','2 bottles']
and a function defined as follows:
def my_func(param):
    """Print and return the 'User' entries of rows whose 'Field' contains param.

    Relies on a module-level ``df`` (presumably a pandas DataFrame with
    'Field' and 'User' columns -- confirm against the caller).
    """
    mask = df['Field'].str.contains(param)
    b = df.loc[mask, 'User'].tolist()
    print(f'\nThe item "{param}" was bought by in User: ', b)
    return b
Currently I am doing it manually:
buyer= my_func('car')
buyer= my_func('water')
buyer= my_func('2 bottles')
However, I would like to run the function and print the results by iterating param over the list keywords.
Can you please help me with this?
You can use a for loop:
def my_func(param):
    """Print and return the 'User' entries of rows whose 'Field' contains param.

    Relies on a module-level ``df`` (presumably a pandas DataFrame with
    'Field' and 'User' columns -- confirm against the caller).
    """
    mask = df['Field'].str.contains(param)
    b = df.loc[mask, 'User'].tolist()
    print(f'\nThe item "{param}" was bought by in User: ', b)
    return b


keywords = ['car','water','2 bottles']
# One result list per keyword, collected in keyword order.
buyers = [my_func(key) for key in keywords]
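Alternatively, use a single regex that matches any of the keywords (this gives one combined list rather than one list per keyword):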
b = df.loc[df['Field'].str.contains('|'.join(keywords)), 'User'].tolist()

Will key-value arguments be evaluated in the order they are given?

Can one be sure that key-value arguments will be put in the dictionary (kv below) in the order they are given, and that the dictionary will be iterated in that order too? In this peculiar example it matters:
def func(**kv):
    """Pass each keyword argument to func2 in turn and return the summed results.

    Since Python 3.6 (PEP 468) ``**kv`` keeps the order in which the keyword
    arguments were written at the call site, so iteration follows that order.
    """
    s = 0
    for key, val in kv.items():
        s += func2(key, val)
    return s


def func2(key, val):
    """Update the module-level ``a`` / ``b`` from one pair and return ``a + b``.

    "all" sets both values, "a" sets only ``a`` and "b" sets only ``b``;
    any other key leaves both untouched.
    """
    # Without this declaration the assignments below would create locals and
    # ``a + b`` would raise UnboundLocalError.
    global a, b
    if key == "all":
        a = val
        b = val
    elif key == "a":
        a = val
    elif key == "b":  # was a second, unreachable ``key == "a"`` branch
        b = val
    return a + b


a = b = 0
func(all=3, a=2, b=1)

Difficulty with dictionary order function

Write a function named "indexed_kvs" that doesn't take any parameters and returns a new key-value store containing the integers from 0 to 36 as values each stored at a key which is a string containing the digits of the integer. For example the key-value "0":0 will be in your returned key-value store (include both 0 and 36 in your list). (My code below)
def indexed_kvs():
    # Asker's original attempt, kept as posted so the answers stay meaningful.
    d = dict()
    for x in range(37):
        d[x] = x  # the key is the integer x itself, not a string of its digits
        return d  # placed inside the loop, so only the first pair is returned
I keep on getting the first key and value; how do I get all the keys and values?
You return from inside the loop which is a common mistake that can be avoided altogether by using a dict comprehension, at least in this simple case:
def indexed_kvs():
    """Return a dict mapping the digit string of each integer 0..36 to that integer."""
    numbers = range(37)
    return dict(zip(map(str, numbers), numbers))
As @Loocid commented, the return statement shouldn't be inside the for loop, so the correct code would be:
def indexed_kvs():
    """Return a dict mapping the digit string of each integer 0..36 to that integer."""
    store = {}
    for number in range(37):
        key = str(number)
        store[key] = number
    # Return only after the loop has finished, so all 37 pairs are included.
    return store
You have your "return d" inside your loop. So, what happens is this:
1) The function begins
2) The for loop executes once:
a) adds 0 to the dictionary at key 0,
b) encounters return d, then thinks: 'Okay! I got a return statement. I gotta exit!'
c) And boom! Your function is done.
So, just move your return d out of your for loop, so that the function exits only when the loop is over.
So your new code should be:
def indexed_kvs():
    """Build and return {"0": 0, "1": 1, ..., "36": 36}."""
    result = {}
    # enumerate yields the integer alongside its string form.
    for value, key in enumerate(map(str, range(37))):
        result[key] = value
    return result

Removing duplicate characters from a string

How can I remove duplicate characters from a string using Python? For example, let's say I have a string:
foo = 'mppmt'
How can I make the string:
foo = 'mpt'
NOTE: Order is not important
If order does not matter, you can use
"".join(set(foo))
set() will create a set of unique letters in the string, and "".join() will join the letters back to a string in arbitrary order.
If order does matter, you can use a dict instead of a set, which since Python 3.7 preserves the insertion order of the keys. (In the CPython implementation, this is already supported in Python 3.6 as an implementation detail.)
foo = "mppmt"
result = "".join(dict.fromkeys(foo))
resulting in the string "mpt". In earlier versions of Python, you can use collections.OrderedDict, which has been available starting from Python 2.7.
If order does matter, how about:
>>> foo = 'mppmt'
>>> ''.join(sorted(set(foo), key=foo.index))
'mpt'
If order does not matter:
>>> foo='mppmt'
>>> ''.join(set(foo))
'pmt'
To keep the order:
>>> foo='mppmt'
>>> ''.join([j for i,j in enumerate(foo) if j not in foo[:i]])
'mpt'
Create a list in Python and also a set which doesn't allow any duplicates.
Solution1 :
def fix(string):
    """Return string with repeated characters removed.

    The first occurrence of each character is kept, in original order.
    """
    seen = set()  # characters already emitted; gives O(1) membership tests
    kept = []     # previously named ``list``, which shadowed the builtin
    for ch in string:
        if ch not in seen:
            seen.add(ch)
            kept.append(ch)
    return ''.join(kept)
string = "Protiijaayiiii"
print(fix(string))
Method 2 :
s = "Protijayi"
aa = [ ch for i, ch in enumerate(s) if ch not in s[:i]]
print(''.join(aa))
Method 3 :
s = "Protijayi"  # sample input; the snippet previously used an undefined name ``a``
# Dict keys are unique and keep insertion order (Python 3.7+), so building a
# dict from the characters drops repeats while preserving first-seen order.
dd = ''.join(dict.fromkeys(s))
print(dd)
As was mentioned "".join(set(foo)) and collections.OrderedDict will do.
I added foo = foo.lower() in case the string has both upper- and lower-case characters and you need to remove ALL duplicates regardless of case.
from collections import OrderedDict

foo = "EugeneEhGhsnaWW"
# Lower-case first so that 'E' and 'e' count as the same character.
foo = foo.lower()
# OrderedDict.fromkeys keeps one key per character in first-seen order.
# The parenthesised single-argument print runs on both Python 2 and Python 3.
print("".join(OrderedDict.fromkeys(foo)))
prints eugnhsaw
#Check code and apply in your Program:
#Input= 'pppmm'
s = 'ppppmm'
s = ''.join(set(s))
print(s)
#Output: pm
If order is important,
seen = set()
result = []
for c in foo:
if c not in seen:
result.append(c)
seen.add(c)
result = ''.join(result)
Or to do it without sets:
result = []
for c in foo:
if c not in result:
result.append(c)
result = ''.join(result)
def dupe(str1):
    """Return the distinct characters of str1 joined in arbitrary (set) order."""
    return "".join(set(str1))
str1='geeksforgeeks'
a=dupe(str1)
print(a)
works well if order is not important.
d = {}
s="YOUR_DESIRED_STRING"
res=[]
for c in s:
if c not in d:
res.append(c)
d[c]=1
print ("".join(res))
The variable c traverses the string s in the for loop, and each c is checked against the dictionary d (which is initially empty). If c is not in d, c is appended to the list res and d[c] is set to 1 to mark the character as seen. Once the loop has finished traversing the string, res holds every unique character in order of first appearance, and it is joined and printed.
Using regular expressions:
import re

# (.) captures any single character; \1+ requires one or more immediate
# repeats of that same character.
pattern = r'(.)\1+'
repl = r'\1'  # replace the whole run with a single copy of the character
text = 'shhhhh!!!'  # the closing quote was missing
# Only adjacent repeats are collapsed; duplicates separated by other
# characters are left alone.
print(re.sub(pattern, repl, text))
output:
sh!
As a string is a sequence of characters, using its characters as dictionary keys removes all duplicates and (since Python 3.7) retains their order.
"".join(list(dict.fromkeys(foo)))
Functional programming style while keeping order:
import functools
def get_unique_char(a, b):
    """Reducer step: return a unchanged if it already contains b, else a + b."""
    return a if b in a else a + b
if __name__ == '__main__':
foo = 'mppmt'
gen = functools.reduce(get_unique_char, foo)
print(''.join(list(gen)))
def remove_duplicates(value):
    """Return the characters of value with repeats removed, in first-seen order.

    The earlier ``if i in value`` test was always true for an item taken from
    value, so it is dropped. A set tracks seen characters and the result is
    joined once, avoiding repeated substring scans and string concatenation.
    """
    seen = set()
    unique = []
    for ch in value:
        if ch not in seen:
            seen.add(ch)
            unique.append(ch)
    return "".join(unique)
print(remove_duplicates("11223445566666ababzzz###123#*#*"))
from collections import OrderedDict
def remove_duplicates(value):
    """Return the items of value joined into a string, each kept once in first-seen order."""
    unique_in_order = OrderedDict.fromkeys(value)
    return ''.join(unique_in_order)
print(remove_duplicates("11223445566666ababzzz###123#*#*"))
mylist=["ABA", "CAA", "ADA"]
results=[]
for item in mylist:
buffer=[]
for char in item:
if char not in buffer:
buffer.append(char)
results.append("".join(buffer))
print(results)
output
ABA
CAA
ADA
['AB', 'CA', 'AD']

Finding matching keys in two large dictionaries and doing it fast

I am trying to find corresponding keys in two different dictionaries. Each has about 600k entries.
Say for example:
myRDP = { 'Actinobacter': 'GATCGA...TCA', 'subtilus sp.': 'ATCGATT...ACT' }
myNames = { 'Actinobacter': '8924342' }
I want to print out the value for Actinobacter (8924342) since it matches a value in myRDP.
The following code works, but is very slow:
for key in myRDP:
for jey in myNames:
if key == jey:
print key, myNames[key]
I've tried the following but it always results in a KeyError:
for key in myRDP:
print myNames[key]
Is there perhaps a function implemented in C for doing this? I've googled around but nothing seems to work.
Thanks.
Use sets, because they have a built-in intersection method which ought to be quick:
myRDP = { 'Actinobacter': 'GATCGA...TCA', 'subtilus sp.': 'ATCGATT...ACT' }
myNames = { 'Actinobacter': '8924342' }
rdpSet = set(myRDP)
namesSet = set(myNames)
for name in rdpSet.intersection(namesSet):
print name, myNames[name]
# Prints: Actinobacter 8924342
You could do this:
for key in myRDP:
if key in myNames:
print key, myNames[key]
Your first attempt was slow because you were comparing every key in myRDP with every key in myNames. In algorithmic jargon, if myRDP has n elements and myNames has m elements, then that algorithm would take O(n×m) operations. For 600k elements each, this is 360,000,000,000 comparisons!
But testing whether a particular element is a key of a dictionary is fast -- in fact, this is one of the defining characteristics of dictionaries. In algorithmic terms, the key in dict test is O(1), or constant-time. So my algorithm will take O(n) time, which is one 600,000th of the time.
in python 3 you can just do
myNames.keys() & myRDP.keys()
for key in myRDP:
    # dict.get returns the given default (None) instead of raising KeyError
    # when key is absent from myNames.
    name = myNames.get(key, None)
    # Compare with None explicitly: a bare ``if name:`` would also skip keys
    # whose stored value is falsy (for example '' or 0).
    if name is not None:
        # One formatted string prints "key name" on both Python 2 and Python 3.
        print("%s %s" % (key, name))
dict.get returns the default value you give it (in this case, None) if the key doesn't exist.
You could start by finding the common keys and then iterating over them. Set operations should be fast because they are implemented in C, at least in modern versions of Python.
common_keys = set(myRDP).intersection(myNames)
for key in common_keys:
print key, myNames[key]
The best and easiest way would be to simply perform common set operations (Python 3).
a = {"a": 1, "b":2, "c":3, "d":4}
b = {"t1": 1, "b":2, "e":5, "c":3}
res = a.items() & b.items() # {('b', 2), ('c', 3)} For common Key and Value
res = {i[0]:i[1] for i in res} # In dict format
common_keys = a.keys() & b.keys() # {'b', 'c'}
Cheers!
Use the get method instead:
for key in myRDP:
    # With no explicit default, dict.get returns None for a missing key.
    value = myNames.get(key)
    # Identity comparison is the recommended way to test for None.
    if value is not None:
        # One formatted string prints "key = value" on both Python 2 and Python 3.
        print("%s = %s" % (key, value))
You can simply write this code and it will save the common keys in a list.
# Test membership on the dict itself: it is a constant-time hash lookup,
# whereas on Python 2 ``myNames.keys()`` builds a list that is scanned
# linearly for every key of myRDP.
common = [i for i in myRDP if i in myNames]
Copy both dictionaries into one dictionary/array. This makes sense as you have 1:1 related values. Then you need only one search, no comparison loop, and can access the related value directly.
Example Resulting Dictionary/Array:
[Name][Value1][Value2]
[Actinobacter][GATCGA...TCA][8924342]
[XYZbacter][BCABCA...ABC][43594344]
...
Here is my code for doing intersections, unions, differences, and other set operations on dictionaries:
class DictDiffer(object):
    """
    Compare a current dictionary against a past one, key by key.

    The comparison is exposed through four methods, each returning a set of keys:
    added()     -- keys present only in the current dictionary
    removed()   -- keys present only in the past dictionary
    changed()   -- keys present in both whose values differ
    unchanged() -- keys present in both whose values are equal
    """

    def __init__(self, current_dict, past_dict):
        self.current_dict = current_dict
        self.past_dict = past_dict
        self.set_current = set(current_dict.keys())
        self.set_past = set(past_dict.keys())
        # Keys common to both dictionaries; every method below builds on this.
        self.intersect = self.set_current & self.set_past

    def added(self):
        """Keys of the current dictionary that the past one lacks."""
        return self.set_current.difference(self.intersect)

    def removed(self):
        """Keys of the past dictionary that the current one lacks."""
        return self.set_past.difference(self.intersect)

    def changed(self):
        """Shared keys whose past and current values compare unequal."""
        return {
            key for key in self.intersect
            if self.past_dict[key] != self.current_dict[key]
        }

    def unchanged(self):
        """Shared keys whose past and current values compare equal."""
        return {
            key for key in self.intersect
            if self.past_dict[key] == self.current_dict[key]
        }
if __name__ == '__main__':
    import unittest

    class TestDictDifferNoChanged(unittest.TestCase):
        """Keys 3 and 4 are shared and carry the same values in both dicts."""

        def setUp(self):
            self.past = {k: 2 * k for k in range(5)}
            self.current = {k: 2 * k for k in range(3, 8)}
            self.d = DictDiffer(self.current, self.past)

        def testAdded(self):
            self.assertEqual(self.d.added(), {5, 6, 7})

        def testRemoved(self):
            self.assertEqual(self.d.removed(), {0, 1, 2})

        def testChanged(self):
            self.assertEqual(self.d.changed(), set())

        def testUnchanged(self):
            self.assertEqual(self.d.unchanged(), {3, 4})

    class TestDictDifferNoCUnchanged(unittest.TestCase):
        """Keys 3 and 4 are shared but their values differ between the dicts."""

        def setUp(self):
            self.past = {k: 2 * k for k in range(5)}
            self.current = {k: 2 * k + 1 for k in range(3, 8)}
            self.d = DictDiffer(self.current, self.past)

        def testAdded(self):
            self.assertEqual(self.d.added(), {5, 6, 7})

        def testRemoved(self):
            self.assertEqual(self.d.removed(), {0, 1, 2})

        def testChanged(self):
            self.assertEqual(self.d.changed(), {3, 4})

        def testUnchanged(self):
            self.assertEqual(self.d.unchanged(), set())

    unittest.main()
def combine_two_json(json_request, json_request2):
    """Return a dict of the keys found in both arguments.

    Keys follow the iteration order of json_request; each value is taken
    from json_request2.
    """
    return {
        key: json_request2.get(key)
        for key in json_request.keys()
        if key in json_request2.keys()
    }

Categories

Resources