I'm having trouble accessing some values in a dictionary I made. In my code, I made two different dictionaries while reading through a file. The code I have is this:
# Two grammar tables built while reading the rule file: left-hand side -> list
# of right-hand sides. Both are defaultdict(list), so looking up a key that was
# never stored returns (and inserts) an empty list instead of raising KeyError.
nonterminal_rules = defaultdict(list)
terminal_rules = defaultdict(list)
for line in open(file, 'r').readlines():
# Only the line as a whole is stripped; LHS and RHS keep whatever whitespace
# surrounds "->".
# NOTE(review): the terminal_rules keys printed below end in a space
# ('NN ', 'Aux ', ...), which is consistent with LHS not being stripped here.
LHS,RHS = line.strip().split("->")
# A right-hand side wrapped in single quotes is treated as a terminal.
# NOTE(review): index 1 is tested rather than 0 - presumably because RHS starts
# with the space that follows "->"; confirm against the input file.
if RHS[1] == "'" and RHS[-1] == "'" :
# The stored value is stripped, the key is not.
terminal_rules[LHS].append(RHS.strip())
else:
# RHS.split() yields the right-hand-side symbols without any whitespace.
nonterminal_rules[LHS].append(RHS.split())
for i in nonterminal_rules:
for j in nonterminal_rules[i]:
# j is one split right-hand side; single-symbol rules are looked up in
# terminal_rules.
if len(j) == 1:
# j[0] comes from split() and carries no whitespace, so it cannot equal a key
# that was stored with a trailing space; the defaultdict then returns [].
# NOTE(review): the closing parenthesis on the next line is unbalanced.
x = terminal_rules[j[0]])
Here are the keys and values to my dict:
print(self.original_grammar.terminal_rules.items())
dict_items([('NN ', ["'body'", "'case'", "'immunity'", "'malaria'", "'mouse'", "'pathogen'", "'research'", "'researcher'", "'response'", "'sepsis'", "'system'", "'type'", "'vaccine'"]), ('NNS ', ["'cells'", "'fragments'", "'humans'", "'infections'", "'mice'", "'Scientists'"]), ('Prep ', ["'In'", "'with'", "'in'", "'of'", "'by'"]), ('IN ', ["'that'"]), ('Adv ', ["'today'", "'online'"]), ('PRP ', ["'this'", "'them'", "'They'"]), ('Det ', ["'a'", "'A'", "'the'", "'The'"]), ('RP ', ["'down'"]), ('AuxZ ', ["'is'", "'was'"]), ('VBN ', ["'alerted'", "'compromised'", "'made'"]), ('Adj ', ["'dendritic'", "'immune'", "'infected'", "'new'", "'Systemic'", "'weak'", "'whole'", "'live'"]), ('VBN ', ["'discovered'"]), ('Aux ', ["'have'"]), ('VBD ', ["'alerted'", "'injected'", "'published'", "'rescued'", "'restored'", "'was'"]), ('COM ', ["','"]), ('PUNC ', ["'?'", "'.'"]), ('PossPro ', ["'their'", "'Their'"]), ('MD ', ["'Will'"]), ('Conj ', ["'and'"]), ('VBP ', ["'alert'", "'capture'", "'display'", "'have'", "'overstimulate'"]), ('VB ', ["'work'"]), ('VBZ ', ["'invades'", "'is'", "'shuts'"]), ('NNP ', ["'Dr'", "'Jose'", "'Villadangos'"])])
Let's say I have the key-value pair {Aux:["have"]}.
The problem is that if i = Aux, for example, x is just set to an empty list, when I actually want it to be equal to ["have"].
I'm not sure what I'm doing/accessing incorrectly. Any ideas? Thanks!
I'm assuming from reading your code that you want all things that start and end with ', correct? In that case, you probably want
if RHS[0] == "'" and RHS[-1] == "'" :
terminal_rules[LHS].append(RHS.strip())
Since 0 is the first character of the string :). If ' isn't the second character of the split string, then right now it'll add everything to non_terminal_rules.
If you're trying to set terminal_rules to be every key:value pair in nonterminal_rules that is of length 1, do this:
nonterminal_rules = defaultdict(list)
terminal_rules = defaultdict(list)
for line in open(file, 'r').readlines():
# Do stuff here as you've done above
terminal_rules = {key:value for key,value in nonterminal_rules.items() if len(value) == 1}
Related
I am completely new to Python. I have been creating a vocabulary program, and I want it to mix the words so that the ones after the : are asked as well. So far it only ever shows the ones in front of the :. How can I achieve this?
print()
print('Welcome to german vocabulary quiz!')
import random
answer = input('Ready? ')
¨
print('')
while answer=='y' or 'yes':
vocabDictionary={
'e Arbeit':'pracovat', 'oder':'nebo', 'r Abend':'večer', 'als':'jako', 'bitten':'prosit',
'buchstabieren':'hláskovat','wessen':'čí','r Koffer':'kufr','wer':'kdo','wem':'komu',
'wen':'koho','sehen':'vidět','e Tochter':'dcera','gruSen':'zdravit','warten':'čekat','sagen':'říkat',
'e Lehrerin':'učitelka','r Lehrer':'učitel','schreiben':'napsat','zeigen':'ukázat','stehen':'stát','suchen':'hledat',
'fahren':'jet','abfahren':'odjet','kommen':'přijít','hier'and'da':'tady','s Buch':'kniha',
'r Zug':'vlak','offnen':'otevřít','schlieSen':'zavřít','ab/biegen':'odbočit','e Ampel':'semafor',
'denn':'pak'and'potom','dorthin':'tam','až'and'dokud':'bis','zu':'k'and'ke','druben'and'gegenuber':'naproti','fremd':'cizí',
'r FuSganger':'chodec','gerade':'právě','geradeaus':'rovně','e Halstestelle':'zastávka','r Hauptbahnhof':'hlavní nádraží',
'ihnen':'vám','e Kreuzung':'křižovatka','links':'vlevo','nach links':'doleva','mit':'se'or's','nach':'do'or'po',
'rechts':'vpravo','e StraSe':'ulice'and'silnice','uberqueren':'přejít','ungefahr':'přibližně'or'asi',
'von hier':'odsud','weiter':'dál','zu FuS':'pěšky','aber':'ale','alles':'všechno','e Blume':'květina',
'brav':'hodný','ein bisschen':'trochu','faul':'líný','fleiSig':'pilný','e Freizeit':'volný čas','r FuSball':'fotbal',
'gern(e)':'rád','groS':'velký','haben':'mít','horen':'poslouchat','hubsch'and'schon':'hezký'or'pěkný','jetzt':'teď'or'nyní',
'e Journalistin':'novinářka','s Kaninchen':'králík','lernen':'učit se','lieb':'milý','lustig':'veselý',
'manchmal':'někdy'or'občas','nett':'milý'or'vlídný'or'hezký','noch':'ještě','nur':'jen','oft':'často',
'recht':'skutečně'or'opravdu'or'velmi','sauber':'čistý','sauber machen':'uklízet','schauen':'dívat se'or'podívat se',
'schlank':'štíhlý','sehr':'velmi','zehn':'deset','r Spaziergang':'procházka','einen Spaziergang machen':'jít na procházku',
'spielen':'hrát','studieren':'studovat','s Tier':'zvíře','treiben':'zabývat se'or'provozovat','e Zeit':'čas',
'Sport treiben':'sportovat','verheiratet':'ženatý'or'vdaná','r Unternhehmer':'podnikatel','zu Hause':'doma',
'ziemlich':'pořádně'or'značně','zwanzig':'dvacet','aus':'z','dann':'potom','dich':'tebe'or'tě',
'dir':'ti'or'tobě','e Entschuldigung':'omluva'or'prominutí','finden':'nacházet'or'shledávat','gehen':'jít',
'geil':'báječný'or'skvělý'or'super','heiSen':'jmenovat se','r Herr':'pán','e Frau':'paní','r Nachname':'příjmení',
'leider':'bohužel','r Tag':'den','viel':'hodně'and'hodně','was':'co','wie':'jak','woher':'odkud','wohnen':'bydlet',
'Tschechien':'Česko'
}
keyword_list=list(vocabDictionary.keys())
random.shuffle(keyword_list)
score=0
for keyword in keyword_list:
display='{}'
print(display.format(keyword))
userInputAnswer=input(': ')
print('')
vocabDictionary.keys() only returns the keys of the dictionary, which are the words before the :
To create a list containing both the keys and the values, you can use .values() to create a second list and add the two lists together:
keyword_list1=list(vocabDictionary.keys())
keyword_list2= list(vocabDictionary.values())
keyword_list = keyword_list1 + keyword_list2
Full codes below:
# Vocabulary quiz: shows words taken from both sides of vocabDictionary in
# random order and counts the correct answers.
print('Welcome to german vocabulary quiz!')
import random
answer = input('Ready? ')
print('')
# NOTE(review): this condition parses as (answer=='y') or 'yes'. The non-empty
# string 'yes' is always truthy, so the loop is entered whatever the user typed,
# and nothing visible in the loop reassigns `answer`.
while answer=='y' or 'yes':
# Word pairs, one key:value entry per pair.
# NOTE(review): entries written 'a'and'b' or 'a'or'b' are boolean expressions,
# not lists of alternatives. For two non-empty strings `and` evaluates to the
# right-hand one and `or` to the left-hand one, so only one word of each group
# ends up in the dictionary.
vocabDictionary={
'e Arbeit':'pracovat', 'oder':'nebo', 'r Abend':'večer', 'als':'jako', 'bitten':'prosit',
'buchstabieren':'hláskovat','wessen':'čí','r Koffer':'kufr','wer':'kdo','wem':'komu',
'wen':'koho','sehen':'vidět','e Tochter':'dcera','gruSen':'zdravit','warten':'čekat','sagen':'říkat',
'e Lehrerin':'učitelka','r Lehrer':'učitel','schreiben':'napsat','zeigen':'ukázat','stehen':'stát','suchen':'hledat',
'fahren':'jet','abfahren':'odjet','kommen':'přijít','hier'and'da':'tady','s Buch':'kniha',
'r Zug':'vlak','offnen':'otevřít','schlieSen':'zavřít','ab/biegen':'odbočit','e Ampel':'semafor',
'denn':'pak'and'potom','dorthin':'tam','až'and'dokud':'bis','zu':'k'and'ke','druben'and'gegenuber':'naproti','fremd':'cizí',
'r FuSganger':'chodec','gerade':'právě','geradeaus':'rovně','e Halstestelle':'zastávka','r Hauptbahnhof':'hlavní nádraží',
'ihnen':'vám','e Kreuzung':'křižovatka','links':'vlevo','nach links':'doleva','mit':'se'or's','nach':'do'or'po',
'rechts':'vpravo','e StraSe':'ulice'and'silnice','uberqueren':'přejít','ungefahr':'přibližně'or'asi',
'von hier':'odsud','weiter':'dál','zu FuS':'pěšky','aber':'ale','alles':'všechno','e Blume':'květina',
'brav':'hodný','ein bisschen':'trochu','faul':'líný','fleiSig':'pilný','e Freizeit':'volný čas','r FuSball':'fotbal',
'gern(e)':'rád','groS':'velký','haben':'mít','horen':'poslouchat','hubsch'and'schon':'hezký'or'pěkný','jetzt':'teď'or'nyní',
'e Journalistin':'novinářka','s Kaninchen':'králík','lernen':'učit se','lieb':'milý','lustig':'veselý',
'manchmal':'někdy'or'občas','nett':'milý'or'vlídný'or'hezký','noch':'ještě','nur':'jen','oft':'často',
'recht':'skutečně'or'opravdu'or'velmi','sauber':'čistý','sauber machen':'uklízet','schauen':'dívat se'or'podívat se',
'schlank':'štíhlý','sehr':'velmi','zehn':'deset','r Spaziergang':'procházka','einen Spaziergang machen':'jít na procházku',
'spielen':'hrát','studieren':'studovat','s Tier':'zvíře','treiben':'zabývat se'or'provozovat','e Zeit':'čas',
'Sport treiben':'sportovat','verheiratet':'ženatý'or'vdaná','r Unternhehmer':'podnikatel','zu Hause':'doma',
'ziemlich':'pořádně'or'značně','zwanzig':'dvacet','aus':'z','dann':'potom','dich':'tebe'or'tě',
'dir':'ti'or'tobě','e Entschuldigung':'omluva'or'prominutí','finden':'nacházet'or'shledávat','gehen':'jít',
'geil':'báječný'or'skvělý'or'super','heiSen':'jmenovat se','r Herr':'pán','e Frau':'paní','r Nachname':'příjmení',
'leider':'bohužel','r Tag':'den','viel':'hodně'and'hodně','was':'co','wie':'jak','woher':'odkud','wohnen':'bydlet',
'Tschechien':'Česko'
}
# Prompts are drawn from the keys and the values alike, then shuffled in place.
keyword_list1=list(vocabDictionary.keys())
keyword_list2= list(vocabDictionary.values())
keyword_list = keyword_list1 + keyword_list2
random.shuffle(keyword_list)
score=0
for keyword in keyword_list:
display='{}'
print(display.format(keyword))
userInputAnswer=input(': ')
print('')
# Case 1: the prompt is a key, so the answer must equal its value.
try:
if userInputAnswer==(vocabDictionary[keyword]):
score += 1
# Case 2: the prompt is not a key (KeyError), i.e. it came from the values. The
# typed answer is then looked up as a key and its value compared with the prompt.
except KeyError:
try:
if keyword == vocabDictionary[userInputAnswer]:
score +=1
# The typed answer is not a key either: no point is awarded.
except KeyError:
pass
print(score)
Currently, you are only picking words from the keys (that is, the words before the colon).
You could try this:
keyword_list_keys = list(vocabDictionary.keys())
keyword_list_values = list(vocabDictionary.values())
# random.shuffle() reorders its argument in place and returns None. Shuffling
# the temporary `keys + values` expression directly would throw the shuffled
# list away, so the combined list is bound to a name first.
keyword_list = keyword_list_keys + keyword_list_values
random.shuffle(keyword_list)
Then you would have to differentiate depending on the two cases, to find the matching key/value.
reserved_chars = "? & | ! { } [ ] ( ) ^ ~ * : \ " ' + -"
list_vals = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', 'annoying\name']
What is the fastest way to loop through every element in a list and add a \ in front of each reserved character that an element contains?
desired output:
fixed_list = ['gold\-bear#gmail.com', 'P\&G#dom.com', 'JACKSON\! BOT', 'annoying\\name']
You could make a translation table with str.maketrans() and pass that into translate. This takes a little setup, but you can reuse the translation table and it's quite fast:
# Every character that has to be prefixed with a backslash.
reserved_chars = '''?&|!{}[]()^~*:\\"'+-'''
list_vals = [
    'gold-bear#gmail.com',
    'P&G#dom.com',
    'JACKSON! BOT',
    'annoying\\name',
]

# Escaped form of each reserved character, in the same order as reserved_chars.
replace = [f'\\{char}' for char in reserved_chars]
# Translation table: given a single dict, str.maketrans() maps each
# one-character key to its replacement string. Build it once and reuse it.
trans = str.maketrans(
    {char: escaped for char, escaped in zip(reserved_chars, replace)}
)

# One translate() pass per input string, then one escaped string per line.
fixed_list = list(map(lambda text: text.translate(trans), list_vals))
print(*fixed_list, sep="\n")
Prints:
gold\-bear#gmail.com
P\&G#dom.com
JACKSON\! BOT
annoying\\name
There is no fast way - you have strings, strings are immutable, so you need to create new ones.
Probably the best way is to build your own translation dictionary and do the grunt work yourself:
# Characters to escape. The literal is iterated character by character below,
# so the spaces that separate the symbols become entries of their own.
reserved = """? & | ! { } [ ] ( ) ^ ~ * : \ " ' + -"""
# Map every character of `reserved` to itself prefixed with one backslash.
tr = { c:f"\\{c}" for c in reserved}
print(tr)
# NOTE: 'annoying\name' is not a raw string, so \n is a newline character here,
# not a backslash followed by the letter n.
data = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', 'annoying\name']
# Rebuild each word character by character; characters without an entry in tr
# are kept unchanged.
transformed = [ ''.join(tr.get(letter,letter) for letter in word) for word in data]
for word in transformed:
print(word)
Output:
# translation dictionary
{'?': '\\?', ' ': '\\ ', '&': '\\&', '|': '\\|', '!': '\\!', '{': '\\{',
'}': '\\}', '[': '\\[', ']': '\\]', '(': '\\(', ')': '\\)', '^': '\\^',
'~': '\\~', '*': '\\*', ':': '\\:', '\\': '\\\\', '"': '\\"', "'": "\\'",
'+': '\\+', '-': '\\-'}
# transformed strings
gold\-bear#gmail.com
P\&G#dom.com
JACKSON\!\ BOT
annoying
ame
Sidenotes:
Your example fails to escape the space inside 'JACKSON\! BOT'.
The repr() of the transformed list looks "wrongly" escaped because repr() escapes each '\' again - to see what is actually stored, print the words one by one as shown above.
Definitely not the fastest, but could be the easiest to code. Make a regex that does it for you, and run re.sub, like this:
import re
# Characters that must be preceded by a backslash in the output.
reserved_chars = "?&|!{}[]()^~*:\\\"'+-"
# Spell every character as a \xNN hex escape so that none of them can be read
# as regex syntax inside the [...] class; the parentheses capture the match.
hex_escapes = [f"\\x{ord(char):x}" for char in reserved_chars]
replace_regex = "([{}])".format("".join(hex_escapes))
list_vals = [
    'gold-bear#gmail.com',
    'P&G#dom.com',
    'JACKSON! BOT',
    r'annoying\name',
]
# Replacement template: a literal backslash followed by the captured character.
escape = re.compile(replace_regex).sub
escaped_vals = [escape(r"\\\1", value) for value in list_vals]
Again, just to clarify, regexes are SLOW.
I have a dictionary named data.
Now I want to append more data to the dictionary. However it seems that I do not append, but overwrite the dictionary. How can I append data?
Code:
# Add something to data
data = {'level_a_title': 'Disk 1', 'level_a_show_on_analysis_report': '1', 'level_a_type': 'text', 'level_a_value': 'Windows'}
# Add another line for Data
data = {**data, **{'level_a_title': 'Disk 2', 'level_a_show_on_analysis_report': '1', 'level_a_type': 'text', 'level_a_value': 'Backup'}}
# Print everything
for key, value in data.items():
print(key + ' = ' + str(value))
Output:
C:\Users\dpa\PycharmProjects\json\venv\Scripts\python.exe C:/Users/dpa/PycharmProjects/json/main.py
level_a_title = Disk 2
level_a_show_on_analysis_report = 1
level_a_type = text
level_a_value = Backup
Process finished with exit code 0
A dictionary maps each key to exactly one value, so level_a_title can only map to either Disk 1 or Disk 2. If you want to keep both records, use a list of dictionaries, like this:
# A dict holds exactly one value per key, so each disk is kept as its own dict
# inside a list rather than being merged into a single dict.
data = [
    {
        'level_a_title': 'Disk 1',
        'level_a_show_on_analysis_report': '1',
        'level_a_type': 'text',
        'level_a_value': 'Windows',
    },
]
data.append({
    'level_a_title': 'Disk 2',
    'level_a_show_on_analysis_report': '1',
    'level_a_type': 'text',
    'level_a_value': 'Backup',
})

# Print everything: the position of each record, then its key/value pairs.
for idx, line in enumerate(data):
    print("Index: ", idx)
    for key, value in line.items():
        print(key + ' = ' + str(value))
I have a script that allows me to extract the info obtained from excel to a list, this list contains str values that contain phrases such as: "I like cooking", "My dog´s name is Doug", etc.
So I've tried this code that I found on the Internet, knowing that the int function has a way to transform an actual phrase into numbers.
The code I used is:
lista=["I like cooking", "My dog´s name is Doug", "Hi, there"]
test_list = [int(i, 36) for i in lista]
Running the code I get the following error:
builtins.ValueError: invalid literal for int() with base 36: "I like
cooking"
But I´ve tried the code without the spaces or punctuation, and i get an actual value, but I do need to take those characters into consideration.
To expand on the bytearray approach you could use int.to_bytes and int.from_bytes to actually get an int back, although the integers will be much longer than you show in your example.
def to_int(s):
    """Encode a string as a non-negative integer.

    The text is encoded as UTF-8 and the resulting bytes are read as a single
    big-endian unsigned number, so spaces and punctuation are preserved.

    Args:
        s: The text to encode.

    Returns:
        The integer whose big-endian byte representation is the UTF-8
        encoding of ``s``. The empty string yields 0.

    Note:
        Leading NUL characters do not change the numeric value, so they
        cannot be recovered from the result.
    """
    return int.from_bytes(s.encode('utf-8'), 'big', signed=False)
def to_str(s):
    """Decode an integer back into the text it encodes.

    Args:
        s: A non-negative integer whose big-endian bytes are UTF-8 text.
            (The parameter keeps its original name although it is an int.)

    Returns:
        The decoded string; 0 decodes to the empty string.

    Raises:
        OverflowError: If ``s`` is negative.
        UnicodeDecodeError: If the bytes of ``s`` are not valid UTF-8.
    """
    # Smallest number of whole bytes that can hold every bit of the value.
    byte_count = (s.bit_length() + 7) // 8
    return s.to_bytes(byte_count, 'big').decode('utf-8')
lista = ["I like cooking",
"My dog´s name is Doug",
"Hi, there"]
encoded = [to_int(s) for s in lista]
decoded = [to_str(s) for s in encoded]
encoded:
[1483184754092458833204681315544679,
28986146900667755422058678317652141643897566145770855,
1335744041264385192549]
decoded:
['I like cooking',
'My dog´s name is Doug',
'Hi, there']
As noted in the comments, converting phrases to integers with int() won't work if the phrase contains whitespace or most non-alphanumeric characters with a few exceptions.
If your phrases all use a common encoding, then you might get something closer to what you want by converting your strings to bytearrays. For example:
s = 'My dog´s name is Doug'
b = bytearray(s, 'utf-8')
print(list(b))
# [77, 121, 32, 100, 111, 103, 194, 180, 115, 32, 110, 97, 109, 101, 32, 105, 115, 32, 68, 111, 117, 103]
From there you would have to figure out whether or not you want to preserve the list of integers representing each phrase or combine them in some way depending on what you intend to do with these numerical string representations.
Since you want to convert your text for an AI, you should do something like this:
import re
# Plain replacement -> every accented or typographic variant folded into it.
# Upper-case accented letters are folded to the lower-case base letter.
_CHAR_FOLDS = {
    "'": "\u0060\u00B4\u2018\u2019",
    'a': "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5",
    'e': "\u00C8\u00C9\u00CA\u00CB\u00E8\u00E9\u00EA\u00EB",
    'i': "\u00CC\u00CD\u00CE\u00CF\u00EC\u00ED\u00EE\u00EF",
    'o': "\u00D2\u00D3\u00D4\u00D5\u00D6\u00F2\u00F3\u00F4\u00F5\u00F6",
    # U+00D9..U+00DC and U+00F9..U+00FC are the accented forms of "u".
    'u': "\u00D9\u00DA\u00DB\u00DC\u00F9\u00FA\u00FB\u00FC",
    # U+00DD / U+00FD are accented forms of "y", not of "u".
    'y': "\u00DD\u00FD",
}
_FOLD_TABLE = str.maketrans(
    {bad: good for good, variants in _CHAR_FOLDS.items() for bad in variants}
)

# Regex clean-ups applied in this exact order: collapse repeated punctuation
# and put spaces around the symbols that should stand alone.
_REGEX_RULES = (
    (r'\.( +\.)+', '..'),
    (r',+', ','),
    (r'\-+', '-'),
    (r'\?+', ' ? '),
    (r'\!+', ' ! '),
    (r'\'+', "'"),
    (r';+', ':'),
    (r'/+', ' / '),
    (r'<+', ' < '),
    (r'>+', ' > '),
)

# Literal replacements applied, in order, after the regex rules.
_LITERAL_RULES = (
    ('%', '% '),
    (' - ', ' : '),
    (' -', " - "),
    ('- ', " - "),
    (" '", " "),
    ("' ", " "),
)


def clean_text(text, vocab):
    """Normalize a string so that it only contains symbols from ``vocab``.

    Steps, in order: fold accented letters and typographic quotes to plain
    ASCII, reject anything containing ``'http'``, rewrite ``&`` as ``and``,
    tidy up punctuation, collapse runs of spaces, and finally drop every
    character that is not in ``vocab``.

    Args:
        text: The string to normalize.
        vocab: Container of allowed characters; only ``in`` is used on it.

    Returns:
        The normalized string, or ``''`` when ``text`` contains ``'http'``.
    """
    text = text.translate(_FOLD_TABLE)

    # Anything containing 'http' is discarded entirely.
    if 'http' in text:
        return ''

    text = text.replace('&', ' and ')
    for pattern, replacement in _REGEX_RULES:
        text = re.sub(pattern, replacement, text)
    for old, new in _LITERAL_RULES:
        text = text.replace(old, new)

    # The rules above may leave several spaces in a row or at either end.
    text = re.sub(r' +', ' ', text.strip(' '))

    # Keep only characters the vocabulary knows about.
    return ''.join(char for char in text if char in vocab)
def arr_to_vocab(arr, vocabDict):
    """Translate every element of ``arr`` through ``vocabDict``.

    Works on any iterable, including strings (translated character by
    character). Every element of ``arr`` has to be a key of ``vocabDict``;
    the mapping may contain keys that never occur in ``arr``.

    Args:
        arr: Iterable of symbols to convert.
        vocabDict: Mapping from symbol to its converted value.

    Returns:
        A list with the converted value of each element, in order. If an
        element cannot be looked up (or ``arr`` cannot be iterated), the
        error is printed and an empty list is returned.
    """
    try:
        return [vocabDict[symbol] for symbol in arr]
    # Only lookup problems are handled here: a missing key or index, an
    # unhashable element, or a non-iterable input. Other errors propagate.
    except (LookupError, TypeError) as e:
        print(e)
        return []
def str_to_vocab(vocab):
    """Build the two lookup tables for a vocabulary.

    Args:
        vocab: Iterable of vocabulary symbols; a symbol's position is its
            index.

    Returns:
        A ``(to_vocab, from_vocab)`` tuple where ``to_vocab`` maps
        index -> symbol and ``from_vocab`` maps symbol -> index. If a symbol
        occurs more than once, ``from_vocab`` keeps its last index.
    """
    to_vocab = dict(enumerate(vocab))
    # Invert the table; later duplicates overwrite earlier ones.
    from_vocab = {symbol: index for index, symbol in to_vocab.items()}
    return to_vocab, from_vocab
vocab = sorted([chr(i) for i in range(32, 127)]) # a basic vocab for your model
vocab.insert(0, None)
toVocab, fromVocab = str_to_vocab(vocab) #converting vocab into usable form
your_data_str = ["I like cooking", "My dog´s name is Doug", "Hi, there"] #your data, a list of strings
X = []
for i in your_data_str:
X.append(arr_to_vocab(clean_text(i, vocab), fromVocab)) # normalizing and converting to "ints" each string
# your data is now almost ready for your model, just pad it to the size of your input with zeros and it's done
print (X)
If you want to know how convert an "int" string back to a string, tell me.
This is somewhat complicated. I have a list that looks like this:
['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
In my list, the '\n' entries are what separate one story from the next. What I would like to do is create a dictionary from the above list that would look like this:
dict = {ID1: [19841018, 'Plunging oil... cut in the price'], ID2: [19841018, 'The U.S. dollar... the foreign-exchange markets']}
You can see that the key of my dictionary is the ID, and the values are the date and the combined text of the story. Is that doable?
My IDs, are in this format J00100394, J00384932. So they all start with J00.
The tricky part is splitting your list on a separator value, so I've taken that part from here. Then I've parsed the resulting sublists to build the res dict:
>>> import itertools
>>> def isplit(iterable,splitters):
... return [list(g) for k,g in itertools.groupby(iterable,lambda x:x in splitters) if not k]
...
>>> l = ['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
>>> res = {}
>>> for sublist in isplit(l,('\n',)):
... id_parts = sublist[0].split()
... story = ' '.join (sentence.strip() for sentence in sublist[1:])
... res[id_parts[1].strip()] = [id_parts[0].strip(), story]
...
>>> res
{'ID2': ['19841018', 'The U.S. dollar... the foreign-exchange markets late New York trading'], 'ID1': ['19841018', 'Plunging oil... cut in the price']}
I wrote an answer that uses a generator. The idea is that every time an ID token starts, the generator yields the previously computed entry. You can customize it by changing check_fun() and the way the parts of the description are combined.
def trailing_carriage(s):
    """Return ``s`` without its final newline character, if it has one.

    Only a single trailing ``'\\n'`` is removed; any other trailing
    whitespace is left untouched.
    """
    return s[:-1] if s.endswith('\n') else s
def check_fun(s):
    """Recognize a header line of the form ``"<integer> <name>"``.

    :param s: One line of input.
    :return: ``(name, value)`` when the text before the first space parses
        as an integer, where ``name`` is the rest of the line without its
        trailing newline and ``value`` is that integer. ``None`` when the
        line contains no space or does not start with an integer.
    """
    if ' ' not in s:
        return None
    id_candidate, name = s.split(" ", 1)
    try:
        return trailing_carriage(name), int(id_candidate)
    except ValueError:
        # The first field is not a number, so this is not a header line.
        return None
def parser_list(list, check_id_prefix=check_fun):
    """Group lines into ``(name, id_val, desc)`` entries.

    A line accepted by ``check_id_prefix`` starts a new entry; every other
    line, up to the next accepted one, is appended to the description of the
    current entry with its trailing newline removed.

    :param list: Iterable of lines. (The name shadows the builtin; it is kept
        so that existing keyword callers keep working.)
    :param check_id_prefix: Callable returning ``(name, value)`` for a header
        line and ``None`` for any other line.
    :return: Generator yielding one ``(name, id_val, desc)`` tuple per header
        line, in input order. Lines that appear before the first header
        belong to no entry and are discarded.
    """
    name = None  # key of the entry currently being collected
    id_val = None
    desc_parts = []  # description pieces of the current entry
    for token in list:
        check = check_id_prefix(token)
        if check is not None:
            if name is not None:
                # A new header closes the entry collected so far.
                yield name, id_val, "".join(desc_parts)
            name, id_val = check
            # Start from an empty description so that text of earlier
            # entries does not leak into this one.
            desc_parts = []
        else:
            desc_parts.append(trailing_carriage(token))
    if name is not None:
        # Flush the last entry.
        yield name, id_val, "".join(desc_parts)
>>> list = ['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
>>> print {k:[i,d] for k,i,d in parser_list(list)}
{'ID2': [19841018, ' Plunging oil... cut in the price The U.S. dollar... the foreign-exchange markets late New York trading '], 'ID1': [19841018, ' Plunging oil... cut in the price ']}