I am completely new to Python. I have been creating a vocabulary program, but I want it to mix the words, so that it also asks for the ones after the `:`. So far it only shows the ones in front of the `:`. How can I achieve this?
# German -> Czech vocabulary quiz: shows each German word in random order
# and reads the user's answer.
import random

print()
print('Welcome to german vocabulary quiz!')
answer = input('Ready? ')
print('')

# German word -> Czech translation. Defined once, outside the loop, because
# it never changes between rounds.
# NOTE(review): `'a' and 'b'` evaluates to 'b' and `'a' or 'b'` evaluates to
# 'a', so every entry written with and/or below stores only ONE of the words.
vocabDictionary = {
    'e Arbeit':'pracovat', 'oder':'nebo', 'r Abend':'večer', 'als':'jako', 'bitten':'prosit',
    'buchstabieren':'hláskovat','wessen':'čí','r Koffer':'kufr','wer':'kdo','wem':'komu',
    'wen':'koho','sehen':'vidět','e Tochter':'dcera','gruSen':'zdravit','warten':'čekat','sagen':'říkat',
    'e Lehrerin':'učitelka','r Lehrer':'učitel','schreiben':'napsat','zeigen':'ukázat','stehen':'stát','suchen':'hledat',
    'fahren':'jet','abfahren':'odjet','kommen':'přijít','hier'and'da':'tady','s Buch':'kniha',
    'r Zug':'vlak','offnen':'otevřít','schlieSen':'zavřít','ab/biegen':'odbočit','e Ampel':'semafor',
    'denn':'pak'and'potom','dorthin':'tam','až'and'dokud':'bis','zu':'k'and'ke','druben'and'gegenuber':'naproti','fremd':'cizí',
    'r FuSganger':'chodec','gerade':'právě','geradeaus':'rovně','e Halstestelle':'zastávka','r Hauptbahnhof':'hlavní nádraží',
    'ihnen':'vám','e Kreuzung':'křižovatka','links':'vlevo','nach links':'doleva','mit':'se'or's','nach':'do'or'po',
    'rechts':'vpravo','e StraSe':'ulice'and'silnice','uberqueren':'přejít','ungefahr':'přibližně'or'asi',
    'von hier':'odsud','weiter':'dál','zu FuS':'pěšky','aber':'ale','alles':'všechno','e Blume':'květina',
    'brav':'hodný','ein bisschen':'trochu','faul':'líný','fleiSig':'pilný','e Freizeit':'volný čas','r FuSball':'fotbal',
    'gern(e)':'rád','groS':'velký','haben':'mít','horen':'poslouchat','hubsch'and'schon':'hezký'or'pěkný','jetzt':'teď'or'nyní',
    'e Journalistin':'novinářka','s Kaninchen':'králík','lernen':'učit se','lieb':'milý','lustig':'veselý',
    'manchmal':'někdy'or'občas','nett':'milý'or'vlídný'or'hezký','noch':'ještě','nur':'jen','oft':'často',
    'recht':'skutečně'or'opravdu'or'velmi','sauber':'čistý','sauber machen':'uklízet','schauen':'dívat se'or'podívat se',
    'schlank':'štíhlý','sehr':'velmi','zehn':'deset','r Spaziergang':'procházka','einen Spaziergang machen':'jít na procházku',
    'spielen':'hrát','studieren':'studovat','s Tier':'zvíře','treiben':'zabývat se'or'provozovat','e Zeit':'čas',
    'Sport treiben':'sportovat','verheiratet':'ženatý'or'vdaná','r Unternhehmer':'podnikatel','zu Hause':'doma',
    'ziemlich':'pořádně'or'značně','zwanzig':'dvacet','aus':'z','dann':'potom','dich':'tebe'or'tě',
    'dir':'ti'or'tobě','e Entschuldigung':'omluva'or'prominutí','finden':'nacházet'or'shledávat','gehen':'jít',
    'geil':'báječný'or'skvělý'or'super','heiSen':'jmenovat se','r Herr':'pán','e Frau':'paní','r Nachname':'příjmení',
    'leider':'bohužel','r Tag':'den','viel':'hodně'and'hodně','was':'co','wie':'jak','woher':'odkud','wohnen':'bydlet',
    'Tschechien':'Česko'
}

# `answer == 'y' or 'yes'` is always true because the non-empty string 'yes'
# is truthy; test membership instead.
while answer in ('y', 'yes'):
    # Only the dictionary keys (the German words) are drawn here.
    keyword_list = list(vocabDictionary.keys())
    random.shuffle(keyword_list)
    score = 0
    for keyword in keyword_list:
        display = '{}'
        print(display.format(keyword))
        userInputAnswer = input(': ')
        print('')
    # Ask again so the outer loop can actually end.
    answer = input('Ready? ')
`vocabDictionary.keys()` only returns the keys of a dictionary, which are the words before the `:`.
To create a list containing both the keys and the values, you can use .values() to create another list, and add the two lists
# Words in front of the ':' (the dictionary keys).
keyword_list1=list(vocabDictionary.keys())
# Words behind the ':' (the dictionary values).
keyword_list2= list(vocabDictionary.values())
# One combined list holding both kinds of words.
keyword_list = keyword_list1 + keyword_list2
Full codes below:
# German <-> Czech vocabulary quiz: prompts with words from both sides of the
# dictionary and counts correct translations.
import random

print('Welcome to german vocabulary quiz!')
answer = input('Ready? ')
print('')

# German word -> Czech translation. Defined once, outside the loop, because
# it never changes between rounds.
# NOTE(review): `'a' and 'b'` evaluates to 'b' and `'a' or 'b'` evaluates to
# 'a', so every entry written with and/or below stores only ONE of the words.
vocabDictionary = {
    'e Arbeit':'pracovat', 'oder':'nebo', 'r Abend':'večer', 'als':'jako', 'bitten':'prosit',
    'buchstabieren':'hláskovat','wessen':'čí','r Koffer':'kufr','wer':'kdo','wem':'komu',
    'wen':'koho','sehen':'vidět','e Tochter':'dcera','gruSen':'zdravit','warten':'čekat','sagen':'říkat',
    'e Lehrerin':'učitelka','r Lehrer':'učitel','schreiben':'napsat','zeigen':'ukázat','stehen':'stát','suchen':'hledat',
    'fahren':'jet','abfahren':'odjet','kommen':'přijít','hier'and'da':'tady','s Buch':'kniha',
    'r Zug':'vlak','offnen':'otevřít','schlieSen':'zavřít','ab/biegen':'odbočit','e Ampel':'semafor',
    'denn':'pak'and'potom','dorthin':'tam','až'and'dokud':'bis','zu':'k'and'ke','druben'and'gegenuber':'naproti','fremd':'cizí',
    'r FuSganger':'chodec','gerade':'právě','geradeaus':'rovně','e Halstestelle':'zastávka','r Hauptbahnhof':'hlavní nádraží',
    'ihnen':'vám','e Kreuzung':'křižovatka','links':'vlevo','nach links':'doleva','mit':'se'or's','nach':'do'or'po',
    'rechts':'vpravo','e StraSe':'ulice'and'silnice','uberqueren':'přejít','ungefahr':'přibližně'or'asi',
    'von hier':'odsud','weiter':'dál','zu FuS':'pěšky','aber':'ale','alles':'všechno','e Blume':'květina',
    'brav':'hodný','ein bisschen':'trochu','faul':'líný','fleiSig':'pilný','e Freizeit':'volný čas','r FuSball':'fotbal',
    'gern(e)':'rád','groS':'velký','haben':'mít','horen':'poslouchat','hubsch'and'schon':'hezký'or'pěkný','jetzt':'teď'or'nyní',
    'e Journalistin':'novinářka','s Kaninchen':'králík','lernen':'učit se','lieb':'milý','lustig':'veselý',
    'manchmal':'někdy'or'občas','nett':'milý'or'vlídný'or'hezký','noch':'ještě','nur':'jen','oft':'často',
    'recht':'skutečně'or'opravdu'or'velmi','sauber':'čistý','sauber machen':'uklízet','schauen':'dívat se'or'podívat se',
    'schlank':'štíhlý','sehr':'velmi','zehn':'deset','r Spaziergang':'procházka','einen Spaziergang machen':'jít na procházku',
    'spielen':'hrát','studieren':'studovat','s Tier':'zvíře','treiben':'zabývat se'or'provozovat','e Zeit':'čas',
    'Sport treiben':'sportovat','verheiratet':'ženatý'or'vdaná','r Unternhehmer':'podnikatel','zu Hause':'doma',
    'ziemlich':'pořádně'or'značně','zwanzig':'dvacet','aus':'z','dann':'potom','dich':'tebe'or'tě',
    'dir':'ti'or'tobě','e Entschuldigung':'omluva'or'prominutí','finden':'nacházet'or'shledávat','gehen':'jít',
    'geil':'báječný'or'skvělý'or'super','heiSen':'jmenovat se','r Herr':'pán','e Frau':'paní','r Nachname':'příjmení',
    'leider':'bohužel','r Tag':'den','viel':'hodně'and'hodně','was':'co','wie':'jak','woher':'odkud','wohnen':'bydlet',
    'Tschechien':'Česko'
}

# `answer == 'y' or 'yes'` is always true because the non-empty string 'yes'
# is truthy; test membership instead.
while answer in ('y', 'yes'):
    # Prompt with words from both sides of the dictionary.
    keyword_list1 = list(vocabDictionary.keys())
    keyword_list2 = list(vocabDictionary.values())
    keyword_list = keyword_list1 + keyword_list2
    random.shuffle(keyword_list)
    score = 0
    for keyword in keyword_list:
        print(keyword)
        userInputAnswer = input(': ')
        print('')
        if keyword in vocabDictionary:
            # The prompt is a key: the expected answer is its value.
            if userInputAnswer == vocabDictionary[keyword]:
                score += 1
        elif vocabDictionary.get(userInputAnswer) == keyword:
            # The prompt is a value: the answer must be a key that maps to it.
            score += 1
    print(score)
    # Ask again so the outer loop can actually end.
    answer = input('Ready? ')
Currently, you are only picking words from the keys (i.e., the words before the colon).
You could try this:
keyword_list_keys = list(vocabDictionary.keys())
keyword_list_values = list(vocabDictionary.values())
# random.shuffle() works in place and returns None, so the combined list has
# to be bound to a name first; shuffling the temporary `keys + values`
# expression would throw the shuffled result away.
keyword_list = keyword_list_keys + keyword_list_values
random.shuffle(keyword_list)
Then you would have to differentiate depending on the two cases, to find the matching key/value.
Related
I have a function that is able to create triples and relationships from text. However, when I create a list of a column that contains text and pass it through the function, it only processes the first row, or item of the list. Therefore, I am wondering how the whole list can be processed within this function. Maybe a for loop would work?
The following line contains the list
# NOTE(review): despite its name this is a set literal (there are no
# key: value pairs), so it is not a dict and has no defined ordering.
rez_dictionary = {'Decent Little Reader, Poor Tablet',
'Ok For What It Is',
'Too Heavy and Poor weld quality,',
'difficult mount',
'just got it installed'}
from transformers import pipeline
triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
# We need to use the tokenizer manually since we need special tokens.
# The `[0]` after the pipeline call keeps only the first element of its
# result, so only that element's "generated_token_ids" are decoded.
extracted_text = triplet_extractor.tokenizer.batch_decode([triplet_extractor(rez_dictionary, return_tensors=True, return_text=False)[0]["generated_token_ids"]])
# Prints the first (and here only) decoded string.
print(extracted_text[0])
If anyone has a suggestion, I am looking forward for it.
Would it also be possible to get the output adjusted to the following format:
# Function to parse the generated text and extract the triplets
def extract_triplets(text):
    """Parse decoded model output into a list of relation triplets.

    The text is expected to use the markers ``<triplet>``, ``<subj>`` and
    ``<obj>``: the words after ``<triplet>`` form the head, the words after
    ``<subj>`` form the tail, and the words after ``<obj>`` form the relation
    type. ``<s>``, ``<pad>`` and ``</s>`` are removed before parsing.

    Args:
        text: One decoded string.

    Returns:
        A list of dicts with the keys ``'head'``, ``'type'`` and ``'tail'``,
        in the order the triplets were completed.
    """
    triplets = []
    subject, relation, object_ = '', '', ''
    current = 'x'  # no marker seen yet; ordinary words are ignored until one is

    cleaned = text.strip()
    for special in ("<s>", "<pad>", "</s>"):
        cleaned = cleaned.replace(special, "")

    for token in cleaned.split():
        if token == "<triplet>":
            current = 't'
            # A new triplet starts: flush the one collected so far, if any.
            if relation != '':
                triplets.append({'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
                relation = ''
            subject = ''
        elif token == "<subj>":
            current = 's'
            # Another tail for the same head: flush the previous relation.
            if relation != '':
                triplets.append({'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
            object_ = ''
        elif token == "<obj>":
            current = 'o'
            relation = ''
        elif current == 't':
            subject += ' ' + token
        elif current == 's':
            object_ += ' ' + token
        elif current == 'o':
            relation += ' ' + token

    # Flush the last triplet, which has no following marker to trigger it.
    if subject != '' and relation != '' and object_ != '':
        triplets.append({'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
    return triplets
# Parse only the first decoded string into triplets and show them.
extracted_triplets = extract_triplets(extracted_text[0])
print(extracted_triplets)
You are removing the other entries of rez_dictionary inside the batch_decode:
triplet_extractor(rez_dictionary, return_tensors=True, return_text=False)[0]["generated_token_ids"]
Use a list comprehension instead:
from transformers import pipeline
# A list (not a set), so the inputs keep the order they are written in.
rez = ['Decent Little Reader, Poor Tablet',
'Ok For What It Is',
'Too Heavy and Poor weld quality,',
'difficult mount',
'just got it installed']
triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
# Keep the whole pipeline result instead of indexing it with [0].
model_output = triplet_extractor(rez, return_tensors=True, return_text=False)
# Collect "generated_token_ids" from every element of the result and decode them together.
extracted_text = triplet_extractor.tokenizer.batch_decode([x["generated_token_ids"] for x in model_output])
# One decoded string per line.
print("\n".join(extracted_text))
Output:
<s><triplet> Decent Little Reader <subj> Poor Tablet <obj> different from <triplet> Poor Tablet <subj> Decent Little Reader <obj> different from</s>
<s><triplet> Ok For What It Is <subj> film <obj> instance of</s>
<s><triplet> Too Heavy and Poor <subj> weld quality <obj> subclass of</s>
<s><triplet> difficult mount <subj> mount <obj> subclass of</s>
<s><triplet> 2008 Summer Olympics <subj> 2008 <obj> point in time</s>
Regarding the extension of the OP's question, OP wanted to know how to run the function extract_triplets. OP can simply do that via a for-loop:
# Run the parser on every decoded string, one result list per line.
for text in extracted_text:
    print(extract_triplets(text))
Output:
[{'head': 'Decent Little Reader', 'type': 'different from', 'tail': 'Poor Tablet'}, {'head': 'Poor Tablet', 'type': 'different from', 'tail': 'Decent Little Reader'}]
[{'head': 'Ok For What It Is', 'type': 'instance of', 'tail': 'film'}]
[{'head': 'Too Heavy and Poor', 'type': 'subclass of', 'tail': 'weld quality'}]
[{'head': 'difficult mount', 'type': 'subclass of', 'tail': 'mount'}]
[{'head': '2008 Summer Olympics', 'type': 'point in time', 'tail': '2008'}]
I created a very simple story generator with Python based on this comic strip: https://xkcd.com/2243/
Each time that I run the script, it generates a new random story, however, if the user chooses to run it again by writing "y", the story generated is always the same. What am I doing wrong?
This is the code:
# a random spoiler generator based on this comic:
# https://xkcd.com/2243/
import random

# define the various options and choose randomly one
# NOTE: each random.choice() below runs exactly once, when these module-level
# assignments execute, so the chosen values stay fixed for the whole run.
villain = random.choice(['Kyle Ren', 'Malloc', 'Darth Sebelius', 'Theranos', 'Lord Juul'])
friend = random.choice(['Kym Spacemeasurer','Teen Yoda','Dab Tweetdek', 'Yaz Progestin', 'TI-83'])
lightsaber = random.choice(['beige', 'ochre', 'mauve', 'aquamarine', 'taupe'])
superweapon = random.choice(['Sun Obliterator', 'Moonsquisher', 'World Eater', 'Planet Zester', 'Superconducting Supercollider'])
superpower = random.choice([
    'blowing up a planet with a bunch of beams of energy that combine into one',
    'blowing up a bunch of planets with one beam of energy that splits into many',
    'cutting a planet in half and smashing the halves together like two cymbals',
    "increasing the CO2 levels in a planet's atmosphere, causing rapid heating",
    'triggering the end credits before the movies is done',
])
old_enemy = random.choice(['Boba Fett', 'Salacious Crumb', 'The Space Slug', 'The Bottom Half of Darth Maul', 'YouTube Commenters'])
feat = random.choice([
    'a bow that shoots little lightsaber-headed arrows.',
    'X-Wings and TIE Fighters dodging the giant letters of the opening crawl.',
    'a Sith educational display that uses force lightning to demonstrate the dielectric breakdown of air.',
    'Kylo Ren putting on another helmet over his smaller one.',
    'a Sith car wash where the bristles on the brushes are little lightsabers.',
])
father = random.choice(['Luke', 'Leia', 'Han', 'Obi-Wan', 'a random junk-trader'])
mother = random.choice(['Poe.', 'BB-8.', 'Amilyn Holdo.', 'Laura Dern.', 'a random junk-trader.', 'that one droid from the Jawa Sandcrawler that says "gonk".'])

# creates the parts of the story
intro = 'In this Star Wars movie, our heroes return to take on the First Order and new villain '
part_1 = '. With help from their new friend '
part_2 = ', Rey builds a new lightsaber with a '
part_3 = " blade, and they head out to confront the First Order's new weapon, the "
part_4 = ', a space station capable of '
part_5 = '. They unexpectedly join forces with their old enemy, '
part_6 = ', and destroy the superweapon in a battle featuring '
part_7 = "\n\nP.S. Rey's parents are "
part_8 = ' and '


# generates the story
def rsg():
    """Print the story assembled from the fixed text parts and the module-level picks."""
    print(intro + villain + part_1 + friend + part_2 + lightsaber + part_3 + superweapon + part_4 + superpower + part_5 + old_enemy + part_6 + feat + part_7 + father + part_8 + mother)


# asks user to generate another story or not
while True:
    rsg()
    # Keep asking until the reply is exactly 'y' or 'n'.
    while True:
        user_input = input('Would you like to generate a new spoiler? [y,n]\n')
        if user_input not in ('y', 'n'):
            print('Please enter "y" for yes or "n" for no.')
            continue
        if user_input == 'y':
            break  # back to the outer loop, which calls rsg() again
        else:
            print('Alright, bye!')
            quit()
You're calling random.choice only once, when the variables are first initialized. It should be:
villains = ['Kyle Ren', 'Malloc', 'Darth Sebelius', 'Theranos', 'Lord Juul']


def rsg():
    # Calling random.choice() inside the function draws a new value on every call.
    print(intro + random.choice(villains))  # ... then append the remaining parts, each drawn the same way
The variables are never updated, just computed at the start of the program.
Put all the random.choice lines in the rsg function and you will be good !
Hi, I'm in the process of learning, so you may have to bear with me. I have two lists I'd like to compare: any matches should be appended to one output list, and any non-matches should be appended to another output list.
Heres my code:
def EntryToFieldMatch(Entry, Fields):
    """Print the entries that contain a field name, then the fields flagged as invalid.

    Args:
        Entry: list of entry strings to check.
        Fields: list of field names; items flagged as invalid are removed
            from this list in place.

    An entry is valid when any field name occurs in it as a substring. A
    field is flagged as invalid only when it does not match the current entry
    and its 1-based position in ``Fields`` equals ``len(Entry)``.
    """
    valid = []
    invalid = []
    for c in Entry:
        count = 0
        # NOTE: Fields shrinks while it is being iterated (see remove below),
        # so the item that follows a removed one is skipped in that pass.
        for s in Fields:
            count += 1
            if s in c:
                valid.append(c)
            elif count == len(Entry):
                invalid.append(s)
                Fields.remove(s)
    # Single-argument print(...) gives the same output on Python 2.7 and 3.
    print(valid)
    print("-" * 50)
    print(invalid)
def main():
    """Run EntryToFieldMatch on the sample entries and field names."""
    vEntry = ['27/04/2014', 'Hours = 28', 'Site = Abroad', '03/05/2015', 'Date = 28-04-2015', 'Travel = 2']
    Fields = ['Week_Stop', 'Date', 'Site', 'Hours', 'Travel', 'Week_Start', 'Letters']
    EntryToFieldMatch(vEntry, Fields)


# `==` compares; a single `=` here is a syntax error.
if __name__ == "__main__":
    main()
the output seems fine except its not returning all the fields in the 2 output lists. This is the output I receive:
['Hours = 28', 'Site = Abroad', 'Date = 28-04-2015', 'Travel = 2']
--------------------------------------------------
['Week_Start', 'Letters']
I just have no idea why the second list doesn't include "Week_Stop". I've run the debugger and followed the code through a few times to no avail. I've read about sets but I didn't see any way to return fields that match and discard fields that don't.
Also im open to suggestion's if anybody knows of a way to simplify this whole process, I'm not asking for free code, just a nod in the right direction.
Python 2.7, Thanks
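You only have two conditions: either the field is in the string, or count is equal to the length of Entry. Neither of them ever catches the first element, 'Week_Stop'. Because you remove items from Fields while looping over it, its length goes 7 → 6 → 5: count reaches len(Entry) for 'Week_Start' and then for 'Letters', but after that Fields is shorter than Entry, so count can never equal len(Entry) again and you never reach 'Week_Stop'.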
A more efficient way would be to use sets or a collections.OrderedDict if you want to keep order:
from collections import OrderedDict
def EntryToFieldMatch(Entry, Fields):
    """Print the entries whose first word is a field name, then the unused fields.

    Args:
        Entry: iterable of entry strings such as ``'Hours = 28'``.
        Fields: iterable of field names; it is not modified.

    Each field matches at most one entry, because it is removed from the
    lookup once matched. Unused fields are printed in their original order.
    """
    valid = []
    # create orderedDict from the words in Fields
    # dict lookups are O(1)
    st = OrderedDict.fromkeys(Fields)
    # iterate over Entry
    for word in Entry:
        # split the words once on whitespace
        spl = word.split(None, 1)
        # An empty or whitespace-only entry splits to [], so guard before
        # indexing; then check whether the first word is one of our keys.
        if spl and spl[0] in st:
            # add to valid list
            valid.append(word)
            # remove the key
            del st[spl[0]]
    # Single-argument print(...) gives the same output on Python 2.7 and 3.
    print(valid)
    print("-" * 50)
    # only invalid words will be left
    print(list(st))
Output:
['Hours = 28', 'Site = Abroad', 'Date = 28-04-2015', 'Travel = 2']
--------------------------------------------------
['Week_Stop', 'Week_Start', 'Letters']
For large lists this would be significantly faster than your quadratic approach. Having O(1) dict lookups means your code goes from quadratic to linear; every time you do `in Fields` on a list, that is an O(n) operation.
Using a set the approach is similar:
def EntryToFieldMatch(Entry, Fields):
    """Print the entries whose first word is a field name, then the unused fields.

    Args:
        Entry: iterable of entry strings such as ``'Hours = 28'``.
        Fields: iterable of field names; it is not modified.

    Each field matches at most one entry. The unused fields are printed as a
    set, so their order is not defined.
    """
    valid = []
    st = set(Fields)
    for word in Entry:
        spl = word.split(None, 1)
        # An empty or whitespace-only entry splits to [], so guard before indexing.
        if spl and spl[0] in st:
            valid.append(word)
            st.remove(spl[0])
    # Single-argument print(...) works on both Python 2.7 and 3.
    print(valid)
    print("-" * 50)
    print(st)
The difference using sets is order is not maintained.
Using list comprehension:
def EntryToFieldMatch(Entries, Fields):
    """Print valid entries, invalid entries and unmatched fields.

    Args:
        Entries: list of entry strings.
        Fields: list of field names.

    An entry is valid when at least one field name occurs in it as a
    substring. A field is "missed" when it occurs in none of the entries.
    Neither argument is modified.
    """
    def mentions_a_field(entry):
        # A generator lets any() stop at the first hit instead of building a list.
        return any(field in entry for field in Fields)

    # using list comprehension
    valid = [entry for entry in Entries if mentions_a_field(entry)]
    invalidEntries = [entry for entry in Entries if not mentions_a_field(entry)]
    missedFields = [field for field in Fields
                    if not any(field in entry for entry in Entries)]

    # %-formatting inside a single-argument print(...) produces the same
    # text on Python 2.7 and Python 3.
    print('valid entries: %s' % (valid,))
    print('-' * 80)
    print('invalid entries: %s' % (invalidEntries,))
    print('-' * 80)
    print('missed fields: %s' % (missedFields,))
# Sample data: entry strings and the field names to look for in them.
vEntry = ['27/04/2014', 'Hours = 28', 'Site = Abroad', '03/05/2015', 'Date = 28-04-2015', 'Travel = 2']
Fields = ['Week_Stop', 'Date', 'Site', 'Hours', 'Travel', 'Week_Start', 'Letters']
# Prints the three result lists shown below.
EntryToFieldMatch(vEntry, Fields)
valid entries: ['Hours = 28', 'Site = Abroad', 'Date = 28-04-2015', 'Travel = 2']
--------------------------------------------------------------------------------
invalid entries: ['27/04/2014', '03/05/2015']
--------------------------------------------------------------------------------
missed fields: ['Week_Stop', 'Week_Start', 'Letters']
This is somewhat complicated. I have a list that looks like this:
['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
In my list, '\n' is what separates the stories. What I would like to do is create a dictionary from the above list that would look like this:
dict = {ID1: [19841018, 'Plunging oil... cut in the price'], ID2: [19841018, 'The U.S. dollar... the foreign-exchange markets']}
You can see that the key of my dictionary is the ID, and the values are the year and the combined text of the story. Is that doable?
My IDs, are in this format J00100394, J00384932. So they all start with J00.
The tricky part is split your list by any value, so i've take this part from here.Then i've parsed the list parts to built the res dict
import itertools


def isplit(iterable, splitters):
    """Split *iterable* into sublists at every item found in *splitters*.

    The splitter items themselves are dropped, and consecutive splitters do
    not produce empty sublists.
    """
    return [list(g) for k, g in itertools.groupby(iterable, lambda x: x in splitters) if not k]


l = ['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
res = {}
for sublist in isplit(l, ('\n',)):
    # First line of every story is "<date> <id>"; the rest is the story text.
    id_parts = sublist[0].split()
    story = ' '.join(sentence.strip() for sentence in sublist[1:])
    res[id_parts[1].strip()] = [id_parts[0].strip(), story]

print(res)
# {'ID1': ['19841018', 'Plunging oil... cut in the price'], 'ID2': ['19841018', 'The U.S. dollar... the foreign-exchange markets late New York trading']}
I wrote an answer that uses a generator. The idea is that every time a new ID token starts, the generator yields the previously computed entry. You can customize it by changing check_fun() and the way the parts of the description are combined.
def trailing_carriage(s):
    """Return *s* without its final newline character, if it ends with one."""
    if s.endswith('\n'):
        return s[:-1]
    return s


def check_fun(s):
    """Recognise a header line of the form ``"<integer> <name>"``.

    :param s: a string
    :return: None if s does not match the header rule (it has no space, or
             the part before the first space is not an integer). Otherwise a
             tuple ``(name, value)`` where ``name`` is the text after the
             first space, without a trailing newline, and ``value`` is the
             leading integer.
    """
    if ' ' in s:
        id_candidate, name = s.split(" ", 1)
        try:
            return trailing_carriage(name), int(id_candidate)
        except ValueError:
            pass
    return None


def parser_list(list, check_id_prefix=check_fun):
    """Yield one ``(name, value, description)`` tuple per header in *list*.

    :param list: iterable of strings (the parameter name shadows the builtin;
                 it is kept so existing keyword callers keep working)
    :param check_id_prefix: callable returning ``(name, value)`` for a header
                            line and None for any other line
    :return: generator of ``(name, value, description)``; the description is
             the concatenation of the non-header lines that follow the
             header, each with its trailing newline removed

    Lines that appear before the first header are discarded.
    """
    name = None  # key dict
    id_val = None
    desc = ""  # description string
    for token in list:
        check = check_id_prefix(token)
        if check is not None:
            if name is not None:
                # Return the previous computed entry
                yield name, id_val, desc
            name, id_val = check
            # Start a fresh description; without this reset every entry
            # would also carry the text of all the entries before it.
            desc = ""
        else:
            # Append the description
            desc += trailing_carriage(token)
    if name is not None:
        # Flush the last entry
        yield name, id_val, desc


if __name__ == "__main__":
    stories = ['19841018 ID1\n', ' Plunging oil... \n', 'cut in the price \n', '\n', '19841018 ID2\n', ' The U.S. dollar... \n', 'the foreign-exchange markets \n', 'late New York trading \n', '\n']
    print({k: [i, d] for k, i, d in parser_list(stories)})
    # {'ID1': [19841018, ' Plunging oil... cut in the price '], 'ID2': [19841018, ' The U.S. dollar... the foreign-exchange markets late New York trading ']}
I'm having trouble accessing some values in a dictionary I made. In my code, I made two different dictionaries while reading through a file. The code I have is this:
# Read grammar rules of the form "LHS -> RHS" and sort them into two tables.
nonterminal_rules = defaultdict(list)
terminal_rules = defaultdict(list)
# `with` closes the file even if a line fails to parse.
with open(file, 'r') as grammar_file:
    for line in grammar_file:
        LHS, RHS = line.strip().split("->")
        # RHS[1] skips the character that directly follows "->".
        if RHS[1] == "'" and RHS[-1] == "'":
            terminal_rules[LHS].append(RHS.strip())
        else:
            nonterminal_rules[LHS].append(RHS.split())

for i in nonterminal_rules:
    for j in nonterminal_rules[i]:
        if len(j) == 1:
            # NOTE(review): LHS keeps whatever whitespace preceded "->"
            # (e.g. 'Aux '), while j[0] comes from split() and has none, so
            # this lookup can miss; a defaultdict then returns a new empty list.
            x = terminal_rules[j[0]]
Here are the keys and values to my dict:
print(self.original_grammar.terminal_rules.items())
dict_items([('NN ', ["'body'", "'case'", "'immunity'", "'malaria'", "'mouse'", "'pathogen'", "'research'", "'researcher'", "'response'", "'sepsis'", "'system'", "'type'", "'vaccine'"]), ('NNS ', ["'cells'", "'fragments'", "'humans'", "'infections'", "'mice'", "'Scientists'"]), ('Prep ', ["'In'", "'with'", "'in'", "'of'", "'by'"]), ('IN ', ["'that'"]), ('Adv ', ["'today'", "'online'"]), ('PRP ', ["'this'", "'them'", "'They'"]), ('Det ', ["'a'", "'A'", "'the'", "'The'"]), ('RP ', ["'down'"]), ('AuxZ ', ["'is'", "'was'"]), ('VBN ', ["'alerted'", "'compromised'", "'made'"]), ('Adj ', ["'dendritic'", "'immune'", "'infected'", "'new'", "'Systemic'", "'weak'", "'whole'", "'live'"]), ('VBN ', ["'discovered'"]), ('Aux ', ["'have'"]), ('VBD ', ["'alerted'", "'injected'", "'published'", "'rescued'", "'restored'", "'was'"]), ('COM ', ["','"]), ('PUNC ', ["'?'", "'.'"]), ('PossPro ', ["'their'", "'Their'"]), ('MD ', ["'Will'"]), ('Conj ', ["'and'"]), ('VBP ', ["'alert'", "'capture'", "'display'", "'have'", "'overstimulate'"]), ('VB ', ["'work'"]), ('VBZ ', ["'invades'", "'is'", "'shuts'"]), ('NNP ', ["'Dr'", "'Jose'", "'Villadangos'"])])
Let's say I have the key-value pair {Aux:["have"]}.
The problem is, if i = Aux, for example, x is just set to an empty list, when I actually want it to be equal to ["have"].
I'm not sure what I'm doing/accessing incorrectly. Any ideas? Thanks!
I'm assuming from reading your code that you want all things that start and end with ', correct? In that case, you probably want
# Strip both sides first: after split("->") the RHS still starts with the
# space that followed the arrow, and the LHS still ends with the one before it.
LHS = LHS.strip()
RHS = RHS.strip()
if len(RHS) >= 2 and RHS[0] == "'" and RHS[-1] == "'":
    terminal_rules[LHS].append(RHS)
Since 0 is the first character of the string :). If ' isn't the second character of the split string, then right now it'll add everything to non_terminal_rules.
If you're trying to set terminal_rules to be every key:value pair in nonterminal_rules that is of length 1, do this:
nonterminal_rules = defaultdict(list)
terminal_rules = defaultdict(list)
# `with` closes the file when the loop is done.
with open(file, 'r') as grammar_file:
    for line in grammar_file:
        # Do stuff here as you've done above
        pass  # placeholder so the loop has a body
# Keep only the nonterminal entries whose list holds exactly one item.
# The result is a plain dict that replaces the defaultdict created above.
terminal_rules = {key: value for key, value in nonterminal_rules.items() if len(value) == 1}