How do I remove newline from my key in a dictionary? - python

I have a file I am reading from and putting it into a dictionary. How do I remove the new line from the key?
def main():
# Set up empty dictionary
counter = {}
# open text file
my_file = open("WorldSeriesWinners.txt", "r")
words = my_file.readlines()
# Add each unique word to dictionary with a counter of 0
unique_words = list(set(words))
for word in unique_words:
counter[word] = 0
# For each word in the text increase its counter in the dictionary
for item in words:
counter[item] += 1
return counter
counter = main()
print(counter)
OUTPUT:
{'Cleveland Indians\n': 2, 'Pittsburgh Pirates\n': 5, 'St. Louis Cardinals\n': 10, 'New York Giants\n': 5, 'Cincinnati Reds\n': 5, 'Boston Americans\n': 1, 'Chicago White Sox\n': 3, 'Toronto Blue Jays\n': 2, 'Detroit Tigers\n': 4, 'NONE\n': 2, 'Boston Red Sox\n': 6, 'Minnesota Twins\n': 2, 'Kansas City Royals\n': 1, 'Chicago Cubs\n': 2, 'Baltimore Orioles\n': 3, 'Arizona Diamondbacks\n': 1, 'Philadelphia Phillies': 1, 'Los Angeles Dodgers\n': 5, 'Brooklyn Dodgers\n': 1, 'Florida Marlins\n': 2, 'Washington Senators\n': 1, 'New York Yankees\n': 26, 'Philadelphia Athletics\n': 5, 'Boston Braves\n': 1, 'New York Mets\n': 2, 'Atlanta Braves\n': 1, 'Anaheim Angels\n': 1, 'Philadelphia Phillies\n': 1, 'Oakland Athletics\n': 4, 'Milwaukee Braves\n': 1}

Just use the replace function when defining your key. See below:
def main():
# Set up empty dictionary
counter = {}
# open text file
my_file = open("WorldSeriesWinners.txt", "r")
words = my_file.readlines()
# Add each unique word to dictionary with a counter of 0
unique_words = list(set(words))
for word in unique_words:
word_no_lines = word.replace('\n', '')
counter[word_no_lines] = 0
# For each word in the text increase its counter in the dictionary
for item in words:
item_no_lines = item.replace('\n', '')
counter[item_no_lines] += 1
return counter
counter = main()
print(counter)

Related

Retrieve specific fields of a namedtuple instance from a nested list in Python

I am starting a project for school, a basic text-adventure game, where the player navigates through several rooms, each with a set of items, and adds an item to their inventory by spending a turn.
Since each item is unique to a room and each item is also unique in its stat benefits, I have done the following to implement these:
Item = namedtuple('Item', ['name', 'atk', 'defense', 'agi', 'hp'])
#Sport Store Items
bat = Item('Baseball Bat', 3, 1, 0, 0)
shoes = Item('Running Shoes', 0, 1, 3, 0)
dryeggs = Item('Freeze Dried Eggs', 0, 0, 1, 2)
#Clothes Store Items
belt = Item('Studded Belt', 1, 1, 0, 0)
candy = Item('Japanese Candy', 0, 0, 1, 1)
jacket = Item('Leather Jacket', 0, 3, 0, 1)
#Toy Store Items:
cars = Item('Toy Car Pack', 1, 1, 0, 0)
crayons = Item('Crayons', 0, 0, 1, 0)
toygun = Item('Toy Gun', 2, 1, 0, 0)
#Candle Store Items:
jar = Item('Candle Jar', 2, 0, 0, 0)
matches = Item('Matches', 1, 0, 1, 0)
wax = Item('Wax', 0, 2, 1, 0)
#Music Store Items:
disc = Item('Vinyl Disc', 2, 0, 1, 0)
guitar = Item('Electric Guitar', 3, 0, 0, 0)
symbol = Item('Symbol', 1, 2, 0, 0)
all_items = [
[bat, shoes, dryeggs, '1'],
[belt, candy, jacket, '2'],
[cars, crayons, toygun, '3'],
[jar, matches, wax, '4'],
[disc, guitar, symbol, '5']
]
My issue is here, in my get_items function:
def get_items(id):
for i in all_items:
if i[3] == id:
items = i
items = list(items[0:4])
return items
else:
continue
I'm trying to get the list of items based on the matching ID parameter. There is a function in another file that takes the player's current position and sends it to other functions as the map_id. I've successfully made it so the name of the store changes based on this position, but I cannot figure out how to handle these sub-lists to return a specific sub-list based on the id.
For example, in the sport_store code, I'm trying to return the results like so:
def sport_store():
room_id = '1'
item_select = items.get_items(room_id)
#FIXME: import items.py later with this function in it.
if item_select != []:
if entering == True:
print('You enter the Sporting Goods Store.')
print('There aren\'t many supplies left. What will you pick?')
else:
print('There aren\'t many supplies left. What will you pick?')
print(item_select)
However, no matter what things I change in get_items' loop to get it to work, I always return the original empty list of item_selection. I am trying to select the matching sub-list from the all_items global with the get_items function and then use that sub-list to make a new list that gets sent to item_selection showing the items for that store, and then format the namedtuples in it to show just the names value. Is this possible or do I need to convert the data to a dictionary?
you can use a dictionary for all items:
all_items = {
'1':[bat, shoes, dryeggs],
'2':[belt, candy, jacket],
'3':[cars, crayons, toygun],
'4':[jar, matches, wax],
'5':[disc, guitar, symbol]
}
then, instead of calling items.getItems(room_id), you could just do all_items[room_id]. Hope this helps!

Find most common word in a list of sets

I'm currently working in my university projects in NLP. I'd like to display the most common words contained in this list of sets:
[{'allow', 'feel', 'fear', 'situat', 'properti', 'despit', 'face', 'ani'}, {'unpleas', 'someth', 'fear', 'make', 'abil', 'face', 'scar', 'us', 'feel'}]
This is what I've accomplished until now:
def word_list(sent):
if isinstance(sent, str):
tokens = set(word_tokenize(sent.lower()))
else:
tokens = set([t for s in sent for t in word_tokenize(s.lower())])
tokens = set([stemmer.stem(t) for t in tokens])
for w in stopword_final:
tokens.discard(w)
return tokens
def get_most_relevant_words(definitions):
list_of_words = list()
most_common_word_dict = dict()
for d1 in definitions:
list_of_words.append(word_list(d1))
for elem in list_of_words:
for word in elem:
print(word)
word_counter = Counter(word)
most_occurrences = word_counter.most_common(3)
most_common_word_dict.update({word: most_occurrences})
return most_common_word_dict
The desired output should be: {fear: 2, feel: 2}
The output that it prints is: {'feel': [('e', 2), ('f', 1), ('l', 1)]}
Use collections.Counter:
from collections import Counter
list_of_sets = [{'allow', 'feel', 'fear', 'situat', 'properti', 'despit', 'face', 'ani'}, {'unpleas', 'someth', 'fear', 'make', 'abil', 'face', 'scar', 'us', 'feel'}]
words = [word for my_set in list_of_sets for word in my_set]
c = Counter(words)
print(c)
output:
Counter({
'fear': 2,
'face': 2,
'feel': 2,
'properti': 1,
'despit': 1,
'allow': 1,
'situat': 1,
'ani': 1,
'someth': 1,
'unpleas': 1,
'make': 1,
'abil': 1,
'us': 1,
'scar': 1
})
You can simply iterate through the 2 sets, find common terms, and update the count in a dictionary. By the way, 'face' should also be included in your result.
lst = [{'allow', 'feel', 'fear', 'situat', 'properti', 'despit', 'face', 'ani'}, {'unpleas', 'someth', 'fear', 'make', 'abil', 'face', 'scar', 'us', 'feel'}]
dic = {}
for word1 in lst[0]:
for word2 in lst[1]:
if word1 == word2:
dic[word1] = dic.get(word1, 0) + 2
print(dic)
#{'fear': 2, 'feel': 2, 'face': 2}

How to search for the dictionary keys in a list

Say I have a dictionary of:
lst = {'adore': 10, 'hate': 10, 'hello': 10, 'pigeon': 1, 'would': 5, 'elections': 5}
And I have a list of:
mylist = [['a new', 'party'], ['to', 'lol'], ['compete'], ['in', 'adore', 'the 2013'], ['federal', 'elections'], ['The Free', 'Voters'], ['leadership', 'declined to'], ['join forces', 'according to', 'a leaked'], ['email from', 'Bernd Lucke'], ['Advocating', 'adore'] ]
I want to be able to search the list for the keys in the dictionary. If a word in the list is a key, then to take the value of that key and add it to a counter. In the end, to have a total sum of all the values.
Is there a way to do this?
Like this?
lst = {'adore': 10, 'hate': 10, 'hello': 10, 'pigeon': 1, 'would': 5, 'elections': 5}
mylist = [['a new', 'party'], ['to', 'lol'], ['compete'], ['in', 'adore', 'the 2013'], ['federal', 'elections'], ['The Free', 'Voters'], ['leadership', 'declined to'], ['join forces', 'according to', 'a leaked'], ['email from', 'Bernd Lucke'], ['Advocating', 'adore']]
print([lst.get(i) for j in mylist for i in j if lst.get(i) != None])
print(sum([lst.get(i) for j in mylist for i in j if lst.get(i) != None]))
Output:
[10, 5, 10]
25
If you don't like them in one line:
total = []
for i in mylist:
for j in i:
if lst.get(i) != None:
total.append(lst.get(i))
print(sum(total))
Probably you can do this in a more pythonic way.
lst = {'adore': 10, 'hate': 10, 'hello': 10, 'pigeon': 1, 'would': 5}
counter = {'adore': 0, 'hate': 0, 'hello': 0, 'pigeon': 0, 'would': 0}
mylist = [['a new', 'party'], ['to', 'lol'], ['compete'], ['in', 'adore', 'the 2013'], ['federal', 'elections'], ['The Free', 'Voters'], ['leadership', 'declined to'], ['join forces', 'according to', 'a leaked'], ['email from', 'Bernd Lucke'], ['Advocating', 'adore'] ]
def func():
for key in lst.keys():
for item in mylist:
if key in item:
counter[key] = counter[key] + lst[key]
func()
print sum(counter.values())

Python counter not recognizing dictionary

I have a list and a dictionary and I want to ultimately find a sum of the values in the two. For example, I want the code below to return :
{gold coin : 45, rope : 1, dagger : 6, ruby : 1}
First I right a function to turn the dragonLoot list into a dictionary and then I run a Counter to add the two dictionaries together. However, when I run the code I get the following:
{'ruby': 1, 'gold coin': 3, 'dagger': 1}
Counter({'gold coin': 42, 'dagger': 5, 'rope': 1})
For some reason it looks like the Counter is not recognizing the dictionary that I create from dragonLoot. Does anyone have any suggestions on what I am doing wrong? Thanks!
inv = {'gold coin' : 42, 'rope' : 1, 'dagger' : 5}
dragonLoot = ['gold coin','dagger','gold coin','gold coin','ruby']
def inventory(item):
count = {}
for x in range(len(item)):
count.setdefault(item[x],0)
count[item[x]] = count[item[x]] + 1
print(count)
inv2 = inventory(dragonLoot)
from collections import Counter
dicts = [inv,inv2]
c = Counter()
for d in dicts:
c.update(d)
print(c)
You don't need the inventory function: Counter will count the iterable for you. You can also use + with Counter. Combine these, and you can do quite simply
inv = Counter({'gold coin' : 42, 'rope' : 1, 'dagger' : 5})
dragonLoot = ['gold coin','dagger','gold coin','gold coin','ruby']
inv += Counter(dragonLoot)
After this is run, inv will be Counter({'gold coin': 45, 'dagger': 6, 'rope': 1, 'ruby': 1}), as desired.
You are not returning the count in your inventory method:
def inventory(item):
count = {}
for x in range(len(item)):
count.setdefault(item[x],0)
count[item[x]] = count[item[x]] + 1
print(count)
You are simply printing your inventory calculation. Change that print to a return, or add a return line after the print:
def inventory(item):
count = {}
for x in range(len(item)):
count.setdefault(item[x],0)
count[item[x]] = count[item[x]] + 1
print(count)
return count
Adding that to your code and running it, gives this output:
Counter({'gold coin': 45, 'dagger': 6, 'rope': 1, 'ruby': 1})
Alternatively, the implementation provided by #nneonneo is optimal.
Here is an other way to do it without the Counter:
dragonLoot = ['gold coin','dagger','gold coin','gold coin','ruby']
inv = {'gold coin' : 42, 'rope' : 1, 'dagger' : 5}
for i in dragonLoot:
inv[i] = inv.get(i, 0) +1
print (inv)
Output:
{'gold coin': 45, 'rope': 1, 'dagger': 6, 'ruby': 1}

Dates to categories

I have an Excel spreadsheet I'm preparing to migrate to Access and the date column has entries in multiple formats such as: 1963 to 1969, Aug. 1968 to Sept. 1968, 1972, Mar-73, 24-Jul, Oct. 2, 1980, Aug 29, 1980, July 1946, etc. and 'undated'. I'm pulling the column that will be the key (map number) and date column into a csv and writing back to a csv.
I can strip out years that are 4 digit, but not ranges. And I'm stumped how to extract days and 2 digit years short of re-formatting by hand. My code isn't very elegant and probably not best practice:
import csv, xlwt, re
# create new Excel document and add sheet
# from tempfile import TemporaryFile
from xlwt import Workbook
book = Workbook()
sheet1 = book.add_sheet('Sheet 1')
# populate first row with header
sheet1.write(0,0,"Year")
sheet1.write(0,1,"Map")
sheet1.write(0,2,"As Entered")
# count variable for populating sheet
rowCount=0
# open csv file and read
with open('C:\dateTestMSDOs.csv', 'rb') as f:
reader=csv.reader(f)
for row in reader:
map = row[0] # first row is map number
dateRaw = row[1] # second row is raw date as entered
# write undated and blank entries
if dateRaw == 'undated':
yearStr = '0000'
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
if dateRaw == '':
yearStr = 'NoEntry'
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
# search and write instances of four consecutive digits
try:
year = re.search(r'\d\d\d\d', dateRaw)
yearStr= year.group()
#print yearStr, map, dateRaw
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
# if none exist flag for cleaning spreadsheet and print
except:
#print 'Nope', map, dateRaw
rowCount +=1
yearStr='Format'
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
yearStr=''
dateRaw=''
book.save('D:\dateProperty.xls')
print "Done!"
I would like to write day and month to an additional column as well as pull the second 4 digit date of range entries.
You can try using dateutil for this. I think you'd still need to deal with some of the more difficult formats in a different way though. See a sample implementation below:
Code:
import dateutil.parser as dateparser
date_list = ['1963 to 1969',
'Aug. 1968 to Sept. 1968',
'Mar-73',
'24-Jul',
'Oct. 2 1980',
'Aug 29, 1980',
'July 1946',
'undated']
for d in date_list:
if 'to' in d:
a, b = d.split('to')
# Get the higher number. Use min to get lower of two.
print max(dateparser.parse(a.strip()).year, dateparser.parse(b.strip()).year)
elif d == 'undated':
print '0000'
else:
yr = dateparser.parse(d).year
print yr
Result:
1969
1968
1973
2014
1980
1980
1946
0000
[Finished in 0.4s]
Only glaring issue I can see is that 24-Jul returns a date of 2014 because the parser assumes the current day, month, or year in place of missing component, ie. Mar-73 will become 1973-03-20 if today is the 20th of the month, etc.
Not entirely sure if this is what you were going for or not but I just used a "simple" regex search and then traversed through the sets of groups that matched, applying the given function defined. If a match is found then the function that is called (found in the regex_groups variable) should return a dictionary with the following keys: start_day, start_month, start_year, end_day, end_month, end_year
Then you can do whatever you'd like with those values. Definitely not the cleanest solution but it works, as far as I can tell.
#!/usr/local/bin/python2.7
import re
# Crazy regex
regex_pattern = '(?:(\d{4}) to (\d{4}))|(?:(\w+)\. (\d{4}) to (\w+)\. (\d{4}))|(?:(\w+)-(\d{2}))|(?:(\d{2})-(\w+))|(?:(\w+)\. (\d+), (\d{4}))|(?:(\w+) (\d+), (\d{4}))|(?:(\w+) (\d{4}))|(?:(\d{4}))'
date_strings = [
'1963 to 1969',
'Aug. 1968 to Sept. 1968',
'1972',
'Mar-73',
'24-Jul',
'Oct. 2, 1980',
'Aug 29, 1980',
'July 1946',
]
# Here you set the group matching functions that will be called for a matching group
regex_groups = {
(1,2): lambda group_matches: {
'start_day': '', 'start_month': '', 'start_year': group_matches[0],
'end_day': '', 'end_month': '', 'end_year': group_matches[1]
},
(3,4,5,6): lambda group_matches: {
'start_day': '', 'start_month': group_matches[0], 'start_year': group_matches[1],
'end_day': '', 'end_month': group_matches[2], 'end_year': group_matches[3]
},
(7,8): lambda group_matches: {
'start_day': '', 'start_month': group_matches[0], 'start_year': group_matches[1],
'end_day': '', 'end_month': '', 'end_year': ''
},
(9,10): lambda group_matches: {
'start_day': group_matches[1], 'start_month': '', 'start_year': group_matches[0],
'end_day': '', 'end_month': '', 'end_year': ''
},
(11,12,13): lambda group_matches: {
'start_day': group_matches[1], 'start_month': group_matches[0], 'start_year': group_matches[2],
'end_day': '', 'end_month': '', 'end_year': ''
},
(14,15,16): lambda group_matches: {
'start_day': group_matches[1], 'start_month': group_matches[0], 'start_year': group_matches[2],
'end_day': '', 'end_month': '', 'end_year': ''
},
(17,18): lambda group_matches: {
'start_day': '', 'start_month': group_matches[0], 'start_year': group_matches[1],
'end_day': '', 'end_month': '', 'end_year': ''
},
(19,): lambda group_matches: {
'start_day': '', 'start_month': '', 'start_year': group_matches[0],
'end_day': '', 'end_month': '', 'end_year': ''
},
}
for ds in date_strings:
matches = re.search(regex_pattern, ds)
start_month = ''
start_year = ''
end_month = ''
end_year = ''
for regex_group, group_func in regex_groups.items():
group_matches = [matches.group(sub_group_num) for sub_group_num in regex_group]
if all(group_matches):
match_data = group_func(group_matches)
print
print 'Matched:', ds
print '%s to %s' % ('-'.join([match_data['start_day'], match_data['start_month'], match_data['start_year']]), '-'.join([match_data['end_day'], match_data['end_month'], match_data['end_year']]))
# match_data is a dictionary with keys:
# * start_day
# * start_month
# * start_year
# * end_day
# * end_month
# * end_year
# If a group doesn't contain one of those items, then it is set to a blank string
Outputs:
Matched: 1963 to 1969
--1963 to --1969
Matched: Aug. 1968 to Sept. 1968
-Aug-1968 to -Sept-1968
Matched: 1972
--1972 to --
Matched: Mar-73
-Mar-73 to --
Matched: 24-Jul
Jul--24 to --
Matched: Oct. 2, 1980
2-Oct-1980 to --
Matched: Aug 29, 1980
29-Aug-1980 to --
Matched: July 1946
-July-1946 to --
You could define all the possible cases of dates using regex, something like:
import re
s = ['1963 to 1969', 'Aug. 1968 to Sept. 1968',
'1972', 'Mar-73', '03-Jun', '24-Jul', 'Oct. 2, 1980', 'Oct. 26, 1980',
'Aug 29 1980', 'July 1946']
def get_year(date):
mm = re.findall("\d{4}", date)
if mm:
return mm
mm = re.search("\w+-(\d{2})", date)
if mm:
return [mm.group(1)]
def get_month(date):
mm = re.findall("[A-Z][a-z]+", date)
if mm:
return mm
def get_day(date):
d_expr = ["(\d|\d{2})\-[A-Z][a-z]+","[A-Z][a-z]+[\. ]+(\d|\d{2}),"]
for expr in d_expr:
mm = re.search(expr, date)
if mm:
return [mm.group(1)]
d = {}
m = {}
y = {}
for idx, date in enumerate(s):
d[idx] = get_day(date)
m[idx] = get_month(date)
y[idx] = get_year(date)
print "Year Dict: ", y
print "Month Dict: ", m
print "Day Dict: ", d
As result you get dictionaries of days, month, and years. They could be used to populate the rows.
Output:
Year Dict: {0: ['1963', '1969'], 1: ['1968', '1968'], 2: ['1972'], 3: ['73'], 4: None, 5: None, 6: ['1980'], 7: ['1980'], 8: ['1980'], 9: ['1946']}
Month Dict: {0: None, 1: ['Aug', 'Sept'], 2: None, 3: ['Mar'], 4: ['Jun'], 5: ['Jul'], 6: ['Oct'], 7: ['Oct'], 8: ['Aug'], 9: ['July']}
Day Dict: {0: None, 1: None, 2: None, 3: None, 4: ['03'], 5: ['24'], 6: ['2'], 7: ['26'], 8: None, 9: None}
Thank you for the innovative suggestions. After consideration we decided to remove day and month from what would be searchable in our database, since only a relatively small amount of our data had that level of detail. Here is the code I use to extract and generate the data I needed from a long and messy list.
import csv, xlwt, re
# create new Excel document and add sheet
from xlwt import Workbook
book = Workbook()
sheet1 = book.add_sheet('Sheet 1')
# populate first row with header
sheet1.write(0,0,"MapYear_(Parsed)")
sheet1.write(0,1,"Map_Number")
sheet1.write(0,2,"As_Entered")
# count variable for populating sheet
rowCount=0
# open csv file and read
yearStr = ''
with open('C:\mapsDateFix.csv', 'rb') as f:
reader=csv.reader(f)
for row in reader:
map = row[0] # first row is map number
dateRaw = row[1] # second row is raw date as entered
# write undated and blank entries
if dateRaw == 'undated':
yearStr = 'undated'
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
#yearStr=''
if yearStr != 'undated':
if dateRaw == '':
yearStr = 'NoEntry'
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
#yearStr=''
# search and write instances of four consecutive digits
if yearStr != dateRaw:
try:
year = re.search(r'\d\d\d\d', dateRaw)
yearStr= year.group()
#print yearStr, map, dateRaw
rowCount +=1
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
# if none exist flag for cleaning spreadsheet and print
except:
#print 'Nope', map, dateRaw
rowCount +=1
yearStr='Format'
sheet1.write(rowCount, 0, yearStr)
sheet1.write(rowCount, 1, map)
sheet1.write(rowCount, 2, dateRaw)
#print rowCount, yearStr, map, dateRaw, '\n'
yearStr=''
yearStr=''
dateRaw=''
book.save('D:\dateProperty.xls')
print "Done!"

Categories

Resources