Using Counter to create a dictionary - python

I have an output of words which I would like to use to create a dictionary where keys = word; value = word's frequency
Here is the code:
import pandas as pd
import numpy as np
import datetime
import sys
import codecs
import re
import urllib, urllib2
import nltk # Natural Language Processing
from nltk.corpus import stopwords # list of words
import string # list(string.punctuation) - produces a list of punctuations
from collections import Counter # optimized way to do this
#wordToken = ['happy', 'thursday', 'from', 'my', 'big', 'sweater', 'and', 'this', 'ART', '#', 'East', 'Village', ',', 'Manhattan', 'https', ':', '//t.co/5k8PUInmqK', 'RT', '#', 'MayorKev', ':', 'IM', 'SO', 'HYPEE', '#', 'calloutband', '#', 'FreakLikeBex', '#', 'Callout', '#', 'TheBitterEnd', '#', 'Manhattan', '#', 'Music']
# this is the output from wordToken = [token.encode('utf-8') for tweetL in tweetList for token in nltk.tokenize.word_tokenize(tweetL)]
wordTokenLw = ' '.join(map(str, wordToken))
wordTokenLw = wordTokenLw.lower()
tweetD = {}
#c = Counter(wordTokenLw)
c = Counter(word.lower() for word in wordToken) # TRYING the suggested answer
#tweetD = dict(c.most_common())
tweetD = dict(c)
print tweetD
However, my output is completely wrong:
{'\x80': 2, 'j': 4, ' ': 192, '#': 21, "'": 1, '\xa6': 2, ',': 1, '/': 37, '.': 13, '1': 1, '0': 5, '3': 2, '2': 4, '5': 3, '7': 2, '9': 2, '8': 1, ';': 1, ':': 18, '#': 14, 'b': 17, 'a': 83, 'c': 36, '\xe2': 2, 'e': 63, 'd': 16, 'g': 10, 'f': 12, 'i': 37, 'h': 33, 'k': 12, '&': 1, 'm': 38, 'l': 22, 'o': 37, 'n': 49, 'q': 5, 'p': 33, 's': 32, 'r': 44, 'u': 20, 't': 104, 'w': 11, 'v': 14, 'y': 21, 'x': 8, 'z': 5}
I think the issue is with the way my dfile is formatted ( I used a space as a separator for join function). The reason I use JOIN function is to use lower() to get everything in lowercase. However, if there is a better way which will help my end result it would be awesome to hear about it.
This is a new area for me and truly appreciate your help!
The output after trying:
c = Counter(word.lower() for word in wordToken)
{'over': 1, 'hypee': 1, '//t.co/0\xe2\x80\xa6': 1, ',': 1, 'thursday': 1, 'day': 1, 'to': 2, 'dreams': 1, 'main': 1, '#': 14, 'automotive': 1, 'tbt': 1, 'positivital': 1, '2ma': 1, 'amp': 1, 'traveiplaces': 1, '//t.co/vmbal\xe2\x80\xa6': 1, '//t.co/c9ezuknraq': 1, 'motorcycles': 1, 'river': 1, 'view': 1, '//t.co/kpeunlzoyf': 1, 'art': 1, 'reillyhunter': 1, '//t.co/5pcxnzpwhw': 1, 'mayorkev': 1, 'rt': 5, '#': 21, 'pinterest': 1, 'away': 1, 'traveltuesday': 1, 'ice': 1, '//t.co/simhceefqy': 1, 'state': 1, 'fog': 1, ';': 1, '3d': 1, 'be': 1, 'run': 1, '//t.co/xrqaa7cb3e': 1, 'taevision': 1, 'by': 1, 'on': 1, 'livemusic': 1, 'bmwmotorradusa': 1, 'taking': 1, 'calloutband': 1, 'jersey': 1, 'uber': 1, 'bell': 1, 'freaklikebex': 1, 'village': 1, '.': 1, 'from': 2, '//t.co/5k8puinmqk': 1, '//t.co/gappxrvuql': 1, '&': 1, '500px': 1, 'sweater': 1, 'callout': 1, 'next': 1, 'appears': 1, 'music': 1, 'https': 5, ':': 18, 'happy': 1, 'park': 1, 'mercedesbenz': 1, 'amcafee': 1, 'foggy': 1, 'east': 2, '7pm': 1, 'this': 2, 'of': 1, 'taxis': 1, 'my': 1, 'and': 2, 'bridge': 1, 'centralpark': 1, '//t.co/ujdzsywt0u': 1, 'toughrides': 1, '10/22': 1, 'am': 1, 'thebitterend': 1, 'bmwmotorrad': 1, 'im': 1, 'at': 2, 'in': 3, 'cream': 1, 'nj': 1, '//t.co/hnxktmvrsc': 1, 'ny': 2, 'big': 1, 'nyc': 3, 'rides': 1, 'manhattan': 10, 'nice': 1, 'week': 1, 'blue': 1, 'http': 7, 'effect': 1, 'paleteria': 1, "'m": 1, 'a': 1, '//t.co/ucgfcwp9j2': 1, 'i': 2, 'so': 1, 'bmw': 1}

When you join into a single string again, the Counter starts counting letters instead of words (since you're giving it an iterable of letters). Instead, you should make a Counter directly from the wordToken list; you can use a generator expression to call lower on each item as you put it in the counter:
c = Counter(word.lower() for word in wordToken)

It is one of the common mistakes when dealing with strings. Strings are iterable in Python and sometimes when a function takes an iterable and we end up giving strings, we find the function acting on the elements of the string, which are the chars constituting the string.
class collections.Counter([iterable-or-mapping])
In your case, you should simply build the Counter directly from the wordToken list, like this:
# Count lowercased tokens; passing the list itself (not a joined string)
# makes Counter tally whole words instead of single characters.
Counter(map(lambda w: w.lower(), wordToken))

Related

Create a nested dictionary for every distinct words in a list

I have a nested list, and for each list inside I want to create a dictionary that will contain another dictionary with the words related to a certain word as a key and the times they appear as the value. For example:
from
sentences = [["i", "am", "a", "sick", "man"],
["i", "am", "a", "spiteful", "man"],
["i", "am", "an", "unattractive", "man"],
["i", "believe", "my", "liver", "is", "diseased"],
["however", "i", "know", "nothing", "at", "all", "about", "my",
"disease", "and", "do", "not", "know", "for", "certain", "what", "ails", "me"]]
part of the dictionary returned would be:
{ "man": {"i": 3, "am": 3, "a": 2, "sick": 1, "spiteful": 1, "an": 1, "unattractive": 1}, "liver": {"i": 1, "believe": 1, "my": 1, "is": 1, "diseased": 1}...}
with as many keys as there are distinct words in the passage.
I've tried this:
# Tally how many times each word occurs across all sentences.
d = {}
for row in sentences:
    # Each row is one sentence, i.e. a list of words.
    for word in row:
        if word not in d:
            d[word] = 1
        else:
            d[word] += 1
But this is only a way to count them. How could I use d as a value for another dictionary?
from collections import defaultdict

# data[word][other] counts how often `other` occupies a different position
# in a sentence that also contains `word`.
data = {}
for sentence in sentences:
    for position, word in enumerate(sentence):
        if word not in data:
            data[word] = defaultdict(int)
        related = data[word]
        # Visit every other position, starting just after the current one and
        # wrapping around to the start of the sentence. A word repeated within
        # the sentence is therefore also counted against itself.
        for other in sentence[position + 1:] + sentence[:position]:
            related[other] += 1
print(data)
sentences = [["i", "am", "a", "sick", "man"],
             ["i", "am", "a", "spiteful", "man"],
             ["i", "am", "an", "unattractive", "man"],
             ["i", "believe", "my", "liver", "is", "diseased"],
             ["however", "i", "know", "nothing", "at", "all", "about", "my",
              "disease", "and", "do", "not", "know", "for", "certain", "what", "ails", "me"]]

# "as many keys as there are distinct words in the passage"
# Well then we need to start by finding the distinct words.
# sets always help for this.

# first we flatten the list. If you don't know what this is doing,
# search "flatten nested list Python". This is a common pattern:
flat_list = [term for group in sentences for term in group]

# now use set to find distinct words
distinct_words = set(flat_list)


# define this function first. See invocation below
def find_related_counts(word, groups=None):
    """Count the terms that share a group with ``word``.

    Args:
        word: The term whose related terms are wanted.
        groups: Iterable of word lists to search. Defaults to the
            module-level ``sentences``.

    Returns:
        A dict mapping every other term found in a group containing
        ``word`` to the number of times it occurs in those groups.
        ``word`` itself is never a key. The dict is empty when no group
        contains ``word``.
    """
    if groups is None:
        groups = sentences
    related_counts = {}
    for group in groups:
        # is "word" related to the terms in this group?
        if word not in group:
            continue
        # yes it is! add the other terms:
        for other in group:
            # except, presumably, the word itself
            if other != word:
                # dict.get supplies 0 the first time a term is seen,
                # otherwise the running count is incremented.
                related_counts[other] = related_counts.get(other, 0) + 1
    return related_counts


# variable for final dictionary
result = {}

# for each word we have a key, and must find the value.
# when dealing with nested anythings, it helps to make a function,
# so you don't have so much nesting in one place and separate
# things out nicely instead
for word in distinct_words:
    result[word] = find_related_counts(word)

print(result)
print(result["man"])
OUTPUT:
{'spiteful': {'i': 1, 'am': 1, 'a': 1, 'man': 1}, 'and': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'unattractive': {'i': 1, 'am': 1, 'an': 1, 'man': 1}, 'nothing': {'however': 1, 'i': 1, 'know': 2, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'diseased': {'i': 1, 'believe': 1, 'my': 1, 'liver': 1, 'is': 1}, 'sick': {'i': 1, 'am': 1, 'a': 1, 'man': 1}, 'man': {'i': 3, 'am': 3, 'a': 2, 'sick': 1, 'spiteful': 1, 'an': 1, 'unattractive': 1}, 'do': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'believe': {'i': 1, 'my': 1, 'liver': 1, 'is': 1, 'diseased': 1}, 'i': {'am': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1, 'believe': 1, 'my': 2, 'liver': 1, 'is': 1, 'diseased': 1, 'however': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'certain': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'what': 1, 'ails': 1, 'me': 1}, 'an': {'i': 1, 'am': 1, 'unattractive': 1, 'man': 1}, 'my': {'i': 2, 'believe': 1, 'liver': 1, 'is': 1, 'diseased': 1, 'however': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'a': {'i': 2, 'am': 2, 'sick': 1, 'man': 2, 'spiteful': 1}, 'am': {'i': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1}, 'however': {'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 
'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'about': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'not': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'for': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'liver': {'i': 1, 'believe': 1, 'my': 1, 'is': 1, 'diseased': 1}, 'know': {'however': 1, 'i': 1, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'at': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'all': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'disease': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'ails': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'me': 1}, 'me': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1}, 'what': {'however': 1, 'i': 1, 'know': 2, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'my': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'ails': 1, 'me': 1}, 'is': {'i': 1, 'believe': 1, 'my': 1, 'liver': 
1, 'diseased': 1}}
{'i': 3, 'am': 3, 'a': 2, 'sick': 1, 'spiteful': 1, 'an': 1, 'unattractive': 1}

Check if a Dictionary is a Subset of another Dictionary with Key Value pairs

I have two Dictionaries resources, and available_resources:
resources = {'B': 1, 's': 2, 't': 3, 'e': 3, '!': 1, 'h': 1, 'i': 1, ' ': 3, 'o': 1, 'g': 1, 'E': 1, 'A': 1, 'x': 2, 'p': 1, 'l': 1, 'r': 1}
available_resources = {'A': 1, 'l': 1, 'g': 1, 'o': 1, 'E': 1, 'x': 1, 'p': 1, 'e': 3, 'r': 1, 't': 3, ' ': 3, 'i': 1, 's': 2, 'h': 1, 'B': 1, '!': 1}
I want to check if resources is a subset of available_resources (if each element contained in the dictionary is <= the corresponding value entry in the resources dictionary)
I've tried:
if all(available_resources.get(key, None) == val for key, val
in resources.items()):
return True
It is returning false, is there another way I can get it to work?
Could it be a simple sign error? From "==" val to "<=" val? I got true from the below.
if all(available_resources.get(key, None) <= val for key, val
in resources.items()):
return True
If all the values are integers, one approach is to use collections.Counter:
from collections import Counter
resources = {'B': 1, 's': 2, 't': 3, 'e': 3, '!': 1, 'h': 1, 'i': 1, ' ': 3, 'o': 1, 'g': 1, 'E': 1, 'A': 1, 'x': 2, 'p': 1, 'l': 1, 'r': 1}
available_resources = {'A': 1, 'l': 1, 'g': 1, 'o': 1, 'E': 1, 'x': 1, 'p': 1, 'e': 3, 'r': 1, 't': 3, ' ': 3, 'i': 1, 's': 2, 'h': 1, 'B': 1, '!': 1}
res = bool(Counter(resources) - Counter(available_resources))
print(res)
Output
True
You can use the <= operator from sets. This operator determines whether one set is a subset of the other.
As follows:
>>> resources.items() <= available_resources.items()
False
This returns False because the value stored under the key x differs between the two dicts. You can see this difference using the set operator ^, which returns the symmetric difference between the two dicts' items:
>>> resources.items() ^ available_resources.items()
{('x', 1), ('x', 2)}
You need to use <= instead of ==
>>> all(available_resources.get(k, -1)<=v for k,v in resources.items())
True
Also, above method may fail if resources contains some key that doesn't exist in available_resources, and you can additionally check if the keys in resources are subset of the keys in available_resources for this condition
>>> all(available_resources.get(k, -1)<=v for k,v in resources.items()) and\
set(resources).issubset(available_resources)
True
I have tested the answers in this stackoverflow question: click here
And I think it will work for you!
all(item in available_resources.items() for item in resources.items())
# - or - #
available_resources.items() <= resources.items()

How do I get the points of each word?

I'm making a program that could count the points of two or more words. How do I get the value of each point in a list in an array? I already have a dictionary of points.
points_dictionary = {
'A': 1, 'B': 3, 'C': 3,
'D': 2, 'E': 1, 'F': 4, 'G': 2,
'H': 4, 'I': 1, 'J': 8, 'K': 5,
'L': 1, 'M': 3, 'N': 1, 'O': 1,
'P': 3, 'Q': 10, 'R': 1, 'S': 1,
'T': 1, 'U': 1, 'V': 4, 'W': 4, 'X': 8,
'Y': 4, 'Z': 10, '#': 0, '0':3
}
I have a list that looks like this: currwords = ['PEARS', 'MANGO', 'ORANGE']
I have made a code that can get the points of each letter but its output adds all the points.
for you in currwords:
for yeah in you:
trans = list(yeah)
trans = points_dictionary[yeah[0]]
total_words.append(trans)
final1 = sum(total_words)
print(final1)
Every time I use this code it only outputs the total points on what is on the list. How do I get the points specifically on each word like PEAR = 6 MANGO = 8 and ORANGE = 7
You could use sum together with map:
points_dictionary = {
'A': 1, 'B': 3, 'C': 3,
'D': 2, 'E': 1, 'F': 4, 'G': 2,
'H': 4, 'I': 1, 'J': 8, 'K': 5,
'L': 1, 'M': 3, 'N': 1, 'O': 1,
'P': 3, 'Q': 10, 'R': 1, 'S': 1,
'T': 1, 'U': 1, 'V': 4, 'W': 4, 'X': 8,
'Y': 4, 'Z': 10, '#': 0, '0':3
}
currwords = ['PEARS', 'MANGO', 'ORANGE']
# Print each word followed by the sum of its letters' point values.
for word in currwords:
    # .get(c, 0) scores letters missing from the table as 0 instead of
    # raising KeyError.
    print(word, sum(map(lambda c: points_dictionary.get(c, 0), word)))
Output
PEARS 7
MANGO 8
ORANGE 7
As an alternative you could use a generator expression:
# Same per-word total, with a generator expression instead of map.
for word in currwords:
    print(word, sum(points_dictionary.get(c, 0) for c in word))
The idea of both map and the generator expression is to map the letters of each word to the corresponding point values.
Let's do it the old school way:
points_dictionary = {
    'A': 1, 'B': 3, 'C': 3,
    'D': 2, 'E': 1, 'F': 4, 'G': 2,
    'H': 4, 'I': 1, 'J': 8, 'K': 5,
    'L': 1, 'M': 3, 'N': 1, 'O': 1,
    'P': 3, 'Q': 10, 'R': 1, 'S': 1,
    'T': 1, 'U': 1, 'V': 4, 'W': 4, 'X': 8,
    'Y': 4, 'Z': 10, '#': 0, '0':3
}


def score_words(words, points=None):
    """Return the point total of each word, in the order given.

    Args:
        words: Iterable of words to score.
        points: Mapping of letter to point value. Defaults to the
            module-level ``points_dictionary``.

    Returns:
        A list with one total per word; empty when ``words`` is empty.

    Raises:
        KeyError: If a word contains a letter missing from ``points``.
    """
    table = points_dictionary if points is None else points
    totals = []
    for word in words:
        # Start a fresh total for every word, so identical neighbouring
        # words are scored separately instead of being merged.
        total = 0
        for letter in word:
            total = total + table[letter]
        totals.append(total)
    return totals


currwords = ['PEARS', 'MANGO', 'ORANGE']
sumsOfwords = score_words(currwords)
print(sumsOfwords)
OUTPUT:
[7, 8, 7]
dictionary = dict(zip(currwords, sumsOfwords))
print(dictionary)
OUTPUT:
{'PEARS': 7, 'MANGO': 8, 'ORANGE': 7}
I think the issue here is that you append trans to total_words at each iteration, but never reset its value. You could add a
total_words = []
inside of the first loop. Also, inside of your for yeah in you loop, you define trans twice, so the first one is never used. After correcting that, your code should look like this :
for you in currwords:
    # Reset for every word so points do not accumulate across words.
    total_words = []
    for yeah in you:
        # `yeah` is a single letter; look up its point value.
        trans = points_dictionary[yeah]
        total_words.append(trans)
    # Sum and print inside the outer loop to get one total per word.
    final1 = sum(total_words)
    print(final1)

python syntax error in dict comprehension

Here is what I'm trying to do:
I want to count letters frequency in a sentence.
Here is my code in python so far:
# Count how many times each character occurs in `line`.
for i in line:
    # Membership on the dict itself checks its keys; .keys() is not needed.
    if i in my_count:
        my_count[i] += 1
    else:
        my_count[i] = 1
Is there any way to fulfill the same goal with dict comprehension, the same we would do with list comprehension?
I have thought at something such as :
my_count = { x:(my_count[x]+=1) for x in line if x in my_count else x:1 }
But this does not pass the syntax check (SyntaxError: invalid syntax at the +=).
Thanks for your help and advices!
Counter in collections seems to accomplish this.
In [1]: line = 'the quick brown fox jumps over the lazy dog'
In [2]: from collections import Counter
In [3]: c = Counter(line)
In [4]: c
Out[4]: Counter({' ': 8, 'o': 4, 'e': 3, 'h': 2, 'r': 2, 'u': 2, 't': 2, 'a': 1, 'c': 1, 'b': 1, 'd': 1, 'g': 1, 'f': 1, 'i': 1, 'k': 1, 'j': 1, 'm': 1, 'l': 1, 'n': 1, 'q': 1, 'p': 1, 's': 1, 'w': 1, 'v': 1, 'y': 1, 'x': 1, 'z': 1})

how to make each key-value of a dictionary print on a new line?

If I have a given dictionary like this:
{'avglen': 4.419354838709677, 'count:': 93, 'mosts:': 'your', 'longs:': ['stretched'], 'shorts:': ['i', 'a'],}
how do I make each key-value print on a new line?
well its long but here is the code I'm using to get this dictionary. I pretty much added each key-value to the dictionary. So i figured out the shortest word and then I added that to the dictionary. I noticed it too that there are extra colons. but I figured its part of the values and I can use .replace() to take it away??
def build_report(freq):
    """Summarise a word-frequency mapping.

    Args:
        freq: Mapping of word to occurrence count.

    Returns:
        A dict with these entries:
            "shorts:": all words of minimal length, in ``freq`` order.
            "longs:":  all words of maximal length, in reverse ``freq`` order.
            "mosts:":  the last word in ``freq`` order having the highest
                       count, or None when ``freq`` is empty.
            "count:":  number of distinct words.
            "avglen":  mean length of the distinct words (0.0 when empty).
            "freqs:":  the ``freq`` mapping itself (not a copy).
    """
    report = {}

    # sorted() is stable, so words of equal length keep freq's iteration order.
    keys = sorted(freq, key=len)

    # for shorts: the shortest words sit at the front of `keys`.
    # The `if keys` guards make an empty mapping yield empty lists.
    shortest = [key for key in keys if len(key) == len(keys[0])] if keys else []
    report["shorts:"] = shortest

    # for longs: the longest words sit at the back; walk from the end.
    longest = [key for key in reversed(keys) if len(key) == len(keys[-1])] if keys else []
    report["longs:"] = longest

    # for mode: on ties the last matching word in freq order wins.
    mode_word = None
    if freq:
        mode = max(freq.values())
        for key, count in freq.items():
            if count == mode:
                mode_word = key
    report["mosts:"] = mode_word

    # for word count:
    report["count:"] = len(freq)

    # for average length (guarded so an empty mapping does not divide by zero):
    report["avglen"] = sum(map(len, freq)) / len(freq) if freq else 0.0

    # for freq dictionary of word to word count
    report["freqs:"] = freq
    return report
If you really don't want to import pprint but want it to "look like" a dictionary, you could do:
print("{" + "\n".join("{!r}: {!r},".format(k, v) for k, v in d.items()) + "}")
You may be looking for pprint, the pretty printer standard library. For example:
import pprint
pprint.pprint({'avglen': 4.419354838709677,
'count:': 93,
'mosts:': 'your',
'longs:': ['stretched'],
'shorts:': ['i', 'a'],})
outputs
{'avglen': 4.419354838709677,
'count:': 93,
'longs:': ['stretched'],
'mosts:': 'your',
'shorts:': ['i', 'a']}
Iterate over dict.items and print:
>>> d = {'avglen': 4.419354838709677, 'count:': 93, 'mosts:': 'your', 'longs:': ['stretched'], 'shorts:': ['i', 'a'],}
>>> for k, v in d.items():
... print (k, '-->', v)
...
mosts: --> your
count: --> 93
avglen --> 4.41935483871
shorts: --> ['i', 'a']
longs: --> ['stretched']
Or use the pprint module:
>>> import pprint
>>> pprint.pprint(d)
{'avglen': 4.419354838709677,
'count:': 93,
'longs:': ['stretched'],
'mosts:': 'your',
'shorts:': ['i', 'a']}
for those of you using pprint wondering why your dictionary still wont print each entry to a new line: could be that your dictionary entries or the whole dictionary are too short. in this case, invoke the PrettyPrinter class directly and set the width argument accordingly, as specified in the docs:
import pprint
stuff = {'avglen': 4.41, 'count:': 93, 'shorts:': ['i', 'a']}
pretty = pprint.PrettyPrinter(width=30)
pretty.pprint(stuff)
For a quick print you could use string replace to put newlines into the output. This method does not give beautiful results if the dictionary contains lists; those lists will also get newlines.
td = {'avglen': 4.419354838709677, 'count:': 93, 'mosts:': 'your', 'longs:': ['stretched'], 'shorts:': ['i', 'a'],}
print(str(td).replace(', ',',\n '))
Outputs
{'avglen': 4.419354838709677,
'count:': 93,
'mosts:': 'your',
'longs:': ['stretched'],
'shorts:': ['i',
'a']}
I found a new way to do it with json, very easy to use.
import json
dict1 = {'0': 0, '1': 70, '2': 37, '3': 11, '4': 6, '5': 5, '6': 3, '7': 1, '8': 0, '9': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0}
print(json.dumps(dict1, indent = 4))
The result should be like this:
{
"0": 0,
"1": 70,
"2": 37,
"3": 11,
"4": 6,
"5": 5,
"6": 3,
"7": 1,
"8": 0,
"9": 0,
"10": 0,
"11": 0,
"12": 0,
"13": 0,
"14": 0,
"15": 0
}
From the first answer in this thread:
How to print a dictionary line by line in Python?
In your past six questions or so, you seem to be using this poor dictionary as a text-indexed object of sorts. Why not make it a proper class?
from collections import Counter
textfreq = {
'I': 1, 'heaven': 1, 'filled': 1, 'their': 1, 'termed': 1, 'of': 4,
'And': 3, 'parts': 1, 'neer': 1, 'to': 2, 'song': 1, 'poets': 1,
'The': 1, 'a': 2, 'were': 2, 'verse': 1, 'your': 6, 'knows': 1,
'not': 1, 'half': 1, 'number': 1, 'but': 1, 'yours': 1, 'come': 2,
'rage': 1, 'age': 2, 'Though': 1, 'men': 1, 'fresh': 1, 'heavenly': 1,
'say': 1, 'alive': 1, 'truth': 1, 'this': 1, 'If': 2, 'than': 1,
'old': 1, 'believe': 1, 'Which': 1, 'that': 1, 'You': 1, 'faces': 1,
'yet': 1, 'poet': 1, 'in': 4, 'life': 1, 'most': 1, 'earthly': 1,
'will': 1, 'hides': 1, 'my': 3, 'papers': 1, 'is': 1, 'stretched': 1,
'rights': 1, 'eyes': 1, 'it': 3, 'yellowed': 1, 'Such': 1, 'So': 1,
'all': 1, 'lies': 1, 'the': 1, 'an': 1, 'as': 1, 'write': 1,
'child': 1, 'deserts': 1, 'shows': 1, 'tongue': 1, 'twice': 1,
'Be': 1, 'high': 1, 'some': 1, 'could': 1, 'should': 2, 'and': 2,
'touched': 1, 'like': 1, 'would': 1, 'Who': 1, 'tomb': 1, 'numbers': 1,
'antique': 1, 'scorned': 1, 'metre': 1, 'time': 2, 'touches': 1,
'be': 1, 'with': 2, 'true': 1, 'beauty': 1, 'rhyme': 1, 'less': 1,
'But': 1, 'graces': 1, 'live': 1
}
class TextStats():
    """Word-frequency statistics for a piece of text.

    The counts live in ``self.wordfreq`` (a ``collections.Counter`` mapping
    word to occurrences); every statistic is a read-only property derived
    from it.
    """

    def __init__(self, text=''):
        """Build the statistics.

        Args:
            text: Another ``TextStats`` (its counts are copied), a mapping
                of word to count, or a string that is lowercased and split
                on whitespace.
        """
        if hasattr(text, 'wordfreq'):
            # copy an existing TextStats object
            self.wordfreq = Counter(text.wordfreq)
        elif hasattr(text, 'keys'):
            # load from an existing dict or Counter
            self.wordfreq = Counter(text)
        else:
            # parse from a string
            # TO DO - strip all punctuation
            self.wordfreq = Counter(text.lower().split())

    @classmethod
    def from_file(cls, fname):
        """Build statistics from the text file at ``fname``.

        Lines are stripped and joined with single spaces, then the
        characters `` ` ~ ! @ # $ ' " `` are removed before parsing.
        """
        with open(fname) as inf:
            text = ' '.join(line.strip() for line in inf)
        # NOTE(review): the scraped source had '##' here; '@#' is presumed,
        # since the scrape turned the '@' of the decorators into '#' as well.
        # str.maketrans with a third argument maps those characters to
        # deletion (the Python 3 form of translate(None, chars)).
        return cls(text.translate(str.maketrans('', '', '`~!@#$\'"')))

    def __add__(self, otherTextStats):
        """Return a new TextStats holding the summed counts of both operands."""
        return TextStats(self.wordfreq + otherTextStats.wordfreq)

    def __str__(self):
        return(
            "Count: {}\n"
            "Average len: {:0.4f}\n"
            "Shortest: {}\n"
            "Most common: {}\n"
            "Longest: {}\n".format(
                self.total_words,
                self.average_word_length,
                self.shortest_words,
                self.most_common_words,
                self.longest_words
            )
        )

    @property
    def unique_words(self):
        """Number of distinct words."""
        return len(self.wordfreq)

    @property
    def total_words(self):
        """Total number of word occurrences."""
        return sum(self.wordfreq.values())

    @property
    def total_letters(self):
        """Total characters over all occurrences (word length times count)."""
        return sum(len(w)*c for w,c in self.wordfreq.items())

    @property
    def average_word_length(self):
        """Mean characters per occurrence; 0.0 when there are no words."""
        total = self.total_words
        if not total:
            # Avoid ZeroDivisionError for empty statistics.
            return 0.0
        return float(self.total_letters) / total

    @property
    def shortest_words(self):
        """Sorted list of the words with the smallest length ([] if empty)."""
        if not self.wordfreq:
            # min() of an empty Counter would raise ValueError.
            return []
        minlen = len(min(self.wordfreq, key=len))
        return sorted(w for w in self.wordfreq if len(w)==minlen)

    @property
    def most_common_words(self):
        """Sorted list of the words sharing the highest count ([] if empty)."""
        most_common = self.wordfreq.most_common()
        howmany = most_common[0][1] if most_common else 0
        return sorted(w for w,c in most_common if c == howmany)

    @property
    def longest_words(self):
        """Sorted list of the words with the greatest length ([] if empty)."""
        if not self.wordfreq:
            # max() of an empty Counter would raise ValueError.
            return []
        maxlen = len(max(self.wordfreq, key=len))
        return sorted(w for w in self.wordfreq if len(w)==maxlen)
def main():
    """Print the stats for the sample dict, for corpus.txt, and for their sum."""
    t = TextStats(textfreq)
    u = TextStats.from_file('corpus.txt')
    # Adding two TextStats objects combines their word counts.
    v = t + u
    print(t)
    print()
    print(u)
    print()
    print(v)


if __name__=="__main__":
    main()
resources = {
    "water": 300,
    "milk": 200,
    "coffee": 100,
}
# Print one "key: value" pair per line; items() yields both at once,
# avoiding a second lookup per key.
for key, value in resources.items():
    print(f"{key}: {value}")
outputs
water: 300
milk: 200
coffee: 100
Can do it this way without having to import any modules. Just iterate over the dictionary and print a new line after each key:value pair.
# Named `numbers` rather than `dict` so the built-in dict type is not shadowed.
numbers = {'1':1,'2':2,'3':3}
for key,value in numbers.items():
    # The extra '\n' argument leaves a blank line after each pair.
    print(key,value,'\n')

Categories

Resources