I'm sorry if I post this on the wrong forum, but is there any way to improve my code to run faster with multi-threads, processes or other improvements?
The purpose of this script is to find all possible words for a scrabble game based on the word you type in and calculate it's scrabble score.
When I enter a word that has more than 7 characters, it takes forever to make the computations.
scores = {"a": 1, "c": 3, "b": 3, "e": 1, "d": 2, "g": 2,
"f": 4, "i": 1, "h": 4, "k": 5, "j": 8, "m": 3,
"l": 1, "o": 1, "n": 1, "q": 10, "p": 3, "s": 1,
"r": 1, "u": 1, "t": 1, "w": 4, "v": 4, "y": 4,
"x": 8, "z": 10}
WORDS = []
combs = dict()
def prepareDict(file):
try:
f = open(file, 'r')
for line in f:
WORDS.append(line.rstrip().lower())
except OpenErrors:
print("Could not open file")
finally:
f.close()
def combinations(word):
for i in range(len(word)+1):
combList = itertools.permutations(word, i)
for item in combList:
item = ''.join(item)
if item in WORDS:
value = 0
for c in item:
value += int(scores.get(c))
combs[item] = value
return (combs)
if __name__ == "__main__":
prepareDict('sowpods.txt')
if len(sys.argv) > 2 or len(sys.argv) < 2:
print("usage: %s <word>" % sys.argv[0])
sys.exit(1)
else:
word = sys.argv[1].lower()
combs = combinations(word)
sorted_combs = sorted(combs.items(), key=operator.itemgetter(1), reverse=True)
for word in sorted_combs:
print(word)
Change WORDS = [] into a set():
WORDS = set()
then change the method to add words to it:
from:
WORDS.append(line.rstrip().lower())
to:
WORDS.add(line.rstrip().lower())
No reason to use a list for that. This should improve performance.
Related
So I have this document called new.txt which contains many digits of pi (see https://www.piday.org/million/) and I want to split the number into a list made out of all these digits in order. My code works but it's extremely slow, (I have tried with a less digits of pi).
def sort(stuff):
for iter_num in range(len(stuff)-1,0,-1):
for idx in range(iter_num):
if stuff[idx]>stuff[idx+1]:
temp = stuff[idx]
stuff[idx] = stuff[idx+1]
stuff[idx+1] = temp
return stuff
a = []
with open("/Users/serax/desktop/new.txt", "r") as r:
for line in r.readlines():
for char in line:
text = a.append(char)
print(sort(a))
thanks to the comments i have edited my code, here's the result.
thisdict = {
".": 0,
"0": 0,
"1": 0,
"2": 0,
"3": 0,
"4": 0,
"5": 0,
"6": 0,
"7": 0,
"8": 0,
"9": 0,
}
with open("/Users/serax/documents/new.txt", "r") as r:
for line in r.readlines():
# print(sorted(line)) # built in function that can sort str/list/tuple (works)
for char in line:
for key in thisdict:
if char == key:
thisdict[key] += 1
ordered = ""
for i in thisdict:
ordered = ordered + i*thisdict[i]
print(ordered)
I am working on an API that returns JSON. I am logging my responses, and sometimes the JSON is just absurdly long and basically clogs my log files. Is there a neat way to reduce the length of a JSON, purely for visually logging the data? (not in effect in production)
The basic approach is to reduce arrays over a length of 5 to [first 2, "...", last 2], and dictionaries with more than 4 items to {first 4, "..." : "..."}
The code below is ugly. I am aware that it should be a recursive solution that reduces the items in the same way for a JSON of arbitrary depth - it currently only does so for depth 2.
def log_reducer(response_log):
original_response_log = response_log
try:
if type(response_log) == dict:
if len(response_log) >= 4: # {123456}
response_log = dict(list(response_log.items())[:4])
response_log.update({"...": "..."}) # {1234...}
for key, value in response_log.items():
if type(value) == list:
if len(value) >= 5: # {key:[123456]}
new_item = value[:2] + ['...'] + value[-2:] # {[12...56]}
response_log.update({key: new_item})
if type(value) == dict:
if len(value) >= 4: # {key:{123456}}
reduced_dict = dict(list(value.items())[:4])
reduced_dict.update({"...": "..."})
response_log.update({key: reduced_dict}) # {{1234...}}
elif type(response_log) == list:
if len(response_log) >= 5: # [123456]
response_log = response_log[:2] + ['...'] + response_log[-2:] # [12...56]
for inner_item in response_log:
if type(inner_item) == list:
if len(inner_item) >= 5: # [[123456]]
reduced_list = inner_item[:2] + ['...'] + inner_item[-2:] # [[12...56]]
response_log.remove(inner_item)
response_log.append(reduced_list)
if type(inner_item) == dict:
if len(inner_item) >= 4: # [{123456}]
reduced_dict = dict(list(inner_item.items())[:4])
reduced_dict.update({"...": "..."}) # [{1234...}]
response_log.remove(inner_item)
response_log.append(reduced_dict)
except Exception as e:
return original_response_log
return response_log
The returned response_log is then logged with logger.info(str(response_log))
As you can see, the fact that there can be either arrays or dictionaries at every level makes this task a little more complex, and I am struggling to find a library or code snipped of any kind which would simplify this. If anyone wants to give it a shot, I would appreciate it a lot.
you can use a test JSON like this to see it in effect:
test_json = {"works": [1, 2, 3, 4, 5, 6],
"not_affected": [{"1": "1", "2": "2", "3": "3", "4": "4", "5": "5"}],
"1": "1", "2": "2", "3": "3",
"removed": "removed"
}
print("original", test_json)
reduced_log = log_reducer(test_json)
print("reduced", reduced_log)
print("original", test_json)
reduced_log = log_reducer([test_json]) # <- increases nesting depth
print("reduced", reduced_log)
This answer uses #calceamenta's idea, but implements the actual cutting-down logic:
def recursive_reduce(obj):
if isinstance(obj, (float, str, int, bool, type(None))):
return obj
if isinstance(obj, dict):
keys = list(sorted(obj))
obj['...'] = '...'
if len(keys) > 5:
new_keys = keys[:2] + ["..."] + keys[-2:]
else:
new_keys = keys
new_dict = {x:obj[x] for x in new_keys}
for k, v in new_dict.items():
new_dict[k] = recursive_reduce(v)
return new_dict
if isinstance(obj, list):
if len(obj) > 5:
new_list = obj[:2] + ["..."] + obj[-2:]
else:
new_list = obj
for i, v in enumerate(new_list):
new_list[i] = recursive_reduce(v)
return new_list
return str(obj)
test_json = {"works": [1, 2, 3, 4, 5, 6],
"not_affected": [{"1": "1", "2": "2", "3": "3", "4": "4", "5": "5"}],
"1": "1", "2": "2", "3": "3",
"removed": "removed"
}
print("original", test_json)
reduced_log = recursive_reduce(test_json)
print("reduced", reduced_log)
Output:
original {'works': [1, 2, 3, 4, 5, 6], 'not_affected': [{'1': '1', '2': '2', '3': '3', '4': '4', '5': '5'}], '1': '1', '2': '2', '3': '3', 'removed': 'removed'}
reduced {'1': '1', '2': '2', '...': '...', 'removed': 'removed', 'works': [1, 2, '...', 5, 6]}
Hope this helps :)
You can overwrite the string representation of dicts and lists in python using the def __str__(): method. Using this just recursively call the print function on all elements. It can have a simple boilerplate like this:
def custom_print(obj):
log_str = ''
if type(obj) == list:
for item in obj:
log_str += custom_print(item)
elif type(obj) == dict:
for k, item in obj.items():
custom_print(item)
Use this custom log function to print into your log file as per your log file format.
I'm making a python script that accepts 7 letters and returns the highest scoring word along with all other possible words. At the moment it has a few "loops in loops" and others things that will slow down the process.
import json
#open file and read the words, output as a list
def load_words():
try:
filename = "dictionary_2.json"
with open(filename,"r") as english_dictionary:
valid_words = json.load(english_dictionary)
return valid_words
except Exception as e:
return str(e)
#make dictionary shorter as there will be maximum 7 letters
def quick():
s = []
for word in load_words():
if len(word)<7:
s.append(word)
return s
# takes letters from user and creates all combinations of the letters
def scrabble_input(a):
l=[]
for i in range(len(a)):
if a[i] not in l:
l.append(a[i])
for s in scrabble_input(a[:i] + a[i + 1:]):
if (a[i] + s) not in l:
l.append(a[i] + s)
return l
#finds all words that can be made with the input by matching combo's to the dictionary and returns them
def word_check(A):
words_in_dictionary = quick()
for word in scrabble_input(A):
if word in words_in_dictionary:
yield word
#gives each word a score
def values(input):
# scrabble values
score = {"a": 1, "c": 3, "b": 3, "e": 1, "d": 2, "g": 2,
"f": 4, "i": 1, "h": 4, "k": 5, "j": 8, "m": 3,
"l": 1, "o": 1, "n": 1, "q": 10, "p": 3, "s": 1,
"r": 1, "u": 1, "t": 1, "w": 4, "v": 4, "y": 4,
"x": 8, "z": 10}
word_total = 0
for word in word_check(input):
for i in word:
word_total = word_total + score[i.lower()]
yield (word_total, str(word))
word_total = 0
#prints the tuples that have (scrabble score, word used)
def print_words(a):
for i in values(a):
print i
#final line to run, prints answer
def answer(a):
print ('Your highest score is', max(values(a))[0], ', and below are all possible words:')
print_words(a)
answer(input("Enter your 7 letters"))
I have removed some of the for loops and have tried to make the json dictionary I found shorter by limiting it to 7 letter words max. I suppose I could do that initially so that it doesn't need to do that each time i run the script. Any other tips on how to speed it up?
I am duplicating Facebook's chat read receipt system. I wrote some basic code I think works. However my boss thinks it would be slow. I have no algorithms training. What is the most efficient way to return a mapping of indexes to numbers where the numbers are between two numbers in a sorted list and the index is the index of the first number in the between pair?
# Given say {"a": 3, "b": 10, "c": 7, "d": 19} and [1,5,15] return {0: ["a"], 1: ["b", "c"], 2: ["d"]}
def find_read_to(read_dates, message_dates):
read_indexes_to_user_ids = {}
for user_id in read_dates:
for i, date in enumerate(message_dates):
last_index = i + 1 == len(message_dates)
next_index = -1 if last_index else i + 1
if last_index or (read_dates[user_id] >= date and read_dates[user_id] < message_dates[next_index]):
if i in read_indexes_to_user_ids:
read_indexes_to_user_ids[i].append(user_id)
else:
read_indexes_to_user_ids[i] = [user_id]
break
return read_indexes_to_user_ids
find_read_to({"a": 3, "b": 10, "c": 7, "d": 19}, [1,5,15])
Version using bisect module
import bisect
def find_read_to(read_dates, message_dates):
read_indexes_to_user_ids = {}
user_ids, read_dates = zip(*read_dates.items())
def find_between(read_date):
answer = bisect.bisect_left(message_dates, read_date)
answer -= 1
if answer == -1:
return None
return answer
indexes_for_read_up_to = map(find_between, read_dates)
for i, index_for_read_up_to in enumerate(indexes_for_read_up_to):
user_id = user_ids[i]
if index_for_read_up_to is None:
continue
if index_for_read_up_to in read_indexes_to_user_ids:
read_indexes_to_user_ids[index_for_read_up_to].append(user_id)
else:
read_indexes_to_user_ids[index_for_read_up_to] = [user_id]
return read_indexes_to_user_ids
find_read_to({"a": 3, "b": 10, "c": 7, "d": 19}, [1,5,15])
I'm writing the following code and I'm trying not to add ' ' (a single space character) to my dictionary. I thought the following would work for my function. Do I need to write a more in-depth try and except statement? I get a KeyError when I run the code as is.
score = {"a": 1, "c":3, "b":3, "e": 1, "d": 2, "g": 2, "f": 4, "i" : 1, "h" : 4, "k": 5, "j" : 8, "m": 3, "l":1, "o": 1, "n":1,
"q":10, "p": 3, "s": 1, "r": 1, "u":1, "t": 1, "w": 4, "v": 4,"y": 4, "x": 8, "z": 10,}
def scrabble_score(word):
total = 0
for i in word:
total += score[i.lower()]
if i == ' ':
continue
return total
print scrabble_score('ten')
print scrabble_score('i like chicken')
In my opinion it would be better to ignore any char that is not in the table, not just spaces.
For this you can use
for c in word:
total += score.get(c, 0)
the second parameter of dict.get is the value to use if the value is not present in the dictionary.
Just change the if-statement with the previous line:
total += score[i.lower()]
if i == ' ':
continue
Id est, use:
if i == ' ':
continue
total += score[i.lower()]
edit: removed wrong explanation, see comments.