I need some help with my code. I need to convert one input word into another, changing one letter at a time. Currently my program does this, but very inefficiently, and it does not find the shortest route. Any help would be appreciated.
import re

def same(item, target):
    return len([c for (c, t) in zip(item, target) if c == t])

def build(pattern, words, seen, list):
    return [word for word in words
            if re.search(pattern, word) and word not in seen.keys() and
            word not in list]

def find(word, words, seen, target, path):
    list = []
    for i in range(len(word)):
        list += build(word[:i] + "." + word[i + 1:], words, seen, list)
    if len(list) == 0:
        return False
    list = sorted([(same(w, target), w) for w in list])

    for (match, item) in list:
        if match >= len(target) - 1:
            if match == len(target) - 1:
                path.append(item)
            return True
        seen[item] = True

    for (match, item) in list:
        path.append(item)
        if find(item, words, seen, target, path):
            return True
        path.pop()

fname = 'dictionary.txt'
file = open(fname)
lines = file.readlines()

while True:
    start = input("Enter start word:")
    words = []
    for line in lines:
        word = line.rstrip()
        if len(word) == len(start):
            words.append(word)
    target = input("Enter target word:")
    break

count = 0
path = [start]
seen = {start: True}
if find(start, words, seen, target, path):
    path.append(target)
    print(len(path) - 1, path)
else:
    print("No path found")
edit: Below is another failed attempt of mine to fix this problem using a different approach. This time it does not seem to loop properly.
def find(start, words, target):  # find function. Word = start word, words =
    start = list(start)
    target = list(target)
    print("Start word is ", start)
    print("Target word is ", target)
    letter = 0
    while start != target:
        if letter == len(target):
            letter = 0
            continue
        elif start[letter] == target[letter]:
            letter = letter + 1
            continue
        else:
            testword = list(start)
            testword[letter] = target[letter]
            testword = ''.join(testword)
            if testword in words:
                start[letter] = target[letter]
                letter = letter + 1
                print(start)
                continue
            else:
                letter = letter + 1
                continue
        letter = letter + 1
        continue
fname = "dictionary.txt"
file = open(fname) # Open the dictionary
lines = file.readlines() # Read each line from the dictionary and store it in lines
while True: # Until ended
start = input("Enter start word:") # Take a word from the user
words = [] # Inititialise Array 'words'
for line in lines: # For each line in the dictionary
word = line.rstrip() #strip all white space and characters from the end of a string
if len(word) == len(start):
words.append(word)
if start not in words:
print("Your start word is not valid")
continue
target = input("Enter target word:")
if len(start) != len(target):
print("Please choose two words of equal length")
continue
if target not in words:
print("Your target word is not valid")
continue
break
edit: Here is the basic algorithm behind the code. (Both variants are compatible with my purpose.)
- input start word
- input target word
- if len(start) == len(target), continue
- check dictionary to see if target and start words are present
- find what letters are different from the start to the target word
- change one different letter in the start word until start word == target word
  (after each letter change, the output must be a valid word in the dictionary)
The goal is to achieve this in the fewest steps possible, which is not currently happening. The first section of code does find a path, I think, but in a huge number of steps, and I know it could be far more efficient.
Here's a breadth-first search that doesn't use any 3rd party modules. I don't guarantee that it finds the shortest solutions, but it appears to work. ;) It stops when it finds a solution, but due to the random order of sets each run of the program may find a different solution for a given start & target pair.
import re

# The file containing the dictionary
fname = '/usr/share/dict/words'

start, target = 'hide', 'seek'

wlen = len(start)
wrange = range(wlen)

words = set()
with open(fname) as f:
    for word in f:
        w = word.rstrip()
        # Grab words of the correct length that aren't proper nouns
        # and that don't contain non-alpha chars like apostrophe or hyphen
        if len(w) == wlen and w.islower() and w.isalpha():
            words.add(w)

print('word set size:', len(words))

# Build a regex to find words that differ from `word` by one char
def make_pattern(word):
    pat = '|'.join(['{}.{}'.format(word[:i], word[i+1:]) for i in wrange])
    return re.compile(pat)

# Find words that extend each chain of words in `seq`
def find(seq):
    result = []
    seen = set()
    for current in seq:
        pat = make_pattern(current[-1])
        matches = {w for w in words if pat.match(w)} - seen
        if target in matches:
            return current + (target,)
        result.extend(current + (w,) for w in matches)
        seen.update(matches)
        words.difference_update(matches)
    seq[:] = result

# Search for a solution
seq = [(start,)]
words.discard(start)
while True:
    solution = find(seq)
    if solution:
        print(solution)
        break
    size = len(seq)
    print(size)
    if size == 0:
        print('No solutions found')
        break
typical output
word set size: 2360
9
55
199
479
691
('hide', 'hire', 'here', 'herd', 'heed', 'seed', 'seek')
I ought to mention that all those word chains chew up a bit of RAM; I'll try to think of a more compact approach. But it shouldn't really be a problem on modern machines, unless you're working with really long words.
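For what it's worth, here is one possible more compact approach (just a sketch, not a drop-in replacement: it assumes the words set, start and target built near the top of the script, before the chain search above has removed anything from words). It stores a single predecessor per word instead of whole chains and rebuilds the path at the end:

from collections import deque

def bfs_ladder(start, target, words):
    # parent[w] is the word we came from when w was first reached;
    # storing one predecessor per word keeps memory roughly linear
    # in the number of words visited.
    parent = {start: None}
    queue = deque([start])
    while queue:
        current = queue.popleft()
        if current == target:
            # Walk the parent links back to the start to rebuild the chain.
            path = []
            while current is not None:
                path.append(current)
                current = parent[current]
            return path[::-1]
        for i in range(len(current)):
            for c in 'abcdefghijklmnopqrstuvwxyz':
                candidate = current[:i] + c + current[i + 1:]
                if candidate in words and candidate not in parent:
                    parent[candidate] = current
                    queue.append(candidate)
    return None

print(bfs_ladder(start, target, words))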
Using a bit of preprocessing to group words of equal length, you can use the networkx 3rd party library to build a graph, then use its shortest_path algorithm to retrieve the shortest route. Note that I've used the default dictionary available on most *nix systems and limited it to words of 5 characters or less.
from collections import defaultdict

import networkx as nx

# Group the words by length so we only compare within words of equal length
with open('/usr/share/dict/words') as fin:
    words = defaultdict(set)
    for word in (line.strip() for line in fin if line.islower() and len(line) <= 6):
        words[len(word)].add(word)

graph = nx.Graph()
for k, g in words.items():
    while g:
        word = g.pop()
        matches = {w for w in g if sum(c1 != c2 for c1, c2 in zip(word, w)) == 1}
        graph.add_edges_from((word, match) for match in matches)
Then, get the shortest route, e.g.:
In [1]: ' -> '.join(nx.shortest_path(graph, 'hide', 'seek'))
Out[1]: 'hide -> hire -> here -> herd -> heed -> seed -> seek'
In [2]: ' -> '.join(nx.shortest_path(graph, 'cat', 'dog'))
Out[2]: 'cat -> cot -> cog -> dog'
I have to write a function that counts how many times a word (or a series of words) appears in a given text.
This is my function so far. What I noticed is that with a series of 3 words the function works well, but not with 4 words and so on.
from nltk import ngrams

def function(text, word):
    for char in ".?!-":
        text = text.replace(char, ' ')
    n = len(word.split())
    countN = 0
    bigram_lower = text.lower()
    word_lower = word.lower()
    n_grams = ngrams(bigram_lower.split(), n)
    for gram in n_grams:
        for i in range(0, n):
            if gram[i] == word_lower.split()[i]:
                countN = countN + 1
    print(countN)
First thing, please fix your indentation and don't use bigrams as a variable name for ngrams, as it's a bit confusing (since you are not storing just bigrams in the bigrams variable). Secondly, let's look at this part of your code -
for gram in bigrams:
    for i in range(0, n):
        if gram[i] == word_lower.split()[i]:
            countN = countN + 1
print(countN)
Here you are increasing countN by one for each time a word in your ngram matches up instead of increasing it when the whole ngram matches up. You should instead only increase countN if all the words have matched up -
for gram in bigrams:
    if list(gram) == word_lower.split():
        countN = countN + 1
print(countN)
Maybe it was already done here.
Is nltk mandatory?
# Open the file in read mode
text = open("sample.txt", "r")

# Create an empty dictionary
d = dict()

# Loop through each line of the file
for line in text:
    # Remove the leading spaces and newline character
    line = line.strip()

    # Convert the characters in line to
    # lowercase to avoid case mismatch
    line = line.lower()

    # Split the line into words
    words = line.split(" ")

    # Iterate over each word in line
    for word in words:
        # Check if the word is already in dictionary
        if word in d:
            # Increment count of word by 1
            d[word] = d[word] + 1
        else:
            # Add the word to dictionary with count 1
            d[word] = 1

# Print the contents of dictionary
for key in list(d.keys()):
    print(key, ":", d[key])
This should work for you:
import nltk

def function(text, word):
    for char in ".?!-,":
        text = text.replace(char, ' ')
    n = len(word.split())
    countN = 0
    bigram_lower = text.lower()
    word_lower = tuple(word.lower().split())
    bigrams = nltk.ngrams(bigram_lower.split(), n)
    for gram in bigrams:
        if gram == word_lower:
            countN += 1
    print(countN)
>>> tekst="this is the text i want to search, i want to search it for the words i want to search for, and it should count the occurances of the words i want to search for"
>>> function(tekst, "i want to search")
4
>>> function(tekst, "i want to search for")
2
The main confusion I am having with my code is that I am aiming to find all the words in dictionary.txt of a particular length that contain just a single vowel (defined as a, e, i, o and u) and do not contain a particular letter. However, it does not work correctly. For example, if I am looking for all the words of length 9 containing just a single vowel and no letter 't', the program below tells me "There are no words that fit this criteria", but there should be two words in the file satisfying the above criteria: "lynchings" and "lynchpins".
My dictionary is located at https://filebin.net/96k7kso4i6nxcd2n/dictionary.txt?t=x9ujn62v
def onevowel(file):
    length = int(input("Please enter the word length you are looking for: "))
    letter = input("Please enter the letter you'd like to exclude: ")
    wordnum = 0
    for word in file:
        word = word.strip()
        if len(word) == length:
            count = 0
            for char in word:
                if (char=='a' and char=='e' and char=='i' and char=='o' and char=='u'):
                    count += 1
            if count == 1:
                flag = 1
                word_str = ""
                for char in word:
                    if char == letter:
                        flag = 0
                    else:
                        word_str += char
                if flag == 1:
                    print(word_str)
                    wordnum += 1
    if wordnum == 0:
        print("There are no words that fit this criteria.")

if __name__ == "__main__":
    my_file = open("dictionary.txt", "r")
    onevowel(my_file)
    my_file.close()
Replace char=='a' and char=='e' and char=='i' and char=='o' and char=='u' (which is never true) with char in "aeoui".
Because you want only a single vowel, use the or condition rather than and in this line:
if (char=='a' and char=='e' and char=='i' and char=='o' and char=='u'):
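For instance, a minimal sketch of the corrected counting loop (word being the current dictionary word from the question's loop; a membership test like char in "aeiou" is equivalent):

count = 0
for char in word:
    # A character can only equal one vowel at a time, so the original
    # `and` chain can never be true; `or` expresses the intended check.
    if char == 'a' or char == 'e' or char == 'i' or char == 'o' or char == 'u':
        count += 1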
I was given a task to input multiple lines, each consisting of multiple words. The task is to uppercase the words with an odd length and lowercase the words with an even length.
My code now looks like this; can you help me get it right?
first = []
while True:
    line = input().split()
    first.append(line)
    if len(line) < 1:
        break

for i in first:
    for j in i:
        if len(line[i][j]) % 2 == 0:
            line[i][j] = line[i][j].lower()
        elif len(line[i][j]) % 2 != 0:
            line[i][j] = line[i][j].upper()
    print(first[i])
it should look like this
i and j are not indexes, they are the sublists and the words themselves. You can do:
for i in first:                  # i is a list of strings
    for j in range(len(i)):      # you do need the index to mutate the list
        if len(i[j]) % 2 == 0:
            i[j] = i[j].lower()
        else:
            i[j] = i[j].upper()
    print(' '.join(i))
So looking at the input/output in your image, here is a better solution.
sentences = []
while True:
    word_list = input().split()
    sentences = [*sentences, word_list]
    if len(word_list) < 1:
        break
So now that you have your input from the command line, you can do
[word.upper() if len(word)%2 == 1 else word.lower() for word_list in sentences for word in word_list]
or you could extract into a function
def apply_case(word):
    if len(word) % 2:
        return word.upper()
    return word.lower()
new_sentences = [[apply_case(word) for word in word_list] for word_list in sentences]  # keep the per-line grouping so the join below works
Now you can print it like this:
output = "\n".join([" ".join(word_list) for word_list in new_sentences])
print(output)
You forgot to join the lines back together. Furthermore, from a software design point of view, you are doing too much in the code fragment: it is better to encapsulate the functionality in functions, like:
def wordcase(word):
    if len(word) % 2 == 0:  # even
        return word.lower()
    else:                   # odd
        return word.upper()
Then we can even perform the processing "online" (as in line-per-line):
while True:
    line = input()
    if not line:
        break
    else:
        print(' '.join(wordcase(word) for word in line.split()))
I don't think you need to be using i or j. You can just loop over the words in your string.
Further, although it probably won't speed things up, you don't need the elif; you can just use an else. There are only two options, odd and even, so you only need to check once.
sentance = 'I am using this as a test string with many words'
wordlist = sentance.split()
fixed_wordlist = []

for word in wordlist:
    if len(word) % 2 == 0:
        fixed_wordlist.append(word.lower())
    else:
        fixed_wordlist.append(word.upper())

print(sentance, '\n', wordlist, '\n', fixed_wordlist)
So for my project I have to allow the user to input a sentence, then input a word, find all the occurrences of the word, and print their positions. Here's what I have
found = 0
sen = input("Enter the sentence you would like to break down!")
sen1 = sen.upper()
list = sen1.split()
search = input("Enter the word you want to search")
search1 = search.upper()

for search1 in list:
    found = found + 1
    position = list.index(search1)
    if position == 0:
        print("First word in the sentence")
    if position == 1:
        print("Second word in the sentence")
    if position == 2:
        print("Third word in the sentence")
    if position == 3:
        print("Fourth word in the sentence")
    if position == 4:
        print("Fifth word in the sentence")
    if position == 5:
        print("6th word in the sentence")
    else:
        position1 = position + 1
        print(position1, "th word in the sentence")
but it only prints the first occurrence of the word and rarely works. Any solutions?
Replace list with a_list.
List of positions of search1 occurrences:
positions = [idx for idx, el in enumerate(a_list) if el == search1]
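For example, with some hypothetical values:

a_list = "THE CAT SAT ON THE MAT".split()
search1 = "THE"
positions = [idx for idx, el in enumerate(a_list) if el == search1]
print(positions)  # [0, 4]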
You have a great alternative which is re.finditer:
import re

sen = input("Enter the sentence you would like to break down!")
search = input("Enter the word you want to search")

for match in re.finditer(search, sen):
    print(match.start())
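Note that finditer matches raw substrings and is case-sensitive, so "cat" would also match inside "category". If you want whole-word, case-insensitive matching, a small variation (just a sketch, reusing sen and search from above) is:

# \b restricts matches to whole words; re.escape guards against regex
# metacharacters in the user's input; re.IGNORECASE ignores case.
pattern = r'\b' + re.escape(search) + r'\b'
for match in re.finditer(pattern, sen, re.IGNORECASE):
    print(match.start())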
Several comments have mentioned the danger of using list as a variable name. It's not actually a reserved word, but it is the name of a built-in type, and shadowing it by using it as a variable name can lead to mysterious bugs if you later wish to use this type to construct a list or test the type of an object.
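For instance, a tiny (hypothetical) illustration of the kind of bug this causes:

list = "the quick brown fox".split()   # shadows the built-in list type
# ... later ...
letters = list("abc")                  # TypeError: 'list' object is not callable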
A major problem with the code you posted is here:
search1 = search.upper()
for search1 in list:
The first line saves the upper-case version of the string search to the name search1. But the next line simply clobbers that with the words in list; it does not perform any searching operation. At the end of the for loop, search1 will be equal to the last item in list, and that's why your code isn't doing what you expect it to when it executes position=list.index(search1): you're telling it to find the position of the last word in list.
You could use .index to do what you want. To find multiple occurrences you need to use a loop and pass .index a starting position, e.g.:
def find_all(wordlist, word):
    result = []
    i = 0
    while True:
        try:
            i = wordlist.index(word, i) + 1
            result.append(i)
        except ValueError:
            return result
However, there's really not much benefit in using .index here. .index performs its scan at C speed, so it's faster than scanning in a Python loop, but you probably won't notice much of a speed difference unless the list you're scanning is large.
The simpler approach is as given in Tomasz's answer. Here's a variation I wrote while Tomasz was writing his answer.
def ordinal(n):
    # Pick the right suffix: "tsnrhtdd"[x::4] gives 'th', 'st', 'nd' or 'rd'
    # for x = 0..3, with the teens forced back to 'th'.
    k = n % 10
    return "%d%s" % (n, "tsnrhtdd"[(n // 10 % 10 != 1) * (k < 4) * k::4])

def find_all(wordlist, word):
    return [i for i, s in enumerate(wordlist, 1) if s == word]

sen = 'this has this like this'
wordlist = sen.upper().split()
words = 'this has that like'

for word in words.split():
    pos = find_all(wordlist, word.upper())
    if pos:
        pos = ', '.join([ordinal(u) for u in pos])
    else:
        pos = 'Not found'
    print('{0}: {1}'.format(word, pos))
output
this: 1st, 3rd, 5th
has: 2nd
that: Not found
like: 4th
The code for ordinal was "borrowed" from this answer.
At the moment this code takes in a string from a user and compares it to a text file in which many words are stored. It then outputs all the words that contain an exact match to the string (e.g. "otp" = opt, top, pot). Currently, when I input the string it only matches it against words with the EXACT same letters in a rearranged order.
My question is: how do I go about being able to type in excess letters but still output all the words that are contained? For example: type in "orkignwer" and the program will output "working" even though there are extra letters.
words = []

def isAnAnagram(word, user):
    wordList = list(word)
    wordList.sort()
    inputList = list(user)
    inputList.sort()
    return (wordList == inputList)

def getAnagrams(user):
    lister = [word for word in words if len(word) == len(user)]
    for item in lister:
        if isAnAnagram(item, user):
            yield item

with open('Dictionary.txt', 'r') as f:
    allwords = f.readlines()
    f.close()

for x in allwords:
    x = x.rstrip()
    words.append(x)

inp = 1
while inp != "99":
    inp = input("enter word:")
    result = getAnagrams(inp)
    print(list(result))
You have to edit both the isAnAnagram and the getAnagrams functions. First, the getAnagrams function should be edited so that words shorter than the input are also included in the lister list:
def getAnagrams(user):
    lister = [word for word in words if len(word) <= len(user)]
    for item in lister:
        if isAnAnagram(item, user):
            yield item
Then you would need to edit the isAnAnagram function. As Alexander Huszagh pointed out, you can use the Counter from the collections package:
from collections import Counter

def isAnAnagram(word, user):
    word_counter = Counter(word)
    input_counter = Counter(user)
    return all(count <= input_counter[key] for key, count in word_counter.items())
The all(count <= input_counter[key] for key, count in word_counter.items()) checks whether every letter of word appears in user at least as many times as it does in word.
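For example, with the words from the question (a hypothetical session):

print(isAnAnagram('working', 'orkignwer'))  # True: every needed letter is available
print(isAnAnagram('seek', 'orkignwer'))     # False: 'seek' needs two e's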
P.S. If you want a more optimized solution, you might want to check out TRIEs (e.g. MARISA-trie, python-trie or PyTrie).