python - regex: we are looking for the input of a function - python

import re
# The asker's original attempt: takes only the search term `s`.
def step_through_with(s):
# re.compile() only builds a pattern object; no text is searched here.
pattern = re.compile(s + ',')
# A compiled pattern object never compares equal to True, so this branch
# is never taken and the function always falls through to `return False`.
if pattern == True:
return True
else:
return False
The task is to find a word in a sentence, where the word is the input parameter of the function. What should the syntax look like?

If you want to find a word in a sentence, you have to take into account boundaries (so searching for 'fun' won't match 'function' for instance).
An example:
import re
def step_through_with(sentence, word):
    """Report whether ``word`` appears in ``sentence`` as a whole word.

    Args:
        sentence: Text to search.
        word: Word to look for; treated as literal text, not as a regex.

    Returns:
        True if the word is found between word boundaries, otherwise False.
    """
    # re.escape keeps regex metacharacters in the word ('.', '+', '(' ...)
    # from being interpreted as pattern syntax.
    pattern = r'\b{}\b'.format(re.escape(word))
    return re.search(pattern, sentence) is not None


sentence = 'we are looking for the input of a function'
print(step_through_with(sentence, 'input'))  # True
print(step_through_with(sentence, 'fun'))  # False

Related

Checking Sentence Case in String

I wonder how to check for sentence case in string. I know about isupper() method that checks for any uppercase character. But what about sentence case? For example:
def check_string(string):
<...>
return result
check_string('This is a proper sentence case.')
True
check_string('THIS IS UPPER CASE')
False
check_string('This string is Clean')
False
A quick trick for this would be using capitalize() and check if it is equal to the original input:
def check_string(my_string):
    """Return True if ``my_string`` is left unchanged by ``str.capitalize()``.

    ``capitalize()`` upper-cases the first character and lower-cases the
    rest, so any capital letter after the first character makes this False.
    """
    return my_string == my_string.capitalize()
def check_string(string):
    """Return True if every '.'-separated sentence is in sentence case.

    A sentence passes when its first character is upper-case and no later
    character is. Empty fragments (for example the text after the final
    full stop) are ignored.
    """
    for sentence in string.split("."):
        sentence = sentence.strip()
        if not sentence:
            continue
        if not sentence[0].isupper() or any(ch.isupper() for ch in sentence[1:]):
            return False
    return True

I want to find the first, second, and last words in a string

Given a string, I want to write 3 separate functions:
first_word is to print the first word of the string
second_word is to print the second word of the string
last_word is to print the last word of the string
If the string has 2 words in total, the output from function 2 should be equal to function 3.
Example:
sentence = "once upon a time there was a programmer"
print(first_word(sentence)) # once
print(second_word(sentence)) # upon
print(last_word(sentence)) # programmer
What I tried so far:
def first_word(str):
    """Return the first whitespace-delimited word of ``str``.

    Returns an empty string when ``str`` contains no words. The parameter
    name shadows the builtin but is kept so existing callers still work.
    """
    # str.find(' ') returns -1 when there is no space, which made the old
    # slice str[0:-1] drop the last character of a single-word input.
    words = str.split(maxsplit=1)
    return words[0] if words else ''
sentence = "once upon a time there was a programmer"
print(first_word(sentence))
Output:
once
What I'm stuck on:
I'm not sure how to do the second and third functions. Is there a way to have function 1 incorporated into function 2 and 3? Then the only difference between the 3 functions is that they're different iterations of function 1. Please explain your thought process as well.
use :
def finder(str, value):
    """Return the first, second or last word of ``str``.

    Args:
        str: Sentence to split on whitespace.
        value: One of "first", "second" or "last".

    Returns:
        The selected word, or None when ``value`` is not a known selector.

    Raises:
        IndexError: If "second" is requested from a sentence with fewer
            than two words.
    """
    positions = {"first": 0, "second": 1, "last": -1}
    if value not in positions:
        return None
    # split() without a separator collapses runs of spaces, so repeated
    # blanks no longer produce empty "words". An empty sentence still
    # yields "" for "first"/"last", as before.
    words = str.split() or [""]
    return words[positions[value]]
sentence = "once upon a time there was a programmer"
print(finder(sentence, "first"))
print(finder(sentence, "second"))
print(finder(sentence, "last"))
You can form a list of all the words in the given sentence and then use the corresponding indices to get the word you want, as demonstrated below:
class Word_Finder:
    """Split a sentence into words once and expose positional lookups.

    Each accessor raises IndexError when the sentence has too few words.
    """

    def __init__(self, sentence):
        self.sentence = sentence
        # split() with no argument separates on any run of whitespace.
        self.word_list = self.sentence.split()

    def first_word(self):
        """Return the first word of the sentence."""
        return self.word_list[0]

    def second_word(self):
        """Return the second word of the sentence."""
        return self.word_list[1]

    def last_word(self):
        """Return the last word of the sentence."""
        return self.word_list[-1]
sentence = "once upon a time there was a programmer"
words = Word_Finder(sentence)
print(words.first_word())
print(words.second_word())
print(words.last_word())
Here, I am assuming that your sentence will always have 2 or more words.
To lessen the bloat of using def functions, we could use a simple lambda function that uses the split() operation.
This may look something like this:
sentence = "once upon a time there was a programmer"
find_word = lambda index: sentence.split(" ")[index]
find_word can now be given any arbitrary index to list any word you may want.
find_word(0) # returns 'once'
find_word(1) # returns 'upon'
find_word(-1) # returns 'programmer'
A def function implementation would be:
def find_word(sentence, index) -> str:
    """Return the word at position ``index`` of ``sentence``.

    Words are separated by single spaces. Negative indices count from the
    end. An index outside the sentence yields an empty string.
    """
    # The return annotation is the builtin ``str``; ``String`` is not a
    # defined name and made the ``def`` statement itself raise NameError.
    words = sentence.split(" ")
    try:
        return words[index]
    except IndexError:
        return ""
You can use string split method to get words from your sentence https://www.w3schools.com/python/ref_string_split.asp
#!/usr/bin/env python3
# coding=utf-8
sentence = 'once upon a time there was a programmer'
def get_words(input):
return input.split(" ")
def first_word(data):
words = get_words(data)
if len(words) != 0:
print(f"First word = {words[0]}")
def second_word(data):
words = get_words(data)
if len(words) > 1:
print(f"Second word = {words[1]}")
else:
print("Second word = Null")
def last_word(data):
words = get_words(data)
if len(words) != 0:
print(f"Last word = {words[-1]}")
if __name__ == "__main__":
first_word(sentence)
second_word(sentence)
last_word(sentence)

Using regex to see if a number is in a string before others?

I am new to python, so obviously as well to using regex. I am just curious if it is possible to see if a number is inputted before other numbers in a string?
What this function(def checkzero) should be doing is checking to see if 0 is inputted before another number, and if it is then return True (For invalid)
import string
import re
# Raw string so that \D and \d reach the regex engine untouched; in a plain
# string literal they are invalid escape sequences and trigger a warning.
plate_pattern = re.compile(r'\D*\d*')
def main():
plate = input("Plate: ")
tests = [exclusions, platecheck, checkzero]
invalid = any(test(plate) for test in tests)
if invalid:
print("Invalid \nNo numbers in the start/middle of the plate, first number may not be 0. \nNo punctuation allowed.\nPlate must be between 2-6 characters.")
else:
print("Valid")
def exclusions(s):
if any(c in string.punctuation for c in s):
return True
return len(s) > 6 or len(s) < 2
def platecheck(s):
return not plate_pattern.fullmatch(s)
def checkzero(s):
# Not sure how to go about creating the function to check if zero is inputted before other numbers.
pass
main()
Example of Input/Output:
Input:
test02
Output:
Invalid
...
Input:
test20
Output:
Valid
So, to make it simple, we are just checking whether the first occurrence of a digit in the given string is 0 or not.
import re
def checkzero(s):
    """Return True if the first digit appearing in ``s`` is '0'.

    Returns False when ``s`` contains no digit at all.
    """
    first_digit = re.search(r'\d', s)
    # re.search returns None when nothing matches; guard before .group().
    return first_digit is not None and first_digit.group() == '0'
Test:
test_cases = ['test0', 'test02', 'test20', 'test002', 'test2022']
for test_case in test_cases:
print(checkzero(test_case))
Output:
>>> True
>>> True
>>> False
>>> True
>>> False
Revision in response to comment
The first number at any point can not be 0
You could look from the start of the string ^ for non-digit characters [^\d]* and then a 0 perhaps followed by a digit .*\d?, then proceed the same from there (also updated test cases):
def checkzero(s):
    """Return True if the first digit in ``s`` is a 0 (an invalid plate).

    The pattern skips leading non-digits from the start of the string and
    then requires a 0. The trailing ``.*\\d?`` part can match nothing, so
    it does not affect the result.
    """
    # Search once and compare against None by identity.
    return re.search(r"^[^\d]*0.*\d?", s) is not None
test_cases = ["test0", "test1", "test01", "test10", "test101"]
for test in test_cases:
    # Prints "<input>: <verdict>" on one line per test case.
    verdict = "Matched, INVALID" if checkzero(test) else "Not matched, VALID"
    print(test, verdict, sep=": ")
Output:
test0: Matched, INVALID
test1: Not matched, VALID
test01: Matched, INVALID
test10: Not matched, VALID
test101: Not matched, VALID
Original Post
You could use re.search(). If a match is found, then a Match object will be returned, otherwise None will be returned.
def checkzero(s):
    """Return True if ``s`` contains a 0 that is followed later by a digit."""
    # Search once and compare against None by identity.
    return re.search(r"0.*\d", s) is not None
Note: if you want the 0 to be immediately followed by a digit, remove the .* from the regular expression.

Exact match of a string variable in another string in python [duplicate]

I'm trying to determine whether a substring is in a string.
The issue I'm running into is that I don't want my function to return True if the substring is found within another word in the string.
For example: if the substring is; "Purple cow"
and the string is; "Purple cows make the best pets."
This should return False. Since cow isn't plural in the substring.
And if the substring was; "Purple cow"
and the string was; "Your purple cow trampled my hedge!"
would return True
My code looks something like this:
def is_phrase_in(phrase, text):
    """Case-insensitive substring test of ``phrase`` within ``text``.

    NOTE: this is plain containment, so the phrase also matches inside a
    longer word ("Purple cow" is found in "Purple cows"). That is the
    behaviour the question asks how to avoid.
    """
    return phrase.lower() in text.lower()


text = "Purple cows make the best pets!"
phrase = "Purple cow"
print(is_phrase_in(phrase, text))
In my actual code I clean up unnecessary punctuation and spaces in 'text' before comparing it to phrase, but otherwise this is the same.
I've tried using re.search, but I don't understand regular expressions very well yet and have only gotten the same functionality from them as in my example.
Thanks for any help you can provide!
Since your phrase can have multiple words, doing a simple split and intersect won't work. I'd go with regex for this one:
import re
def is_phrase_in(phrase, text):
    """Return True if ``phrase`` occurs in ``text`` as whole words.

    Matching is case-insensitive and the phrase is treated as literal text.
    """
    # Escape the phrase so characters such as '.', '?' or '(' are matched
    # literally instead of being read as regex syntax.
    pattern = r"\b{}\b".format(re.escape(phrase))
    return re.search(pattern, text, re.IGNORECASE) is not None


phrase = "Purple cow"
print(is_phrase_in(phrase, "Purple cows make the best pets!"))  # False
print(is_phrase_in(phrase, "Your purple cow trampled my hedge!"))  # True
Using PyParsing:
import pyparsing as pp
def is_phrase_in(phrase, text):
phrase = phrase.lower()
text = text.lower()
rule = pp.ZeroOrMore(pp.Keyword(phrase))
for t, s, e in rule.scanString(text):
if t:
return True
return False
text = "Your purple cow trampled my hedge!"
phrase = "Purple cow"
print(is_phrase_in(phrase, text))
Which yields:
True
One can do this very literally with a loop
phrase = phrase.lower()
text = text.lower()
answer = False
j = 0
for i in range(len(text)):
if j == len(phrase):
return text[i] == " "
if phrase[j] == text[i]:
answer = True
j+=1
else:
j = 0
answer = False
return answer
Or by splitting
phrase_words = phrase.lower().split()
text_words = text.lower().split()
return phrase_words in text_words
or using regular expressions
import re
# Build the pattern from the phrase and search for it inside the text.
# The lookarounds reject a word character directly before or after the
# phrase, while still allowing the phrase at the very start or end.
pattern = re.compile(r"(?<!\w)" + re.escape(phrase.lower()) + r"(?!\w)")
pattern.search(text.lower())
to say that we want no characters preceding or following our text, but whitespace is okay.
Regular Expressions should do the trick
import re
def is_phrase_in(phrase, text):
    """Return True if ``phrase`` occurs in ``text`` as whole words.

    Both arguments are lower-cased first, so the comparison ignores case.
    """
    # re.escape makes the phrase match literally; re.search stops at the
    # first hit instead of collecting every match as findall does.
    pattern = r'\b' + re.escape(phrase.lower()) + r'\b'
    return re.search(pattern, text.lower()) is not None
Here you go, hope this helps
# Declarations
string = "My name is Ramesh and I am cool. You are Ram ?"
sub = "Ram"
# Stays empty when the substring is not present.
output = ""

# Check the string for the substring
result = sub in string

if result:
    # Starting position of the first occurrence
    start_position = string.index(sub)
    length = len(sub)
    # Slice exactly the matched span; the end index is start + length
    # (the builtin `len` is a function and cannot be used as a slice bound).
    output = string[start_position:start_position + length]

Find substring in string but only if whole words?

What is an elegant way to look for a string within another string in Python, but only if the substring is within whole words, not part of a word?
Perhaps an example will demonstrate what I mean:
string1 = "ADDLESHAW GODDARD"
string2 = "ADDLESHAW GODDARD LLP"
assert string_found(string1, string2) # this is True
string1 = "ADVANCE"
string2 = "ADVANCED BUSINESS EQUIPMENT LTD"
assert not string_found(string1, string2) # this should be False
How can I best write a function called string_found that will do what I need? I thought perhaps I could fudge it with something like this:
def string_found(string1, string2):
    """Return True if ``string1`` followed by a space occurs in ``string2``.

    This only checks for a trailing space, so a match at the very end of
    ``string2`` is not found.
    """
    # str.find returns -1 for "not found" and 0 for a hit at the start, so
    # its result must be compared with -1 rather than used as a truth value.
    return string2.find(string1 + " ") != -1
But that doesn't feel very elegant, and also wouldn't match string1 if it was at the end of string2. Maybe I need a regex? (argh regex fear)
You can use regular expressions and the word boundary special character \b (highlight by me):
Matches the empty string, but only at the beginning or end of a word. A word is defined as a sequence of alphanumeric or underscore characters, so the end of a word is indicated by whitespace or a non-alphanumeric, non-underscore character. Note that \b is defined as the boundary between \w and \W, so the precise set of characters deemed to be alphanumeric depends on the values of the UNICODE and LOCALE flags. Inside a character range, \b represents the backspace character, for compatibility with Python’s string literals.
def string_found(string1, string2):
    """Return True if ``string1`` occurs in ``string2`` as whole words.

    ``string1`` is escaped, so it is matched literally between word
    boundaries.
    """
    pattern = r"\b" + re.escape(string1) + r"\b"
    return re.search(pattern, string2) is not None
Demo
If word boundaries are only whitespaces for you, you could also get away with pre- and appending whitespaces to your strings:
def string_found(string1, string2):
    """Return True if ``string1`` occurs in ``string2`` delimited by spaces.

    Both strings are stripped and padded with one space on each side, so a
    match at the start or end of ``string2`` counts as well.
    """
    padded_needle = " " + string1.strip() + " "
    padded_haystack = " " + string2.strip() + " "
    # Use a containment test: the raw str.find result is 0 (falsy) for a
    # hit at the start and -1 (truthy) for a miss.
    return padded_needle in padded_haystack
The simplest and most pythonic way, I believe, is to break the strings down into individual words and scan for a match:
string = "My Name Is Josh"
substring = "Name"
for word in string.split():
if substring == word:
print("Match Found")
For a bonus, here's a oneliner:
any(substring == word for word in string.split())
Here's a way to do it without a regex (as requested) assuming that you want any whitespace to serve as a word separator.
import string
def find_substring(needle, haystack):
index = haystack.find(needle)
if index == -1:
return False
if index != 0 and haystack[index-1] not in string.whitespace:
return False
L = index + len(needle)
if L < len(haystack) and haystack[L] not in string.whitespace:
return False
return True
And here's some demo code (codepad is a great idea: Thanks to Felix Kling for reminding me)
I'm building off aaronasterling's answer.
The problem with the above code is that it will return false when there are multiple occurrences of needle in haystack, with the second occurrence satisfying the search criteria but not the first.
Here's my version:
def find_substring(needle, haystack):
    """Return True if ``needle`` occurs in ``haystack`` as whole words.

    An occurrence counts when it is preceded by whitespace or the start of
    ``haystack`` and followed by whitespace or the end of ``haystack``.
    Every occurrence is examined, including overlapping ones.
    """
    search_start = 0
    while search_start < len(haystack):
        index = haystack.find(needle, search_start)
        if index == -1:
            return False
        end = index + len(needle)
        starts_clean = index == 0 or haystack[index - 1] in string.whitespace
        ends_clean = end == len(haystack) or haystack[end] in string.whitespace
        if starts_clean and ends_clean:
            return True
        # Advance by one character, not by len(needle): this keeps
        # overlapping occurrences reachable and guarantees progress even
        # when needle is empty.
        search_start = index + 1
    return False
One approach using the re, or regex, module that should accomplish this task is:
import re
string1 = "pizza pony"
string2 = "who knows what a pizza pony is?"
search_result = re.search(r'\b' + string1 + '\W', string2)
print(search_result.group())
Excuse me REGEX fellows, but the simpler answer is:
text = "this is the esquisidiest piece never ever writen"
word = "is"
" {0} ".format(text).lower().count(" {0} ".format(word).lower())
The trick here is to add 2 spaces surrounding the 'text' and the 'word' to be searched, so you guarantee there will be returning only counts for the whole word and you don't get troubles with endings and beginnings of the 'text' searched.
Thanks to @Chris Larson's comment, I tested it and updated it as shown below:
import re
string1 = "massage"
string2 = "muscle massage gun"
try:
re.search(r'\b' + string1 + r'\W', string2).group()
print("Found word")
except AttributeError as ae:
print("Not found")
def string_found(string1, string2):
    """Return True if ``string1`` occurs in ``string2`` bounded by spaces.

    An occurrence counts when it is preceded by a space or the start of
    ``string2`` and followed by a space or the end of ``string2``.
    """
    start = string2.find(string1)
    while start != -1:
        end = start + len(string1)
        # Test the start/end positions explicitly; indexing with start - 1
        # at position 0 would wrap around to the last character.
        space_before = start == 0 or string2[start - 1] == " "
        space_after = end == len(string2) or string2[end] == " "
        if space_before and space_after:
            return True
        start = string2.find(string1, start + 1)
    return False

Categories

Resources