I want to check whether another given string contains a substring of the string "chef" that is longer than one character, with the characters in the same order as in "chef".
So basically we want strings ch, he, ef, che, hef or chef to exist in the given string.
Ex:
1> kefaa
Here we have ef which is part of ''chef'' so it is a valid string.
2> fhlasek
Here we have fh, whose characters both occur in 'chef', but in the wrong order, so the string is invalid.
I have this code that works but here adding substrings manually is easy as the string 'chef' has quite fewer possibilities but I want a code that will work for any given string.
import re
# Hand-written alternation listing every substring of "chef" that is at
# least two characters long.
pattern = r"(ch|he|ef|che|hef|chef)"
s = input()
res = re.search(pattern, s)
# re.search returns a match object (truthy) on success and None otherwise.
print('YES' if res else 'NO')
P.S. I'm sorry if this question was already asked and solved; I was unable to find it.
Thank You.
Pure Python:
def test(txt, string):
le = len(txt)
fragments = [txt[i:j] for i in range(le) for j in range(i+1, le+1) if j-i>1]
# 'chef' --> ['ch', 'che', 'chef', 'he', 'hef', 'ef']
for fragment in fragments:
if fragment in string: return 'YES';
return 'NO'
print(test("chef", "ch")) # YES
print(test("chef", "che")) # YES
print(test("chef", "c")) # NO
print(test("chef", "fh")) # NO
print(test("chef", "kefaa")) # YES
If you need regexp here you go:
import re
def get_reg(txt, s):
    """Return True if `s` contains a substring of `txt` of length >= 2.

    Args:
        txt: The reference word whose fragments are searched for.
        s: The text to search in.

    Returns:
        True when at least one fragment of `txt` (two or more consecutive
        characters) occurs in `s`, otherwise False.
    """
    le = len(txt)
    # re.escape keeps characters such as '.', '+' or '|' in `txt` literal
    # instead of letting them act as regex operators.
    fragments = [
        re.escape(txt[i:j]) for i in range(le) for j in range(i + 2, le + 1)
    ]
    if not fragments:
        # `txt` has fewer than two characters: joining nothing would give
        # the empty pattern, which matches every string.
        return False
    # 'chef' --> 'ch|che|chef|he|hef|ef'
    return bool(re.search("|".join(fragments), s))


print(get_reg("chef","ch")) # True
print(get_reg("chef","che")) # True
print(get_reg("chef","c")) # False
print(get_reg("chef","fh")) # False
print(get_reg("chef","kefaa")) # True
Recurse:
import re
def get_framgents(word):
    """Append every substring of `word` with length >= 2 to `fragments`.

    The results go into the module-level list `fragments`: first the
    prefixes of `word` from longest to shortest, then the same for each
    successive suffix of `word`.

    Args:
        word: The reference word to break into fragments.
    """
    # Prefixes of the current word, from the whole word down to two chars.
    for i in range(len(word) - 1):
        fragments.append(word[:len(word) - i])
    # Recurse on the suffix. Words of one or zero characters contribute
    # nothing, so there is no need to descend past a two-character word.
    if len(word) > 2:
        get_framgents(word[1:])


word = 'chef'
fragments = []
get_framgents(word) # --> ['chef','che','ch','hef','he','ef']
# Escape each fragment so regex metacharacters in `word` match literally.
fragments = '|'.join(re.escape(fragment) for fragment in fragments) # --> 'chef|che|ch|hef|he|ef'
print(bool(re.search(fragments, "ch"))) # True
print(bool(re.search(fragments, "che"))) # True
print(bool(re.search(fragments, "c"))) # False
print(bool(re.search(fragments, "fh"))) # False
print(bool(re.search(fragments, "kaeef"))) # True
You can loop through the word and build a custom regex, then use that regex in your search:
from re import compile, escape, search

word = "chef"
s = input()
# One alternative per pair of adjacent characters in `word`. A longer
# fragment always starts with such a pair, so the pairs are sufficient.
# escape() keeps regex metacharacters in `word` literal.
pairs = [escape(word[i] + word[i + 1]) for i in range(len(word) - 1)]
pattern = compile("|".join(pairs))
# With fewer than two characters in `word` there are no pairs and the
# pattern is empty, which would match any input; treat that as "No".
if pairs and search(pattern, s):
    print("Yes")
else:
    print("No")
After the second iteration of the loop over my array, the code skips a character.
I think here is the problem:
# Excerpt of the loop from matchFirst2Letters; `word` is an index into both
# arrays, not a word.
for word in range(int(len(ShortArray))):
localString = LongArray[word]
#print(word)
# Compare the first two letters of the long name with the short code stored
# at the same index.
if localString[:2] == ShortArray[word]:
print(LongArray[word])
print(word)
Here is the full code:
# Full colour names; matchFirst2Letters compares their first two letters.
kleuren = ["Rood","Geel","Groen","Blauw","Wit","Paars","Oranje","Zwart"]
# Module-level accumulator that splitArray appends to and returns.
KleurenShort = []
# Take the first two characters of `string` once per two characters of its
# original length, appending each to KleurenShort and printing the list after
# every step. Returns KleurenShort.
def splitArray(string):
for lenght in range(int(len(string) / 2)):
KleurenShort.append(string[:2])
print(KleurenShort)
# NOTE(review): str.strip treats its argument as a set of characters and
# removes any of them from BOTH ends of the string, so this does not simply
# drop the two-character prefix.
string = string.strip(string[:2])
return KleurenShort
# Placeholder: ignores `string` and always returns 0.
def tekst_naar_kleur(string):
return 0
# For every index of ShortArray, print the entry of LongArray at that index
# and the index itself when the entry's first two letters equal the short
# code at the same index. Returns nothing.
def matchFirst2Letters(ShortArray,LongArray):
# `word` is an index; LongArray must have at least len(ShortArray) entries.
for word in range(int(len(ShortArray))):
localString = LongArray[word]
#print(word)
if localString[:2] == ShortArray[word]:
print(LongArray[word])
print(word)
# Split the packed codes, then match them position by position against kleuren.
matchFirst2Letters(splitArray("RoGeGrBl"),kleuren)
The outcome is:
['Ro']
['Ro', 'Ge']
['Ro', 'Ge', 'rB']
['Ro', 'Ge', 'rB', 'l']
when it should be:
['Ro']
['Ro', 'Ge']
['Ro', 'Ge', 'Gr']
['Ro', 'Ge', 'Gr', 'Bl']
The problem is the use of the string.strip() method.
'aaaaaabcdb'.strip('ab')
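gives 'cd' because strip() treats its argument as a set of characters and removes any of them from both ends of the string; it does not cut off a prefix (the 'b' in the middle would survive if it were not at an end after stripping). To drop just the first two letters of the input string, use slicing instead: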
'abcde'[2:] will give 'cde'.
Implemented in your code the corrected version is:
kleuren = ["Rood","Geel","Groen","Blauw","Wit","Paars","Oranje","Zwart"]
KleurenShort = []


def splitArray(string):
    """Split `string` into two-character codes and collect them globally.

    Each complete pair of characters is appended to the module-level list
    KleurenShort, and the list is printed after every append. A trailing
    single character (odd length) is ignored.

    Args:
        string: Packed two-letter codes, e.g. "RoGeGrBl".

    Returns:
        The module-level KleurenShort list (the same object on every call).
    """
    pair_count = len(string) // 2
    for start in range(0, pair_count * 2, 2):
        KleurenShort.append(string[start:start + 2])
        print(KleurenShort)
    return KleurenShort
def tekst_naar_kleur(string):
    """Placeholder: ignore `string` and return 0."""
    placeholder = 0
    return placeholder
def matchFirst2Letters(ShortArray, LongArray):
    """Print each long name whose first two letters equal its short code.

    The two sequences are compared position by position. For every index
    where the first two characters of the LongArray entry equal the
    ShortArray entry, the long name and then the index are printed on
    separate lines.

    Args:
        ShortArray: Sequence of two-letter codes.
        LongArray: Sequence of full names, aligned by index with ShortArray.

    Returns:
        None.
    """
    # zip stops at the shorter sequence, so a ShortArray that is longer than
    # LongArray no longer raises IndexError.
    for index, (short, long_name) in enumerate(zip(ShortArray, LongArray)):
        if long_name[:2] == short:
            print(long_name)
            print(index)
matchFirst2Letters(splitArray("RoGeGrBl"),kleuren)
which outputs
['Ro']
['Ro', 'Ge']
['Ro', 'Ge', 'Gr']
['Ro', 'Ge', 'Gr', 'Bl']
Rood
0
Geel
1
Groen
2
Blauw
3
With the answer from the comment linked below, your splitArray function simply becomes:
def splitArray(string):
    """Split `string` into consecutive chunks of two characters.

    An odd-length input ends with a one-character chunk; an empty input
    gives an empty list.
    """
    chunks = []
    for start in range(0, len(string), 2):
        chunks.append(string[start:start + 2])
    return chunks
I have a task for my college (I am beginner), which asks you to validate a password using ASCII characters. I tried using simple code and it worked, however it kept skipping my ASCII part.
Requirement list:
1.4 Call function to get a valid password OUT: password
1.4.1 Loop until password is valid
1.4.2 Ask the user to enter a password
1.4.3 Check that the first character is a capital letter (ASCII values 65 to 90)
1.4.4 Check that the last character is #, $ or % (ASCII values 35 to 37)
1.4.5 Return a valid password
# Code points 65..90, i.e. the uppercase ASCII letters 'A'..'Z'.
U = [65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90]
# The same letters joined into a single string.
upCase = ''.join(chr(i) for i in U)
print(upCase) #Ensure it is working
# Check userPass against each rule below, printing a message for every rule
# that fails. Returns True when no rule failed; otherwise falls off the end
# and returns None, which the caller's `if` treats as false.
def passVal(userPass):
SpecialSym = ["#", "$", "%"]
val = True
#Common way to validate password VVV
if len(userPass) < 8:
print("Length of password should be at least 8")
val = False
if not any(char.isdigit() for char in userPass):
print("Password should have at least one numeral")
val = False
#I Tried same with ASCII (and other methods too) but it seemed to be skipping this part VVV
# NOTE(review): the generator yields the non-empty string `upCase` itself for
# every character, and a non-empty string is always truthy, so any(...) is
# True for every non-empty userPass and this branch never runs for it. A
# membership test such as `char in upCase` is presumably what was intended.
if not any(upCase for char in userPass):
print("Password should have at least one uppercase letter")
val = False
if not any(char.islower() for char in userPass):
print("Password should have at least one lowercase letter")
val = False
if not any(char in SpecialSym for char in userPass):
print("Password should have at least on fo the symbols $%#")
val = False
if val:
return val
def password():
    """Validate the module-level `userPass` and print the verdict."""
    verdict = "Password is valid" if passVal(userPass) else "Invalid Password !!"
    print(verdict)


# Read the candidate password once, then report whether it passes passVal.
userPass = input("Pass: ")
password()
From Python 3.7 you can use str.isascii()...
>>> word = 'asciiString'
>>> word.isascii()
True
Otherwise you could use:
>>> all([ord(c) < 128 for c in word])
True
Since all ASCII characters have an ordinal (ord) value less than 128 (0 -> 127): https://en.wikipedia.org/wiki/ASCII
So your logic will either be (3.7+):
if word.isascii():
# string is ascii
...
Or:
if all([ord(c) < 128 for c in word]):
# string is ascii
else:
# string contains at least one non-ascii character
I am attempting to monitor several businesses Twitter accounts by exporting tweets to a .csv to look at the positivity or negativity of Tweets that include the name of the business, which will then be visualised.
To make it easier for myself I'm only assigning each Tweet one number, between 1 (negative) - 10 (positive), however the code I've written doesn't give any feedback (remains at 0), gets stuck in a For Loop, or gets a Syntax Error.
Using Jupyter notebook I've tried to create a 10 line If/Elif statement - due to Python not having a case statement, and inserted this code both in the 'get Tweets' method as well as the 'write csv' method.
Get Tweets
# Run one search for the query and print every result that is not a retweet.
api = tweepy.API(auth)
query = "ASOS"
language = "en"
results = api.search(q=query, lang=language, count=100)
for tweet in results:
# Skip results flagged as retweeted or whose text contains 'RT #'.
if (not tweet.retweeted) and ('RT #' not in tweet.text):
# NOTE(review): `**rating**` looks like emphasis markup marking where the
# rating value should be inserted; it is not valid call syntax as written.
print(tweet.user.screen_name,"Tweeted:",tweet.text,**rating**)
print()
Write CSV
import csv
# Append one CSV row per non-retweet search result to 'ASOS with emojis1.csv'.
api = tweepy.API(auth)
# NOTE(review): the file is opened in append mode without a `with` block, so
# it stays open if an exception is raised before csvFile.close() runs; the csv
# module documentation also recommends opening the file with newline=''.
csvFile = open('ASOS with emojis1.csv', 'a')
csvWriter = csv.writer(csvFile)
# `query` and `language` are not assigned in this snippet.
results = api.search(q=query, lang=language, count=100)
for tweet in results:
if (not tweet.retweeted) and ('RT #' not in tweet.text):
# NOTE(review): `**rating**` looks like emphasis markup marking where the
# rating value should be inserted; it is not valid list syntax as written.
csvWriter.writerow([tweet.created_at, tweet.user.screen_name, tweet.text, **rating**])
csvFile.close()
If/Elif Statement I've written
rating = '0'
if 'abysmal' or 'appalling' or 'dreadful' or 'awful' or 'terrible' or 'very bad' or 'really bad' or 'π‘' or 'π ' or 'π·' in tweet.text:
(rating = '1')
elif 'rubbish' or 'unsatisfactory' or 'bad' or 'poor' or 'π' or 'π' or ':(' or '):' or 'π' in tweet.text:
(rating = '2')
elif 'quite bad' or 'pretty bad' or 'somewhat bad' or 'below average' or 'π' or 'π£' or 'βΉοΈ' or 'π' or 'π’' in tweet.text:
(rating = '3')
elif 'mediocre' or 'π' or 'π' or 'π' or 'π€' or 'πͺ' in tweet.text:
(rating = '4')
elif 'average' or 'not bad' or 'fair' or 'alright' or 'ok' or 'satisfactory' or 'fine' or 'somewhat good' or 'π³' or 'π' or 'π©' or 'π«' or 'π' or 'π±' or 'π¬' or 'omg' in tweet.text:
(rating = '5')
elif 'quite good' or 'decent' or 'above average' or 'pretty good' or 'good' or 'π' or 'πͺ' or 'π
' or 'π' or 'π' in tweet.text:
(rating = '6')
elif 'great' or 'gr8' or 'really good' or 'rlly good' or 'very good' or 'v good' or 'π' or 'βΊοΈ' or 'π' or 'π' or 'π' or 'π' or 'π' ':)' or '(:' or 'π₯' or 'π' or 'π€£' or 'π€' or 'π' in tweet.text:
(rating = '7')
elif 'awesome' or 'fantastic' or 'π' or 'π' or 'π' or 'π' or 'β€' or 'β₯' or 'π' or 'π' or 'β
' or 'π' or 'π€' or 'π' or 'β¨' in tweet.text:
(rating = '8')
elif 'superb' or 'brilliant' or 'incredible' or 'excellent' or 'oustanding' or 'π' or 'π' or 'π₯°' or 'π―' in tweet.text:
(rating = '9')
elif 'perfect' in tweet.text:
(rating = '10')
else:
(rating = 'N/A')
Expected: Produces .csv file with various numbers in
Actual: (rating = '1') SyntaxError: invalid syntax
Your conditionals are not working properly. To chain a conditional:
mylist = [1, 2, 3]
# Note that the full condition must be specified for
# each desired conditional
if 1 in mylist or 2 in mylist or 3 in mylist:
print("True")
# True
The issue with what you are using is that you are approaching the logic the way you would say it rather than how the interpreter reads it. As an example:
if 'a' or 'b':
print('True')
# True
Populated strings act as True and will evaluate your condition as such, so a modification should be made such that the entire conditional is specified:
# Evaluates to True, though it's not what you want
if 'a' and 'b' in 'bc':
print(True) # This is not what you want, but 'a' is read as 'true'
# True
if 'a' in 'bc' and 'b' in 'bc':
print(True)
# Doesn't print True because 'a' in 'bc' is False
The any function could help here, as it will look if any of the values evaluate to True:
mylist = [1, 2, 3]
if any([i in mylist for i in range(2,5)]):
print("True")
# True
Furthermore, there's no need for the parentheses around variable assignment:
if 'abysmal' in tweet.text or 'horrible' in tweet.text:
rating = 0
elif ...:
rating = 1
# So on and so forth
So given a set of chars and length
s = set('abc')
l = 5
How can I ensure that a string doesn't contain substrings like
abcab
aaaaa
Length needs to be around 60 so I can't just generate all substrings.
You can iterate through each character of the string and keep track of how many consecutive characters, ending at the current one, are elements of s.
def hasSubstring(s, l, text=None):
    """Return True if `text` contains `l` consecutive characters from `s`.

    Scans `text` once, tracking the length of the current run of characters
    that are members of `s`; no substrings are generated.

    Args:
        s: Collection of characters (e.g. set('abc')) to look for.
        l: Length of the run to detect.
        text: The string to scan. It has a default only so that the original
            two-argument call shape is still accepted by the signature.

    Returns:
        True as soon as a run of at least `l` characters from `s` is found,
        otherwise False.

    Raises:
        TypeError: If `text` is not supplied.
    """
    if text is None:
        # The original looped over the builtin `str` type, which always
        # raised TypeError; keep that failure mode but make it explicit.
        raise TypeError("hasSubstring() requires the string to scan as `text`")
    run = 0
    for c in text:
        if c in s:
            run += 1
            # `>=` rather than `>`: a run of exactly `l` characters is
            # already a forbidden substring of length `l`.
            if run >= l:
                return True
        else:
            run = 0
    return False
What about using product and a list comprehension?
from itertools import product
s = set('abc')
l = 5
omit = ['abcab', 'aaaaa']


def sorter(s, l, omit):
    """Return every length-`l` string over the characters of `s`, minus `omit`.

    Note that this enumerates len(s) ** l candidate strings, so it is only
    practical for small `l`.

    Args:
        s: Iterable of single-character strings (the alphabet).
        l: Length of the strings to generate.
        omit: Strings to leave out of the result.

    Returns:
        A list of strings in itertools.product order, following the
        iteration order of `s`.
    """
    alphabet = ''.join(s)
    # A set makes each exclusion test O(1) instead of scanning the list.
    excluded = set(omit)
    return [
        candidate
        for candidate in map(''.join, product(alphabet, repeat=l))
        if candidate not in excluded
    ]


print(sorter(s, l, omit))
Output-
['aaaac', 'aaaab', 'aaaca', 'aaacc', 'aaacb', 'aaaba', 'aaabc', 'aaabb', 'aacaa', 'aacac', 'aacab', 'aacca', 'aaccc', 'aaccb', 'aacba', 'aacbc', 'aacbb', 'aabaa', 'aabac', 'aabab', 'aabca', 'aabcc', 'aabcb', 'aabba', 'aabbc', 'aabbb', 'acaaa', 'acaac', 'acaab', 'acaca', 'acacc', 'acacb', 'acaba', 'acabc', 'acabb', 'accaa', 'accac', 'accab', 'accca', 'acccc', 'acccb', 'accba', 'accbc', 'accbb', 'acbaa', 'acbac', 'acbab', 'acbca', 'acbcc', 'acbcb', 'acbba', 'acbbc', 'acbbb', 'abaaa', 'abaac', 'abaab', 'abaca', 'abacc', 'abacb', 'ababa', 'ababc', 'ababb', 'abcaa', 'abcac', 'abcca', 'abccc', 'abccb', 'abcba', 'abcbc', 'abcbb', 'abbaa', 'abbac', 'abbab', 'abbca', 'abbcc', 'abbcb', 'abbba', 'abbbc', 'abbbb', 'caaaa', 'caaac', 'caaab', 'caaca', 'caacc', 'caacb', 'caaba', 'caabc', 'caabb', 'cacaa', 'cacac', 'cacab', 'cacca', 'caccc', 'caccb', 'cacba', 'cacbc', 'cacbb', 'cabaa', 'cabac', 'cabab', 'cabca', 'cabcc', 'cabcb', 'cabba', 'cabbc', 'cabbb', 'ccaaa', 'ccaac', 'ccaab', 'ccaca', 'ccacc', 'ccacb', 'ccaba', 'ccabc', 'ccabb', 'cccaa', 'cccac', 'cccab', 'cccca', 'ccccc', 'ccccb', 'cccba', 'cccbc', 'cccbb', 'ccbaa', 'ccbac', 'ccbab', 'ccbca', 'ccbcc', 'ccbcb', 'ccbba', 'ccbbc', 'ccbbb', 'cbaaa', 'cbaac', 'cbaab', 'cbaca', 'cbacc', 'cbacb', 'cbaba', 'cbabc', 'cbabb', 'cbcaa', 'cbcac', 'cbcab', 'cbcca', 'cbccc', 'cbccb', 'cbcba', 'cbcbc', 'cbcbb', 'cbbaa', 'cbbac', 'cbbab', 'cbbca', 'cbbcc', 'cbbcb', 'cbbba', 'cbbbc', 'cbbbb', 'baaaa', 'baaac', 'baaab', 'baaca', 'baacc', 'baacb', 'baaba', 'baabc', 'baabb', 'bacaa', 'bacac', 'bacab', 'bacca', 'baccc', 'baccb', 'bacba', 'bacbc', 'bacbb', 'babaa', 'babac', 'babab', 'babca', 'babcc', 'babcb', 'babba', 'babbc', 'babbb', 'bcaaa', 'bcaac', 'bcaab', 'bcaca', 'bcacc', 'bcacb', 'bcaba', 'bcabc', 'bcabb', 'bccaa', 'bccac', 'bccab', 'bccca', 'bcccc', 'bcccb', 'bccba', 'bccbc', 'bccbb', 'bcbaa', 'bcbac', 'bcbab', 'bcbca', 'bcbcc', 'bcbcb', 'bcbba', 'bcbbc', 'bcbbb', 'bbaaa', 'bbaac', 'bbaab', 'bbaca', 'bbacc', 'bbacb', 'bbaba', 'bbabc', 
'bbabb', 'bbcaa', 'bbcac', 'bbcab', 'bbcca', 'bbccc', 'bbccb', 'bbcba', 'bbcbc', 'bbcbb', 'bbbaa', 'bbbac', 'bbbab', 'bbbca', 'bbbcc', 'bbbcb', 'bbbba', 'bbbbc', 'bbbbb']