Creating a list of lists with regular expressions in python - python

I thought I had successfully created and filtered a list of lists using regular expression in python. However, when I attempt to index the lists I just index the first item in each of the lists. Upon closer inspection I noticed that I don't have any commas between my lists. I'm wondering how I can turn each of these individual lists into a list of lists?
I want to do this so that I can reference the different lists and state whether the lists meets a specific criteria.
import re
list_of_strings = ['''<z><x><c></v></b></n>''',
'''<paa>mnb<ore>mnbczx</bar><e>poiuy</e></paa>''',
'''<paa><ore></lan></ore></paa>''',
'''<paa><ore></ore></paa></paa>''',
'''<paa><ore></paa></ore>''']
def valid_html(list_of_strings):
matches = [[s] for s in list_of_strings]
lst = []
for item in matches:
tagsRegex = re.compile(r'(<.{0,3}>|</.{0,3}>)')
lst = (tagsRegex.findall(str(item)))
find = re.compile(r'(<)|(>)')
no_tags = [find.sub('', t) for t in lst]
print(no_tags)
print(no_tags[0])
valid_html(test_strings)
My output is:
valid_html(test_strings)
['z', 'x', 'c', '/v', '/b', '/n']
z
['paa', 'ore', '/ore', 'e', '/e', '/paa']
paa
['paa', 'ore', '/lan', '/ore', '/paa']
paa
['paa', 'ore', '/ore', '/paa', '/paa']
paa
['paa', 'ore', '/paa', '/ore']
paa
Thank you for your time!

You are inserting inside the loop and printing inside the loop. You need to print outside the for loop of need to return the same
def valid_html(list_of_strings):
matches = [[s] for s in list_of_strings]
lst = []
l=[]
for item in matches:
tagsRegex = re.compile(r'(<.{0,3}>|</.{0,3}>)')
lst = (tagsRegex.findall(str(item)))
find = re.compile(r'(<)|(>)')
no_tags = [find.sub('', t) for t in lst]
l.append(no_tags)
return l
valid_html(list_of_strings)[0]

Related

how to convert part of a list to a nested list in python? only use builtin functions

I'm just wondering how to convert a list of strings into a nested list? without module import.
for example:
input ["ABC","1","x","y","z"]
output ["ABC","1",["x","y","z"]]
In this particular case you could do this:
mylist = ["ABC","1","x","y","z"]
offset = 2
newlist = mylist[:offset] + [mylist[offset:]]
print(newlist)
Output:
['ABC', '1', ['x', 'y', 'z']]
Try:
mylist = ["ABC","1","x","y","z"]
def nest(mylist, n):
"""Nests list from nth element"""
nest = [x for x in mylist[n:]]
mylist = mylist[:n]
mylist.append(nest)
return mylist
nest(mylist, 2)

String manipulation with python and storing alphabets and digits in separate lists

For a given string s='ab12dc3e6' I want to add 'ab' and '12' in two different lists. that means for output i am trying to achieve as temp1=['ab','dc','e'] and for temp2=['12,'3','6'].
I am not able to do so with the following code. Can someone provide an efficient way to do it?
S = "ab12dc3e6"
temp=list(S)
x=''
temp1=[]
temp2=[]
for i in range(len(temp)):
while i<len(temp) and (temp[i] and temp[i+1]).isdigit():
x+=temp[i]
i+=1
temp1.append(x)
if not temp[i].isdigit():
break
You can also solve this without any imports:
S = "ab12dc3e6"
def get_adjacent_by_func(content, func):
"""Returns a list of elements from content that fullfull func(...)"""
result = [[]]
for c in content:
if func(c):
# add to last inner list
result[-1].append(c)
elif result[-1]: # last inner list is filled
# add new inner list
result.append([])
# return only non empty inner lists
return [''.join(r) for r in result if r]
print(get_adjacent_by_func(S, str.isalpha))
print(get_adjacent_by_func(S, str.isdigit))
Output:
['ab', 'dc', 'e']
['12', '3', '6']
you can use regex, where you group letters and digits, then append them to lists
import re
S = "ab12dc3e6"
pattern = re.compile(r"([a-zA-Z]*)(\d*)")
temp1 = []
temp2 = []
for match in pattern.finditer(S):
# extract words
#dont append empty match
if match.group(1):
temp1.append(match.group(1))
print(match.group(1))
# extract numbers
#dont append empty match
if match.group(2):
temp2.append(match.group(2))
print(match.group(2))
print(temp1)
print(temp2)
Your code does nothing for isalpha - you also run into IndexError on
while i<len(temp) and (temp[i] and temp[i+1]).isdigit():
for i == len(temp)-1.
You can use itertools.takewhile and the correct string methods of str.isdigit and str.isalpha to filter your string down:
S = "ab12dc3e6"
r = {"digit":[], "letter":[]}
from itertools import takewhile, cycle
# switch between the two test methods
c = cycle([str.isalpha, str.isdigit])
r = {}
i = 0
while S:
what = next(c) # get next method to use
k = ''.join(takewhile(what, S))
S = S[len(k):]
r.setdefault(what.__name__, []).append(k)
print(r)
Output:
{'isalpha': ['ab', 'dc', 'e'],
'isdigit': ['12', '3', '6']}
This essentially creates a dictionary where each seperate list is stored under the functions name:
To get the lists, use r["isalpha"] or r["isdigit"].

How to remove all duplicates in python list other than keywords in a separate list?

I'm trying to remove all duplicates from a python list, other than keywords that are stored in another list.
For example:
a = ['a','a','b','b','c','c']
keywords = ['a','b']
some_func(a,keywords) = ['a','a','b','b','c']
How could I do this in the most pythonic way possible?
This is one approach using a simple iteration.
a = ['a','a','b','b','c','c']
keywords = ['a','b']
def removeDup(a, keywords):
res = []
for i in set(a):
if i in keywords:
res.extend([i]*a.count(i))
else:
res.append(i)
return res
print(removeDup(a, keywords))
Output:
['a', 'a', 'c', 'b', 'b']
Note: This might break the order of the output.

most pythonic way to compare substrings l in list L to string S & edit S according to l in L?

The list ['a','a #2','a(Old)'] should become {'a'} because '#' and '(Old)' are to be excised and a list of duplicates isn't needed. I struggled to develop a list comprehension with a generator and settled on this since I knew it'd work and valued time more than looking good:
l = []
groups = ['a','a #2','a(Old)']
for i in groups:
if ('#') in i: l.append(i[:i.index('#')].strip())
elif ('(Old)') in i: l.append(i[:i.index('(Old)')].strip())
else: l.append(i)
groups = set(l)
What's the slick way to get this result?
Here is general solution, if you want to clean elements of list lst from parts in wastes:
lst = ['a','a #2','a(Old)']
wastes = ['#', '(Old)']
cleaned_set = {
min([element.split(waste)[0].strip() for waste in wastes])
for element in arr
}
You could write this whole expression in a single set comprehension
>>> groups = ['a','a #2','a(Old)']
>>> {i.split('#')[0].split('(Old)')[0].strip() for i in groups}
{'a'}
This will get everything preceding a # and everything preceding '(Old)', then trim off whitespace. The remainder is placed into a set, which only keeps unique values.
You could define a helper function to apply all of the splits and then use a set comprehension.
For example:
lst = ['a','a #2','a(Old)', 'b', 'b #', 'b(New)']
splits = {'#', '(Old)', '(New)'}
def split_all(a):
for s in splits:
a = a.split(s)[0]
return a.strip()
groups = {split_all(a) for a in lst}
#{'a', 'b'}

Sum of lists for each element of list1 with all in list2

I want make script that reads lines from file, than takes slices from each line, combines all slices from 1 line with all slices from 2 line, then combines all slices from previous step with 3rd line.
For example, we have
Stackoverflow (4)
python (3)
question (3)
I get first list with slices of (number) letters.
lst = ['Stac', 'tack', 'acko', 'ckov', 'kove', 'over', 'verf', 'erfl', 'rflo', 'flow']
Then i need to combine it with second list:
lst = ['pyt', 'yth', 'tho', 'hon']
Desired output:
finallist = ['Stacpyt', 'tackpyt', 'ackopyt', 'ckovpyt', 'kovepyt', 'overpyt', 'verfpyt', 'erflpyt', 'rflopyt', 'flowpyt' 'Stacyth', 'tackyth', 'ackoyth', 'ckovyth', 'koveyth', 'overyth', 'verfyth', 'erflyth', 'rfloyth', 'flowyth', ..... , 'erflhon', 'rflohon', 'flowhon']
then with 3rd list:
lst = ['que', 'ues', 'est', 'sti', 'tio', 'ion']
finallist = ['Stacpytque', 'tackpytque', 'ackopytque', 'ckovpytque', 'kovepytque', 'overpytque', 'verfpytque', 'erflpytque', 'rflopytque', .... 'erflhonion', 'rflohonion', 'flowhonion']
I stuck at point where I need to make finallist with combined results.
I am trying pieces of code like this, but its wrong:
for i in lst:
for y in finallist:
finallist.append(i + y)
So if finallist is empty - it should copy lst in first loop iteration, and if finallist is not empty it should combine each element with lst and so on.
I used re.match() in order to get the word and the integer value from your file.
Then, I compute all the sliced subwords and add them to a list, which is then added to a global list.
Finally, I compute all the possibilties you are looking for thank to itertools.product() which behaves like a nested for-loop.
Then, .join() the tuples obtained and you get the final list you wanted.
from itertools import product
from re import match
the_lists = []
with open("filename.txt", "r") as file:
for line in file:
m = match(r'(.*) \((\d+)\)', line)
word = m.group(1)
num = int(m.group(2))
the_list = [word[i:i+num] for i in range(len(word) - num + 1)]
the_lists.append(the_list)
combinaisons = product(*the_lists)
final_list = ["".join(c) for c in combinaisons]
Use ittertools
import itertools
list1 = ['Stac', 'tack', 'acko', 'ckov', 'kove', 'over', 'verf', 'erfl', 'rflo', 'flow']
list2 = ['pyt', 'yth', 'tho', 'hon']
list3 = ['que', 'ues', 'est', 'sti', 'tio', 'ion']
final_list = list(itertools.product(list(itertools.product(list1,list2)),list3))
This will give you all combinations, then you can just join all of them to get your string.
import itertools
def combine(lst):
result = list(itertools.product(*lst))
result = [''.join(item) for item in result]
return result
list1 = ['Stac', 'tack', 'acko', 'ckov', 'kove', 'over', 'verf', 'erfl', 'rflo', 'flow']
list2 = ['pyt', 'yth', 'tho', 'hon']
list3 = ['que', 'ues', 'est', 'sti', 'tio', 'ion']
lst = [list1, list2, list3] # append more list to lst, then pass lst to combination
print combine(lst)
Append all of the candidate lists to lst, and the combine() function will generate all kinds of combinations and then returns the result as a list.

Categories

Resources