Python: Input variable for my "matching" function [duplicate] - python

This question already has an answer here:
Python: How to call a long function containing hundreds of lists in a short form
(1 answer)
Closed 5 years ago.
I have a lot of lists such as:
ABCC8 = ['TRIM29', 'IGL#', 'DOCK6', 'SVEP1', 'S100A11', 'EPHA2', 'KLHL7', 'ANXA3', 'NAB1', 'CELF2', 'EDNRB', 'PLAGL1', 'IL6ST', 'S100A8', 'CKLF', 'TIPARP', 'CDH3', 'MAP3K8', 'LYST', 'LEPR', 'FHL2', 'ARL4C', 'IL1RN', 'ESR1', 'CD93', 'ATP2B4', 'KAT2B', 'ELOVL5', 'SCD', 'SPTBN1', 'AKAP13', 'LDLR', 'ADRB2', 'LTBP4', 'TGM2', 'TIMP3', 'RAN', 'LAMA3', 'ASPH', 'ID4', 'STX11', 'CNN2', 'EGR1']
APP = ['GULP1', 'PREPL', 'FHL1', 'METTL7A', 'TRIM13', 'YPEL5', 'PTEN', 'FAM190B', 'GSN', 'UBL3', 'PTGER3', 'COBLL1', 'EPB41L3', 'KLF4', 'BCL2L2', 'CYLD', 'SLK', 'ENSA', 'SKAP2', 'FBXO3', 'PDCD4', 'ATP2A2', 'AKAP11', 'PAFAH1B1', 'RALGAPA1', 'YWHAZ', 'BNIP3L', 'ATP8A1', 'TNXB', 'DICER1', 'C17orf91', 'BEX4', 'PPM1A', '2017-09-10', 'NDRG2', 'NCOA1', 'NAB1', 'STX7', 'ZFAND5', 'CD47', 'SFRS5', 'CLASP2', 'PBX1', 'NR3C1', 'ABCA8', 'ETFDH', 'RBPMS', 'FOXO1', 'KLF6', 'ADH1B', 'RAB22A', 'CCNG2', 'NFIB', 'IDS', 'NR3C2', 'MAF', 'NDEL1', 'EZR', 'PCDH9', 'KIAA0494', 'CITED2', 'MGEA5', 'RUFY3', 'ALDH3A2', 'N4BP2L2', 'EPS15', 'TSPAN5', 'SNRPN', 'SSBP2', 'ELOVL5', 'C5orf4', 'FOXN3', 'ABCA5', 'SEC62', 'PELI1', 'MYCBP2', 'USP15', 'TACC1', 'SHMT1', 'RNF103', 'CDC14B', 'SYNE1', 'NDN', 'PHKB', 'EIF1', 'TROVE2', 'MBD4', 'GAB1']
BECN1 = ['LMNA', 'NHP2L1', 'IDS', 'ATP6V0B', 'ENSA', 'TBCB', 'NDUFA13', 'TOLLIP', 'PLEKHB2', 'MBOAT7', 'C16orf13', 'PGAM1', 'MIF', 'ACTR1A', 'OAZ1', 'GNAS', 'ARF1', 'MAPKAPK3', 'LCMT1', 'ATP6V1D', 'FLOT1', 'PRR13', 'COX5B', 'PGP', 'CYB561', 'CNIH4', 'COX6B1', 'ARPC5L', 'NCKIPSD', 'C9orf16', 'LSM4', 'ATP5L', 'C14orf2', 'AURKAIP1', 'MRPL41', 'PDPK1', 'NOP10', 'CANT1', 'CALM3', 'PSEN2', 'C9orf86', 'ATP6V0E1', 'PIN1', 'LARP1', 'HTATIP2', 'PPP1R7', 'HCFC1R1', 'UQCR10', 'FAM134A', 'GPAA1', 'THY1', 'PPM1A', 'NAPA', 'NDUFC2', 'EPS8L1', 'PSME2', 'UBE2M', 'ORMDL2', 'TCEB2', 'RMND5B', 'ATPIF1', 'RNF19B', 'PEBP1', 'PCBP2', 'GHITM', 'AP3S2', 'TSPAN5', 'AP2S1', 'C20orf24', 'RABIF', 'NDUFB2', 'PFDN2', 'GPR172A', 'RTN4', 'GAPDH', 'MAPK13', 'FKBP8', 'PTGER3', 'BSCL2', 'TUBG1', 'FAM162A', 'GDI1', 'SPTLC2', 'YWHAZ', 'BCAP31', 'OSBPL1A', 'ATP6AP1', 'CALM1', 'PEX16', 'MYCBP2']
ARNTL = ['NCAM1', 'SLC11A2', 'RPL35A', 'PDLIM5', 'RPL31', 'NFIB', 'GYG2', 'IGHG1', 'NAAA', 'DLC1', 'EPOR', 'DIO2', 'ESR1', 'KLK10', 'CYP2C9', 'SPN', 'RPS9', 'PRELP', 'CYP3A43', 'PLAGL1', 'COBLL1', 'ADCK2', 'RPL13', 'NRP2', 'SCEL', 'DOCK6', 'NENF', 'MLLT4', 'SERPINB13', 'PALMD', 'TMEM132A', 'ASAP3', 'MTAP', 'NOVA1', 'ALOX12', 'SPINK5', 'LDB3', 'ATP5S', 'LMNA', 'BAIAP2', 'FZD4', 'GNAS', 'OBSL1', 'TCL6', 'ICOSLG', 'MACROD2', 'MAST4', 'EDA', 'ADAM22', 'CSHL1', 'SYNGR1', 'THBS1', 'PEX16', 'NOS1', 'SLCO1A2', 'CYP2A7', 'PRDM2', 'DTNA', 'HSD17B4', 'RPL29', 'PDCD4', 'IL1RN', 'CASZ1', 'C9orf16', 'RGS12', 'TRD#', 'ATP1A2', 'MPRIP', 'PDE4C', 'SPTLC2', 'TNXB', 'DDAH2', 'AOX1', 'PAIP2B', 'HNF4A', 'GLS', 'EMP1', 'ARHGEF4', 'FUT6', 'ACACB', 'NR5A2', 'N4BP2L1', 'APAF1', 'DSC2', 'EDNRB', 'RPL27A', 'CYP2C18']
I have a function which returns me the number of matches of same strings among the different lists of strings compared to my reference (`ref`) list.
def sort_by_matches(ref, lists):
    """Print, for every named list, how many distinct items it shares with ``ref``.

    ``lists`` is an iterable of ``(name, items)`` pairs. One line is printed
    per pair, ordered by descending match count; among equal counts the
    shorter list is printed first.
    """
    wanted = set(ref)
    scored = []
    for label, members in lists:
        overlap = len(wanted.intersection(set(members)))
        scored.append([overlap, label, members])
    # reverse=True puts the highest count first; the negated length makes the
    # shorter list win a tie under that same reversed ordering.
    scored.sort(key=lambda entry: (entry[0], -len(entry[2])), reverse=True)
    for overlap, label, _members in scored:
        print("Matches {} in {}".format(overlap, label))
sort_by_matches(APP, [("ABCC8", ABCC8), ("APP", APP), ("BECN1", BECN1), ("ARNTL", ARNTL), ("BMI1", BMI1), ("CASP8", CASP8), ("CASP9", CASP9), ("CLOCK", CLOCK), ("CRAT", CRAT), ("CRY2", CRY2), ("CSF1", CSF1), ("CTCF", CTCF), ("DNMT1", DNMT1), ("EP300", EP300), ("FBXW7", FBXW7), ("FOXA1", FOXA1), ("FOXO1", FOXO1), ("FOXO3", FOXO3), ("GADD34", GADD34), ("GATA3", GATA3), ("GCK", GCK), ("GLI1", GLI1), ("GLP1", GLP1), ("GLP1R", GLP1R), ("GLUT1", GLUT1),("GLUT2", GLUT2),("HES1", HES1),("HEY1", HEY1),("HIF1A", HIF1A),("HNF1A", HNF1A),("HNF4A", HNF4A),("ICMT", ICMT),("ID1", ID1),("IDH1", IDH1),("IL4", IL4),("IL6", IL6),("LC3A", LC3A),("LYL1", LYL1),("MFSD2A", MFSD2A),("MOAP1", MOAP1),("MTNR1B", MTNR1B),("MTOR", MTOR),("MYF5", MYF5),("MYOD1", MYOD1),("MSTN", MSTN),("NANOG", NANOG),("NOTCH1", NOTCH1),("NR1D1", NR1D1),("POU5F1", POU5F1),("PAX7", PAX7),("PDK1", PDK1),("PER2", PER2),("PHF6", PHF6),("PRMT5", PRMT5),("PSEN1", PSEN1),("PSEN2", PSEN2),("PTCH1", PTCH1),("RMST", RMST),("RUNX1", RUNX1),("SETD2", SETD2),("SIN3A", SIN3A),("SOCS1", SOCS1),("SOX2", SOX2),("STAT3", STAT3),("STK11", STK11),("TAF1", TAF1),("TCF3", TCF3),("TEAD1", TEAD1), ("TERT", TERT),("RANKL", RANKL),("TOP2A", TOP2A), ("TOX3", TOX3), ("TRIM28", TRIM28), ("TSHZ2", TSHZ2), ("TSHZ3", TSHZ3), ("TSP1", TSP1), ("TWIST1", TWIST1), ("FN1", FN1), ("VHL", VHL), ("WLS", WLS), ("WNT3", WNT3), ("WNT3A", WNT3A), ("WNT5A", WNT5A), ("WT1", WT1), ("YAP1", YAP1), ('MYBPC3', MYBPC3), ("PPARG", PPARG), ("NKD1", NKD1), ("LRP5", LRP5), ("SMO", SMO), ("CSNK1E", CSNK1E), ("DKK1", DKK1), ("MYH7", MYH7), ("AXIN2", AXIN2), ("TCF7", TCF7), ("NEUROD1", NEUROD1), ("FZD5", FZD5), ("FZD8", FZD8), ("CREB1", CREB1), ("TCF7L2", TCF7L2), ("SOX17", SOX17), ("TP53", TP53), ("PTGER3", PTGER3), ("FERMT2", FERMT2), ("WNT1", WNT1), ("WNT7B", WNT7B), ("MDM4", MDM4), ("IL10", IL10 ), ("DVL1", DVL1 ), ("PGR", PGR), ("TSC1", TSC1), ("ASCL2", ASCL2)])
How can I pass the reference list as an input variable (`ref`) to my function sort_by_matches(ref, lists), instead of copy-pasting the whole call every time with a different reference?
Copy-pasting the call above for every reference is not practical, since I have hundreds of lists. How can I solve this problem?

There is probably a more succinct way of doing this, as I am new to Python, but I made the following modifications:
The lists you have defined are put into a list of tuples, with the first item in the tuple representing the name of the list, and the second item referencing the list you have already defined (similar to how you're calling your function already):
myLists = [("ABCC8", ABCC8), ("APP", APP), ("BECN1", BECN1), ("ARNTL", ARNTL)]
I modified the ref parameter so your function sort_by_matches expects a tuple for that parameter (this is so you can reference the name of the list, if needed):
def sort_by_matches(ref, lists):
    """Print how many distinct items each named list shares with a named reference.

    ``ref`` is indexed as ``(name, items)``; ``lists`` is an iterable of
    ``(name, items)`` pairs. Lines are printed by descending match count,
    with the shorter list first when counts are equal.
    """
    baseline = set(ref[1])

    ranked = []
    for title, entries in lists:
        shared = baseline.intersection(set(entries))
        ranked.append([len(shared), title, entries])
    ranked.sort(key=lambda row: (row[0], -len(row[2])), reverse=True)

    for hits, title, _entries in ranked:
        print("{} Matches {} in {}".format(ref[0], hits, title))
Next, loop over myLists, calling sort_by_matches for the reference list and all lists following it:
# Compare every list with each list that follows it, so that each unordered
# pair of lists is compared exactly once. The last entry has no successors
# and is therefore never used as the reference.
for i, current in enumerate(myLists[:-1]):
    sort_by_matches(current, myLists[i + 1:])
This compares each list to every other list.
The output looks like this:
ABCC8 Matches 5 in ARNTL
ABCC8 Matches 2 in APP
ABCC8 Matches 0 in BECN1
APP Matches 7 in BECN1
APP Matches 4 in ARNTL
BECN1 Matches 5 in ARNTL
EDIT
You will need a way to reference all the lists some way since they are contained within the code. Below I am storing references to your lists in the variable MY_LISTS. You would just need to add an entry to this variable each time you define another list in your program. (However you decide to implement this, since the lists are defined explicitly in the code, there will need to be a collection referencing the lists to avoid having to reference all the lists explicitly each time you call your function.)
No modifications have been made to the sort_by_matches function. Instead, in the while loop, the name of the current reference list is printed out so you'll know which list the other lists are being compared to.
The loop starts with the first list in MY_LISTS, comparing it to all subsequent lists in MY_LISTS. In each iteration of the loop, the reference list is only compared to the subsequent lists in MY_LISTS. This makes it so a comparison is only performed on every possible pair of lists once.
ABCC8 = ['TRIM29', 'IGL#', 'DOCK6', 'SVEP1', 'S100A11', 'EPHA2', 'KLHL7', 'ANXA3', 'NAB1', 'CELF2', 'EDNRB', 'PLAGL1', 'IL6ST', 'S100A8', 'CKLF', 'TIPARP', 'CDH3', 'MAP3K8', 'LYST', 'LEPR', 'FHL2', 'ARL4C', 'IL1RN', 'ESR1', 'CD93', 'ATP2B4', 'KAT2B', 'ELOVL5', 'SCD', 'SPTBN1', 'AKAP13', 'LDLR', 'ADRB2', 'LTBP4', 'TGM2', 'TIMP3', 'RAN', 'LAMA3', 'ASPH', 'ID4', 'STX11', 'CNN2', 'EGR1']
APP = ['GULP1', 'PREPL', 'FHL1', 'METTL7A', 'TRIM13', 'YPEL5', 'PTEN', 'FAM190B', 'GSN', 'UBL3', 'PTGER3', 'COBLL1', 'EPB41L3', 'KLF4', 'BCL2L2', 'CYLD', 'SLK', 'ENSA', 'SKAP2', 'FBXO3', 'PDCD4', 'ATP2A2', 'AKAP11', 'PAFAH1B1', 'RALGAPA1', 'YWHAZ', 'BNIP3L', 'ATP8A1', 'TNXB', 'DICER1', 'C17orf91', 'BEX4', 'PPM1A', '2017-09-10', 'NDRG2', 'NCOA1', 'NAB1', 'STX7', 'ZFAND5', 'CD47', 'SFRS5', 'CLASP2', 'PBX1', 'NR3C1', 'ABCA8', 'ETFDH', 'RBPMS', 'FOXO1', 'KLF6', 'ADH1B', 'RAB22A', 'CCNG2', 'NFIB', 'IDS', 'NR3C2', 'MAF', 'NDEL1', 'EZR', 'PCDH9', 'KIAA0494', 'CITED2', 'MGEA5', 'RUFY3', 'ALDH3A2', 'N4BP2L2', 'EPS15', 'TSPAN5', 'SNRPN', 'SSBP2', 'ELOVL5', 'C5orf4', 'FOXN3', 'ABCA5', 'SEC62', 'PELI1', 'MYCBP2', 'USP15', 'TACC1', 'SHMT1', 'RNF103', 'CDC14B', 'SYNE1', 'NDN', 'PHKB', 'EIF1', 'TROVE2', 'MBD4', 'GAB1']
BECN1 = ['LMNA', 'NHP2L1', 'IDS', 'ATP6V0B', 'ENSA', 'TBCB', 'NDUFA13', 'TOLLIP', 'PLEKHB2', 'MBOAT7', 'C16orf13', 'PGAM1', 'MIF', 'ACTR1A', 'OAZ1', 'GNAS', 'ARF1', 'MAPKAPK3', 'LCMT1', 'ATP6V1D', 'FLOT1', 'PRR13', 'COX5B', 'PGP', 'CYB561', 'CNIH4', 'COX6B1', 'ARPC5L', 'NCKIPSD', 'C9orf16', 'LSM4', 'ATP5L', 'C14orf2', 'AURKAIP1', 'MRPL41', 'PDPK1', 'NOP10', 'CANT1', 'CALM3', 'PSEN2', 'C9orf86', 'ATP6V0E1', 'PIN1', 'LARP1', 'HTATIP2', 'PPP1R7', 'HCFC1R1', 'UQCR10', 'FAM134A', 'GPAA1', 'THY1', 'PPM1A', 'NAPA', 'NDUFC2', 'EPS8L1', 'PSME2', 'UBE2M', 'ORMDL2', 'TCEB2', 'RMND5B', 'ATPIF1', 'RNF19B', 'PEBP1', 'PCBP2', 'GHITM', 'AP3S2', 'TSPAN5', 'AP2S1', 'C20orf24', 'RABIF', 'NDUFB2', 'PFDN2', 'GPR172A', 'RTN4', 'GAPDH', 'MAPK13', 'FKBP8', 'PTGER3', 'BSCL2', 'TUBG1', 'FAM162A', 'GDI1', 'SPTLC2', 'YWHAZ', 'BCAP31', 'OSBPL1A', 'ATP6AP1', 'CALM1', 'PEX16', 'MYCBP2']
ARNTL = ['NCAM1', 'SLC11A2', 'RPL35A', 'PDLIM5', 'RPL31', 'NFIB', 'GYG2', 'IGHG1', 'NAAA', 'DLC1', 'EPOR', 'DIO2', 'ESR1', 'KLK10', 'CYP2C9', 'SPN', 'RPS9', 'PRELP', 'CYP3A43', 'PLAGL1', 'COBLL1', 'ADCK2', 'RPL13', 'NRP2', 'SCEL', 'DOCK6', 'NENF', 'MLLT4', 'SERPINB13', 'PALMD', 'TMEM132A', 'ASAP3', 'MTAP', 'NOVA1', 'ALOX12', 'SPINK5', 'LDB3', 'ATP5S', 'LMNA', 'BAIAP2', 'FZD4', 'GNAS', 'OBSL1', 'TCL6', 'ICOSLG', 'MACROD2', 'MAST4', 'EDA', 'ADAM22', 'CSHL1', 'SYNGR1', 'THBS1', 'PEX16', 'NOS1', 'SLCO1A2', 'CYP2A7', 'PRDM2', 'DTNA', 'HSD17B4', 'RPL29', 'PDCD4', 'IL1RN', 'CASZ1', 'C9orf16', 'RGS12', 'TRD#', 'ATP1A2', 'MPRIP', 'PDE4C', 'SPTLC2', 'TNXB', 'DDAH2', 'AOX1', 'PAIP2B', 'HNF4A', 'GLS', 'EMP1', 'ARHGEF4', 'FUT6', 'ACACB', 'NR5A2', 'N4BP2L1', 'APAF1', 'DSC2', 'EDNRB', 'RPL27A', 'CYP2C18']
MY_LISTS = [("ABCC8", ABCC8), ("APP", APP), ("BECN1", BECN1), ("ARNTL", ARNTL)]
def sort_by_matches(ref, lists):
    """Report the overlap between ``ref`` and each ``(name, items)`` pair in ``lists``.

    Prints one "Matches N in NAME" line per pair, highest overlap first;
    pairs with the same overlap are printed shortest list first.
    """
    reference = set(ref)

    def rank(row):
        # (match count, negated list length) -- consumed with reverse=True.
        return row[0], -len(row[2])

    rows = [[len(reference & set(items)), title, items] for title, items in lists]
    for count, title, _ in sorted(rows, key=rank, reverse=True):
        print("Matches {} in {}".format(count, title))
# Use each list in turn as the reference and compare it only with the lists
# that come after it, so every unordered pair is compared exactly once.
for i, (name, members) in enumerate(MY_LISTS[:-1]):
    print("Comparing Lists to " + name)
    sort_by_matches(members, MY_LISTS[i + 1:])
The output looks like this:
Comparing Lists to ABCC8
Matches 5 in ARNTL
Matches 2 in APP
Matches 0 in BECN1
Comparing Lists to APP
Matches 7 in BECN1
Matches 4 in ARNTL
Comparing Lists to BECN1
Matches 5 in ARNTL

Related

Remove from a list (or DataFrame) substrings contained the same list

I have a list of lists (let's call it IDlist). I want to remove the elements (lists) of IDlist that are subsets ("substrings") of other elements (other lists) of IDlist.
Lists are not required; pandas objects are fine too if that makes it easier.
The only approaches I have come up with work only partially (only in specific scenarios), so they are useless.
I really don't know how to make the list work on "itself".
Here is a part of the dataset. For example, lines 61,62,63,64. 61,62 and 64 are substrings of 63, so i should keep only the line 63.
56 ['2588446634610274688', '2588446634612110336']
57 ['348020242217448576', '348020448377061376', '348020482735930112']
58 ['565983471644073472', '565989347158652288']
59 ['4912580642524184960', '4912898156569562624']
60 ['318121222523445376', '318121256883850112']
61 ['356731363606425856', '357478894075788928', '357479272034582528']
62 ['356731363606425856', '357478894075788928', '357479272034582528']
63 ['356731363606425856', '356731363608936576', '357478894075788928', '357479272034582528']
64 ['356731363606425856', '356731363608936576', '357478894075788928']
65 ['2512629230496996992', '2512629230497166848']
Print command output:
>>> print(templist)
[['318121222523445376', '318121256883850112'], ['356731363606425856', '357478894075788928', '357479272034582528'], ['356731363606425856', '357478894075788928', '357479272034582528'], ['356731363606425856', '356731363608936576', '357478894075788928', '357479272034582528'], ['356731363606425856', '356731363608936576', '357478894075788928'], ['2512629230496996992', '2512629230497166848']]
The only solution I found is to iterate through IDlist with nested loops and pop subset list from a copy of IDlist
IDlist = [['2588446634610274688', '2588446634612110336'],
['348020242217448576', '348020448377061376', '348020482735930112'],
['565983471644073472', '565989347158652288'],
['4912580642524184960', '4912898156569562624'],
['318121222523445376', '318121256883850112'],
['318121222523445376', '318121256883850112'],
['356731363606425856', '357478894075788928', '357479272034582528'],
['356731363606425856', '357478894075788928', '357479272034582528'],
['356731363606425856', '356731363608936576', '357478894075788928', '357479272034582528'],
['356731363606425856', '356731363608936576', '357478894075788928'],
['2512629230496996992', '2512629230497166848'], ]
def is_subset(a, b):
    """Return True when every element of ``a`` also occurs in ``b``.

    An empty ``a`` is a subset of anything. Membership is tested with ``in``,
    so ``b`` may be any container.
    """
    return all(element in b for element in a)
# Build new_IDlist from IDlist by popping entries that are subsets or
# duplicates of other entries. The pops are applied to a shallow copy, so
# IDlist itself is never modified while it is being scanned.
# NOTE(review): the indentation of this snippet was lost in this copy, so the
# nesting cannot be read off the text. Presumably the first `else:` below
# pairs with `if len(k) < len(j):` -- confirm against the original post.
new_IDlist = IDlist.copy()
for id_j, j in enumerate(IDlist):
for id_k, k in enumerate(IDlist):
# Never compare an entry with itself.
if id_k == id_j:
continue
if len(k) < len(j):
if is_subset(k, j):
# Pop an entry of new_IDlist that equals k (presumably the first one,
# with the `break` ending the scan right after the pop -- TODO confirm).
for _, l in enumerate(new_IDlist):
if k == l:
new_IDlist.pop(_)
break
else:
if is_subset(j, k):
# cnt records whether an entry equal to k has already been seen:
# the first one is kept, later equal entries are popped.
# NOTE(review): this pops from new_IDlist while enumerating it, so the
# element right after a popped one is skipped by the iteration.
cnt = 0
for _, l in enumerate(new_IDlist):
if k == l:
if cnt:
new_IDlist.pop(_)
else:
cnt += 1
Output
['2588446634610274688', '2588446634612110336']
['348020242217448576', '348020448377061376', '348020482735930112']
['565983471644073472', '565989347158652288']
['4912580642524184960', '4912898156569562624']
['318121222523445376', '318121256883850112']
['356731363606425856', '356731363608936576', '357478894075788928', '357479272034582528']
['2512629230496996992', '2512629230497166848']
You can check if the current list is a subset or superset of all other lists with a nested loop. Let's see:
def exclude_subsets(data):
    """Return the lists in ``data`` that are not contained in another list.

    Each inner list is treated as a set of hashable, sortable items:

    * lists with the same set of items count as duplicates, and only the
      first occurrence is kept;
    * a list whose items form a proper subset of another list's items is
      dropped.

    Surviving lists are returned with their items sorted, in the order in
    which they first appear in ``data``, so the result is deterministic.
    An empty ``data`` yields ``[]``.
    """
    # Key on the item *set* so that lists differing only in order or in
    # repeated items collapse into one entry instead of eliminating each
    # other as mutual "supersets".
    unique = {}
    for item in data:
        key = frozenset(item)
        if key not in unique:
            unique[key] = sorted(item)

    keys = list(unique)
    # `key < other` is the proper-subset test, so a key never excludes itself.
    return [
        members
        for key, members in unique.items()
        if not any(key < other for other in keys)
    ]
data = [['2588446634610274688', '2588446634612110336'],
['348020242217448576', '348020448377061376', '348020482735930112'],
['565983471644073472', '565989347158652288'],
['4912580642524184960', '4912898156569562624'],
['318121222523445376', '318121256883850112'],
['318121222523445376', '318121256883850112'],
['356731363606425856', '357478894075788928', '357479272034582528'],
['356731363606425856', '357478894075788928', '357479272034582528'],
['356731363606425856', '356731363608936576', '357478894075788928', '357479272034582528'],
['356731363606425856', '356731363608936576', '357478894075788928'],
['2512629230496996992', '2512629230497166848']]
print(exclude_subsets(data))
>>
[['565983471644073472', '565989347158652288'],
['4912580642524184960', '4912898156569562624'],
['356731363606425856','356731363608936576','357478894075788928','357479272034582528'],
['2588446634610274688', '2588446634612110336'],
['348020242217448576', '348020448377061376', '348020482735930112'],
['2512629230496996992', '2512629230497166848'],
['318121222523445376', '318121256883850112']]

how can i smooth the graph values or extract main signals only

when i try to run the code below i get this graph
my code:
from numpy import nan
import json
import os
import numpy as np
import subprocess
import math
import matplotlib.pyplot as plt
from statistics import mean, stdev
def smooth(t):
    """Return a copy of sequence ``t`` with local outliers replaced.

    For every sample, the mean and sample standard deviation of a window of
    up to five values (two on each side, clipped at the ends) are computed
    from the original data. A sample lying more than one standard deviation
    from that mean is replaced by the average of its two direct neighbours;
    at either end of the sequence the single existing neighbour is used
    twice.

    Sequences with fewer than two samples are returned unchanged (as a new
    list), because a sample standard deviation needs at least two values.
    """
    count = len(t)
    if count < 2:
        return list(t)

    new_t = []
    for i, x in enumerate(t):
        neighbourhood = t[max(i - 2, 0): i + 3]
        m = mean(neighbourhood)
        s = stdev(neighbourhood, xbar=m)
        if abs(x - m) > s:
            # Mirror the missing neighbour at the boundaries.
            before = t[i - 1] if i > 0 else t[1]
            after = t[i + 1] if i + 1 < count else t[i - 1]
            x = (before + after) / 2
        new_t.append(x)
    return new_t
def outLiersFN(*U):
    """Return a pre-processed copy of the sequence passed as the first argument.

    Only ``U[0]`` is used. The first and last samples are copied unchanged.
    Every interior sample is compared with the midpoint of its two original
    neighbours:

    * if the sample is above ``M_RangeOfMotion`` (90) but below the midpoint,
      or below ``M_RangeOfMotion`` but above the midpoint, it is replaced by
      that midpoint;
    * otherwise it is kept.

    Interior values are rounded to two decimals. Neighbours are always read
    from the input, never from already-corrected output. An empty input
    yields an empty list.
    """
    preP = U[0]  # original list
    if len(preP) == 0:
        return []

    M_RangeOfMotion = 90
    last = len(preP) - 1

    outliers = [preP[0]]  # the first sample has no previous neighbour
    for idx in range(1, last):
        current = preP[idx]
        midpoint = (preP[idx - 1] + preP[idx + 1]) / 2
        # Check Paper 3.3.1
        dips_above = current > M_RangeOfMotion and current < midpoint
        bumps_below = current < M_RangeOfMotion and current > midpoint
        chosen = midpoint if (dips_above or bumps_below) else current
        # Round through a two-decimal string, which always produces a float.
        outliers.append(float("{:.2f}".format(chosen)))
    if last > 0:
        outliers.append(preP[last])  # the last sample has no next neighbour
    return outliers
def remove_nan(list):
    """Return a new list with the elements of ``list`` that are not NaN, in order."""
    kept = []
    for value in list:
        if not math.isnan(value):
            kept.append(value)
    return kept
the_angel = [176.04, 173.82, 170.09, 165.3, 171.8, 178.3, 178.77, 179.24, 179.93, 180.0, 173.39, 166.78, 166.03, 165.28, 165.72, 166.17, 166.71, 167.26, 168.04, 167.22, 166.68, 166.13, 161.53, 165.81, 170.1, 170.05, 170.5, 173.01, 176.02, 174.53, 160.09, 146.33, 146.38, 146.71, 150.33, 153.95, 154.32, 154.69, 134.52, 114.34, 115.6, 116.86, 134.99, 153.12, 152.28, 151.43, 151.36, 152.32, 158.9, 166.52, 177.74, 178.61, 179.47, 167.44, 155.4, 161.54, 167.68, 163.96, 160.24, 137.45, 114.66, 117.78, 120.89, 139.95, 139.62, 125.51, 111.79, 112.07, 112.74, 110.22, 107.7, 107.3, 106.52, 105.73, 103.07, 101.35, 102.5, 104.59, 104.6, 104.49, 104.38, 102.81, 101.25, 100.62, 100.25, 100.15, 100.32, 99.84, 99.36, 100.04, 100.31, 99.14, 98.3, 97.92, 97.41, 96.9, 96.39, 95.88, 95.9, 95.9, 96.02, 96.14, 96.39, 95.2, 94.56, 94.02, 93.88, 93.8, 93.77, 93.88, 94.04, 93.77, 93.65, 93.53, 94.2, 94.88, 92.59, 90.29, 27.01, 32.9, 38.78, 50.19, 61.59, 61.95, 62.31, 97.46, 97.38, 97.04, 96.46, 96.02, 96.1, 96.33, 95.61, 89.47, 89.34, 89.22, 89.48, 89.75, 90.02, 90.28, 88.16, 88.22, 88.29, 88.17, 88.17, 94.98, 94.84, 94.69, 94.94, 94.74, 94.54, 94.69, 94.71, 94.64, 94.58, 94.19, 94.52, 94.85, 87.7, 87.54, 87.38, 95.71, 96.57, 97.11, 97.05, 96.56, 96.07, 95.76, 95.56, 95.35, 95.28, 95.74, 96.2, 96.32, 96.33, 96.2, 96.14, 96.07, 96.07, 96.12, 96.17, 96.28, 96.31, 96.33, 96.16, 96.05, 95.94, 95.33, 88.96, 95.0, 95.78, 88.19, 88.19, 88.19, 87.92, 87.93, 88.03, 87.94, 87.86, 87.85, 87.89, 88.08, 88.01, 87.88, 88.02, 88.15, 88.15, 88.66, 88.73, 88.81, 88.41, 88.55, 88.68, 88.69, 88.02, 87.35, 95.19, 95.39, 95.38, 95.37, 95.27, 95.17, 95.33, 95.32, 95.31, 95.37, 95.42, 95.34, 95.44, 95.53, 95.47, 95.41, 95.13, 94.15, 94.78, 97.64, 97.1, 96.87, 97.03, 96.76, 35.44, 23.63, 23.27, 24.71, 26.16, 96.36, 113.13, 129.9, 96.82, 63.74, 34.25, 33.42, 32.6, 30.69, 31.06, 31.43, 97.14, 97.51, 97.23, 98.54, 100.13, 100.95, 28.82, 33.81, 66.81, 99.82, 102.63, 101.9, 101.44, 102.19, 103.22, 103.67, 104.13, 
104.07, 104.73, 105.46, 103.74, 102.02, 103.32, 102.59, 29.54, 28.08, 28.76, 29.79, 30.82, 113.51, 129.34, 145.16, 143.18, 148.29, 153.67, 166.14, 161.16, 151.64, 149.27, 146.9, 151.67, 153.02, 149.28, 145.53, 149.1, 152.67, 158.78, 164.89, 164.84, 164.8, 162.11, 159.42, 156.73, 156.28, 155.83, 156.4, 161.0, 165.59, 164.44, 159.73, 155.76, 156.97, 158.92, 159.15, 159.39, 159.99, 160.44, 160.88, 163.89, 166.9, 167.71, 167.11, 167.0, 167.44, 168.38, 153.16, 137.94, 137.65, 152.09, 169.49, 171.36, 173.22, 174.01, 174.0, 174.2, 174.41, 157.74, 141.09, 149.32, 157.57, 156.4, 148.4, 140.78, 141.06, 141.73, 143.05, 143.91, 156.59, 169.29, 172.17, 175.05, 175.29, 175.27, 175.15, 175.02, 174.81, 174.59, 174.76, 174.94, 175.18, 175.41, 175.23, 174.51, 174.64, 174.77, 174.56, 173.25, 172.38, 174.17, 176.4, 177.27, 177.29, 177.33, 178.64, 179.98, 179.99, 176.0, 172.88, 173.77, 173.8, 173.97, 174.72, 175.24, 176.89, 179.07, 179.27, 178.78, 178.29, 175.61, 174.21, 172.8, 173.05, 173.41, 173.77, 174.65, 175.52, 175.58, 176.15, 176.71, 159.12, 141.54, 141.12, 155.62, 170.53, 165.54, 160.71, 158.22, 156.35, 156.82, 158.55, 160.27, 161.33, 162.39, 162.37, 159.48, 156.59, 156.77, 158.05, 159.32, 158.49, 157.66, 157.7, 157.74, 158.44, 159.14, 150.13, 143.06, 136.0, 125.7, 115.41, 111.19, 106.97, 107.1, 107.24, 107.45, 107.67, 113.34, 119.01, 144.87, 170.73, 174.31, 177.89, 174.78, 171.67, 163.26, 134.58, 105.9, 102.98, 100.77, 101.05, 101.39, 101.73, 99.79, 98.71, 97.64, 97.8, 97.89, 96.67, 95.45, 94.33, 93.38, 92.44, 48.53, 91.4, 91.35, 91.34, 91.33, 90.92, 90.51, 88.63, 87.0, 86.74, 86.48, 96.79, 96.09, 95.46, 95.39, 94.32, 93.25, 93.31, 93.37, 93.11, 92.57, 93.41, 94.25, 96.48, 92.71, 88.94, 90.07, 90.43, 78.06, 77.69, 77.32, 90.1, 89.15, 89.14, 88.85, 88.38, 87.63, 121.2, 120.66, 86.89, 86.42, 85.69, 84.86, 84.86, 85.34, 85.82, 86.07, 86.32, 85.82, 85.32, 86.23, 86.69, 87.15, 87.04, 86.87, 86.58, 86.0, 85.41, 85.41, 85.53, 85.66, 85.7, 85.72, 85.75, 85.92, 86.09, 85.77, 85.45, 
84.94, 85.55, 86.16, 86.21, 86.1, 85.77, 85.27, 84.56, 84.99, 85.38, 85.42, 85.98, 86.54, 86.5, 86.45, 86.56, 86.63, 86.35, 86.08, 85.82, 85.51, 85.21, 84.6, 84.84, 84.97, 85.1, 86.12, 86.88, 86.8, 86.46, 86.47, 87.23, 87.8, 88.0, 88.08, 88.16, 87.72, 87.63, 87.37, 86.42, 86.48, 87.24, 87.97, 88.09, 88.19, 88.32, 88.44, 87.82, 87.2, 86.03, 85.78, 91.5, 93.0, 88.2, 88.52, 88.42, 87.28, 85.73, 85.62, 85.5, 85.5, 87.06, 87.6, 88.1, 88.31, 88.53, 88.77, 89.14, 89.52, 89.46, 89.4, 90.28, 89.74, 91.28, 92.17, 92.16, 92.15, 93.08, 94.0, 94.66, 95.32, 94.13, 93.7, 93.32, 93.69, 94.58, 95.47, 97.25, 99.03, 99.63, 99.67, 99.71, 100.33, 101.58, 103.36, 103.49, 103.41, 106.31, 109.34, 109.28, 109.21, 107.76, 106.31, 105.43, 104.94, 104.44, 111.19, 117.93, 115.59, 113.24, 116.15, 119.06, 125.43, 140.72, 156.0, 161.7, 143.52, 135.33, 127.13, 127.68, 148.68, 169.68, 172.2, 174.72, 174.75, 174.66, 158.57, 142.63, 145.13, 153.29, 161.45, 163.34, 165.24, 162.25, 159.89, 159.07, 156.39, 155.21, 156.04, 159.29, 160.07, 160.85, 163.45, 162.93, 161.71, 160.06, 158.4, 144.74, 132.64, 134.57, 150.22, 165.86, 172.95, 174.12, 175.3, 175.5, 176.31, 177.71, 179.72, 168.13, 156.55, 146.24, 155.75, 176.0, 175.99, 175.98, 176.0, 176.02, 176.25, 175.13, 174.26, 173.38, 173.37, 173.46, 176.34, 174.55, 172.77, 168.45, 166.35, 166.47, 168.81, 167.43, 166.79, 167.35, 168.65, 168.51, 168.37, 168.88, 169.74, 171.19, 171.33, 169.91, 168.49, 167.11, 166.83, 167.01, 168.68, 170.34, 170.43, 172.15, 173.86, 177.62, 177.61, 175.34, 173.06, 176.47, 179.87, 179.9, 177.67, 175.67, 175.39, 175.36, 177.03, 176.0, 174.98, 174.96, 174.94, 175.76, 176.57, 169.05, 162.99, 164.97, 168.74, 172.51, 167.38, 165.08, 163.03, 163.81, 164.83, 164.81, 164.8, 165.88, 165.36, 159.61, 153.86, 153.57, 153.61, 153.65, 154.62, 155.58, 157.97, 156.35, 155.66, 154.98, 156.11, 157.24, 159.25, 159.6, 160.43, 161.26, 164.71, 168.17, 147.46, 126.92, 106.38, 105.23, 104.4, 105.37, 106.65, 109.21, 107.44, 104.65, 101.86, 102.35, 102.84, 
102.79, 102.19, 101.59, 100.98, 100.38, 98.72, 97.73, 97.32, 96.9, 95.11, 93.97, 94.12, 94.12, 93.1, 92.08, 89.29, 90.35, 90.35, 90.35, 90.35, 86.95, 86.37, 86.06, 85.74, 94.56, 93.16, 92.46, 91.76, 88.55, 85.33, 87.52, 92.18, 93.68, 95.18, 94.4, 92.17, 89.94, 89.4, 89.37, 99.44, 100.98, 102.52, 103.18, 88.96, 88.23, 87.5, 85.2, 85.19, 86.87, 121.42, 155.96, 155.97, 155.97, 86.2, 86.5, 86.8, 87.22, 87.36, 87.34, 87.03, 87.04, 87.05, 86.36, 85.68, 85.71, 85.84, 85.93, 86.01, 86.04, 86.08, 85.92, 86.05, 86.18, 86.17, 86.19, 86.23, 86.22, 86.09, 85.92, 85.66, 85.69, 85.69, 85.31, 84.91, 84.93, 84.95, 84.93, 84.91, 84.9, 84.9, 84.9, 84.9, 85.38, 85.52, 85.66, 85.66, 85.4, 85.14, 85.47, 85.8, 85.72, 85.64, 86.09, 85.84, 85.27, 85.47, 85.66, 85.59, 85.52, 85.38, 85.39, 85.28, 85.17, 85.39, 85.7, 85.98, 86.26, 86.61, 92.97, 93.15, 86.58, 86.58, 86.53, 86.47, 98.55, 99.41, 100.16, 100.9, 89.19, 90.28, 91.38, 91.39, 91.4, 91.44, 92.05, 131.05, 170.63, 170.13, 162.43, 125.64, 88.85, 88.85, 99.08, 100.38, 101.69, 100.74, 99.79, 96.33, 93.31, 93.73, 94.87, 96.01, 96.93, 97.85, 98.97, 97.85, 98.14, 99.37, 102.01, 103.8, 105.58, 108.52, 108.12, 107.72, 106.75, 106.82, 109.08, 112.37, 112.52, 112.66, 112.97, 114.12, 115.64, 117.1, 118.57, 126.13, 133.69, 149.27, 163.96, 166.62, 169.27, 164.94, 160.61, 149.35, 141.18, 143.41, 143.57, 149.26, 157.49, 159.94, 151.93, 147.47, 145.97, 145.56, 145.15, 143.85, 142.54, 142.18, 142.43, 143.12, 144.41, 144.38, 151.99, 159.59, 174.81, 174.94, 175.84, 176.87, 162.41, 152.94, 151.59, 155.24, 155.22, 155.19, 155.04]
p0 = outLiersFN(smooth(remove_nan(the_angel)))
the_angel = p0
plt.plot(the_angel) #list(filter(fun, L1))
plt.show()
print((the_angel))
How can I smooth the values in `the_angel` to get a graph like this (red line)?
I mean ignoring all the unnecessary, noisy values and keeping only the main line.
You can edit my code or suggest a new filter or algorithm.
pandas has a rolling() method for dataframes that you can use to calculate the mean over a window of values, e.g. the 70 closest ones:
import pandas as pd
import matplotlib.pyplot as plt
WINDOW_SIZE = 70
the_angel = [176.04, 173.82, 170.09, 165.3, 171.8, # ...
]
df = pd.DataFrame({'the angel': the_angel})
df[f'mean of {WINDOW_SIZE}'] = df['the angel'].rolling(
window=WINDOW_SIZE, center=True).mean()
df.plot(color=['blue', 'red']);

Sudoku checker issues with Python

I'm trying to create a sudoku checker in Python. I found a version here in another thread, but it does not work properly. I wonder what is the issue?
I receive the following error:
Traceback (most recent call last):
File "C:\Users\Omistaja\Downloads\sudoku_checker_template.py", line 72, in <module>
main()
File "C:\Users\Omistaja\Downloads\sudoku_checker_template.py", line 63, in main
is_valid = check_sudoku_grid(grid)
File "C:\Users\Omistaja\Downloads\sudoku_checker_template.py", line 20, in check_sudoku_grid
if grid[row][col] < 1 or type(grid[row][col]) is not type(1):
TypeError: '<' not supported between instances of 'NoneType' and 'int'
Anyway, below is the whole thing. Only the check_sudoku_grid should be modified, the rest should work. Thanks for your help!
from grids import GRID_NAMES, GRID_RETURNS, GRIDS, GRIDS_BIG, GRIDS_SMALL
GRID_SIZE = 9 # Length of one side of the sudoku
SUBGRID_SIZE = 3 # Length of one side of a cell of the sudoku
def _has_duplicates(values):
    """Return True when any non-None value occurs more than once in ``values``."""
    filled = [value for value in values if value is not None]
    return len(set(filled)) != len(filled)


def check_sudoku_grid(grid):
    """
    Parameter : square two-dimensional list (len(grid) rows of len(grid) values)
    Return value : Boolean (True/False)
    Checks whether a sudoku grid is valid
    ie. doesn't contain any duplicates (besides None)
    in any row, column or cell.

    None marks an empty square and is ignored everywhere. Integer values
    must lie in 1..len(grid); booleans are rejected. Any other non-None
    value is treated as an opaque symbol and only takes part in the
    duplicate checks. The cell (sub-grid) check is performed when len(grid)
    is a perfect square; an empty grid is valid.
    """
    size = len(grid)

    # Value check: skip None *before* comparing, since None < 1 raises TypeError.
    for row in grid:
        for value in row:
            if value is None:
                continue
            if isinstance(value, bool):
                return False
            # for example a 2x2 grid should not have the value 8 in it
            if isinstance(value, int) and not 1 <= value <= size:
                return False

    # check the rows
    if any(_has_duplicates(row) for row in grid):
        return False

    # check the cols
    for col in range(size):
        if _has_duplicates(row[col] for row in grid):
            return False

    # check the cells (sub-grids)
    side = int(round(size ** 0.5))
    if side and side * side == size:
        for top in range(0, size, side):
            for left in range(0, size, side):
                cell = [
                    grid[r][c]
                    for r in range(top, top + side)
                    for c in range(left, left + side)
                ]
                if _has_duplicates(cell):
                    return False

    # if you get past all the false checks return True
    return True
def print_grid(grid):
    """Print the top-left GRID_SIZE x GRID_SIZE squares of ``grid``.

    Each value is followed by a space, with an extra space after every
    SUBGRID_SIZE-th column and a blank line after every SUBGRID_SIZE-th row.
    Values that ``int()`` rejects with TypeError are shown as "_"; values it
    rejects with ValueError are printed as they are.
    """
    block_end = SUBGRID_SIZE - 1
    for r in range(GRID_SIZE):
        pieces = []
        for c in range(GRID_SIZE):
            try:
                shown = int(grid[r][c])
            except TypeError:
                shown = "_"
            except ValueError:
                shown = grid[r][c]
            pieces.append("{} ".format(shown))
            if c % SUBGRID_SIZE == block_end:
                pieces.append(" ")
        print("".join(pieces))
        if r % SUBGRID_SIZE == block_end:
            print()
def main():
    """Run check_sudoku_grid on every grid in GRIDS and print a report.

    For each grid this prints its description from GRID_NAMES, the expected
    result from GRID_RETURNS, the value actually returned, and then the grid
    itself via print_grid.
    """
    for index, grid in enumerate(GRIDS):
        verdict = check_sudoku_grid(grid)
        print("This grid {:s}.".format(GRID_NAMES[index]))
        print("Your function should return: {:s}".format(GRID_RETURNS[index]))
        print("Your function returns: {}".format(verdict))
        print_grid(grid)
main()
GRID_NAMES = ["is valid", "is valid containing None values", "is valid containing None values (2)", \
"has an invalid row", "has an invalid column", "has an invalid subgrid"]
GRID_RETURNS = ["True","True","True","False","False","False"]
n = None
a = 'a'
b = 'b'
c = 'c'
d = 'd'
e = 'e'
f = 'f'
g = 'g'
GRID_VALID = [[7,3,5, 6,1,4, 8,9,2],
[8,4,2, 9,7,3, 5,6,1],
[9,6,1, 2,8,5, 3,7,4],
[2,8,6, 3,4,9, 1,5,7],
[4,1,3, 8,5,7, 9,2,6],
[5,7,9, 1,2,6, 4,3,8],
[1,5,7, 4,9,2, 6,8,3],
[6,9,4, 7,3,8, 2,1,5],
[3,2,8, 5,6,1, 7,4,9]
]
GRID_VALID_NONE = [[7,3,5, 6,1,4, 8,9,2],
[8,4,2, 9,7,3, 5,6,1],
[9,6,1, 2,8,5, 3,7,4],
[2,n,n, 3,4,n, 1,5,7],
[4,1,3, 8,5,7, 9,2,6],
[5,7,9, 1,2,6, 4,3,8],
[1,5,7, 4,9,n, 6,8,3],
[6,9,4, 7,3,8, 2,1,5],
[n,2,8, 5,6,1, 7,4,9]
]
GRID_VALID_NONE_2 = [[7,3,5, 6,1,4, n,9,2],
[8,4,2, 9,7,3, 5,6,1],
[n,n,1, 2,8,5, 3,7,4],
[2,n,n, 3,4,n, 1,5,7],
[4,1,3, 8,5,7, 9,2,6],
[5,n,9, 1,2,6, 4,3,8],
[1,5,7, 4,9,n, n,8,3],
[6,9,4, 7,3,8, 2,1,5],
[n,2,8, 5,6,1, 7,4,n]
]
GRID_INVALID_SUBGRID = [[7,3,5, 6,1,4, 8,9,2],
[8,4,2, 9,7,3, 5,6,1],
[9,6,1, 2,8,5, 3,7,4],
[2,8,6, 3,4,9, 1,5,7],
[4,1,3, n,5,7, 9,2,6],
[5,7,9, 1,2,6, 4,3,8],
[1,5,7, 4,9,2, 6,8,3],
[6,9,4, 7,3,8, 2,1,5],
[3,2,n, 8,6,1, 7,4,9]
]
GRID_INVALID_ROW = [[7,3,5, 6,1,4, 8,9,2],
[8,4,2, 9,7,3, 5,6,1],
[9,6,1, 2,8,5, 3,7,4],
[2,8,6, 3,4,9, 1,5,7],
[4,1,3, 8,5,7, 9,2,6],
[5,7,9, 1,2,6, 4,3,8],
[1,5,7, 4,9,2, 6,8,n],
[6,9,4, 7,3,8, 2,1,3],
[3,2,8, 5,6,1, 7,4,9]
]
GRID_INVALID_COLUMN = [[7,3,5, 6,1,4, 8,9,2],
[8,4,2, 9,7,3, 5,6,1],
[9,6,1, 2,8,5, 3,7,4],
[2,8,6, 3,4,9, 1,5,7],
[4,1,3, 8,5,7, 9,2,6],
[5,7,9, 1,2,6, 4,3,8],
[1,5,n, 4,9,2, 6,8,3],
[6,9,4, 7,3,8, 2,1,5],
[7,2,8, 5,6,1, n,4,9]
]
GRIDS = [GRID_VALID, GRID_VALID_NONE, GRID_VALID_NONE_2, \
GRID_INVALID_ROW, GRID_INVALID_COLUMN, GRID_INVALID_SUBGRID]
GRID_SMALL_VALID = [[1,2, 3,4],
[3,4, 1,2],
[2,3, 4,1],
[4,1, 2,3]]
GRID_SMALL_VALID_NONE = [[1,n, 3,4],
[3,4, n,n],
[2,n, 4,1],
[4,1, n,3]]
GRID_SMALL_VALID_NONE_2 = [[1,n, 3,4],
[n,n, n,2],
[2,n, 4,1],
[4,n, 2,3]]
GRID_SMALL_INVALID_ROW = [[1,2, 3,n],
[2,3, 4,4],
[3,4, 1,2],
[4,1, 2,3]]
GRID_SMALL_INVALID_COLUMN = [[1,2, 3,4],
[2,3, 4,1],
[3,4, n,1],
[4,1, 2,3]]
GRID_SMALL_INVALID_SUBGRID = [[1,2, 3,4],
[2,3, 4,1],
[3,4, 1,2],
[4,1, 2,3]]
GRIDS_SMALL = [GRID_SMALL_VALID, GRID_SMALL_VALID_NONE, GRID_SMALL_VALID_NONE_2, \
GRID_SMALL_INVALID_ROW, GRID_SMALL_INVALID_COLUMN, GRID_SMALL_INVALID_SUBGRID]
# 16x16 fixture (4x4 boxes) with every cell filled.
# NOTE(review): the letters a-g are names defined outside this chunk;
# presumably the values 10-16 -- confirm.
GRID_BIG_VALID = [[4,a,9,f, 1,7,d,8, 6,e,2,c, g,5,3,b],
[2,5,3,1, f,4,b,g, d,9,8,7, 6,a,c,e],
[e,6,d,c, 3,a,5,2, g,b,1,4, 8,f,9,7],
[b,7,g,8, 6,e,9,c, 5,3,a,f, 1,2,d,4],
[8,g,b,4, d,f,e,9, 2,5,7,3, c,1,a,6],
[1,e,6,d, c,8,4,5, a,g,9,b, 2,3,7,f],
[a,f,5,3, 2,1,6,7, 4,c,e,8, 9,b,g,d],
[c,2,7,9, b,3,g,a, f,d,6,1, 4,8,e,5],
[9,4,1,a, e,2,3,d, b,f,c,6, 7,g,5,8],
[5,8,e,g, 7,9,1,6, 3,4,d,a, b,c,f,2],
[7,3,f,6, g,b,c,4, 8,2,5,9, e,d,1,a],
[d,c,2,b, a,5,8,f, 7,1,g,e, 3,6,4,9],
[f,9,8,2, 4,c,7,3, 1,a,b,d, 5,e,6,g],
[6,d,c,5, 9,g,f,1, e,8,4,2, a,7,b,3],
[g,b,4,7, 8,d,a,e, c,6,3,5, f,9,2,1],
[3,1,a,e, 5,6,2,b, 9,7,f,g, d,4,8,c]
]
# 16x16 fixture in which a number of cells hold `n`; the name labels it valid.
# NOTE(review): `n` and the letters a-g are defined outside this chunk;
# presumably None and the values 10-16 -- confirm.
GRID_BIG_VALID_NONE = [[4,a,9,n, 1,7,d,8, 6,e,n,c, g,5,3,n],
[n,5,3,1, f,n,b,g, d,9,8,7, 6,a,c,e],
[e,6,d,c, 3,a,5,2, g,b,n,4, n,f,n,7],
[b,7,n,8, n,e,9,c, n,3,a,f, 1,2,d,4],
[8,g,b,4, d,f,n,9, 2,5,7,n, c,1,a,6],
[1,e,n,d, c,n,4,5, a,g,n,b, 2,3,7,f],
[a,f,n,3, 2,1,n,7, n,n,e,8, 9,b,g,n],
[c,2,7,9, b,3,g,a, f,d,6,1, 4,n,n,5],
[9,4,1,a, e,n,3,d, b,f,c,6, 7,g,5,8],
[5,n,e,g, 7,9,n,6, 3,4,d,a, b,n,f,2],
[7,3,f,6, g,b,c,4, n,n,5,9, e,d,n,a],
[n,n,n,b, a,5,8,f, 7,1,n,e, 3,6,4,9],
[f,9,8,2, 4,c,7,3, n,n,b,d, 5,e,6,g],
[6,n,c,5, 9,n,f,1, e,n,4,2, a,7,n,3],
[g,b,4,7, 8,d,a,e, c,6,n,5, f,9,2,n],
[3,1,n,n, n,6,2,b, 9,7,f,g, d,4,8,c]
]
# Second 16x16 fixture with `n` cells, using a different pattern of `n`
# positions than GRID_BIG_VALID_NONE; the name labels it valid.
# NOTE(review): `n` and the letters a-g are defined outside this chunk;
# presumably None and the values 10-16 -- confirm.
GRID_BIG_VALID_NONE_2 = [[4,a,9,f, 1,7,d,n, 6,e,n,n, g,n,3,b],
[2,5,3,1, f,4,b,g, d,n,8,7, 6,a,n,e],
[e,6,d,c, 3,a,n,2, g,b,1,4, 8,f,9,7],
[b,7,g,n, n,e,9,c, 5,3,a,n, n,2,d,4],
[8,g,b,4, d,f,e,n, 2,5,7,3, c,1,a,6],
[1,n,6,d, n,n,4,n, a,g,n,b, 2,3,7,f],
[a,f,5,3, 2,1,6,7, 4,c,e,8, 9,b,g,d],
[c,2,7,n, b,3,g,a, f,d,6,1, 4,8,e,5],
[9,4,1,a, e,2,n,n, b,f,c,n, 7,g,5,8],
[5,n,e,g, 7,9,n,6, 3,4,d,a, b,c,f,2],
[7,3,f,6, g,b,c,4, 8,n,n,n, e,d,1,a],
[d,c,2,n, a,n,8,f, 7,1,g,n, 3,6,n,9],
[f,n,8,2, 4,c,7,3, 1,a,b,d, n,e,6,n],
[6,d,c,5, 9,g,f,1, e,8,4,2, a,7,b,3],
[g,b,n,7, 8,d,a,e, n,6,n,5, f,n,2,n],
[n,1,a,e, n,6,2,b, 9,n,f,g, d,n,8,c]
]
# 16x16 fixture with a row conflict: row 12 holds 8 twice (columns 6 and 7).
# The cell at row 6 column 6 is `n`, so column 6 contains 8 only once.
# Both 8s also fall in the same 4x4 box (rows 9-12, columns 5-8), so that
# box is in conflict as well.
# NOTE(review): `n` and the letters a-g are defined outside this chunk;
# presumably None and the values 10-16 -- confirm.
GRID_BIG_INVALID_ROW = [[4,a,9,f, 1,7,d,8, 6,e,2,c, g,5,3,b],
[2,5,3,1, f,4,b,g, d,9,8,7, 6,a,c,e],
[e,6,d,c, 3,a,5,2, g,b,1,4, 8,f,9,7],
[b,7,g,8, 6,e,9,c, 5,3,a,f, 1,2,d,4],
[8,g,b,4, d,f,e,9, 2,5,7,3, c,1,a,6],
[1,e,6,d, c,n,4,5, a,g,9,b, 2,3,7,f],
[a,f,5,3, 2,1,6,7, 4,c,e,8, 9,b,g,d],
[c,2,7,9, b,3,g,a, f,d,6,1, 4,8,e,5],
[9,4,1,a, e,2,3,d, b,f,c,6, 7,g,5,8],
[5,8,e,g, 7,9,1,6, 3,4,d,a, b,c,f,2],
[7,3,f,6, g,b,c,4, 8,2,5,9, e,d,1,a],
[d,c,2,b, a,8,8,f, 7,1,g,e, 3,6,4,9],
[f,9,8,2, 4,c,7,3, 1,a,b,d, 5,e,6,g],
[6,d,c,5, 9,g,f,1, e,8,4,2, a,7,b,3],
[g,b,4,7, 8,d,a,e, c,6,3,5, f,9,2,1],
[3,1,a,e, 5,6,2,b, 9,7,f,g, d,4,8,c]
]
# 16x16 fixture with a column conflict: column 15 holds 7 twice (rows 6
# and 7). The cell at row 7 column 8 is `n`, so row 7 contains 7 only once.
# Both 7s also fall in the same 4x4 box (rows 5-8, columns 13-16), so that
# box is in conflict as well.
# NOTE(review): `n` and the letters a-g are defined outside this chunk;
# presumably None and the values 10-16 -- confirm.
GRID_BIG_INVALID_COLUMN = [[4,a,9,f, 1,7,d,8, 6,e,2,c, g,5,3,b],
[2,5,3,1, f,4,b,g, d,9,8,7, 6,a,c,e],
[e,6,d,c, 3,a,5,2, g,b,1,4, 8,f,9,7],
[b,7,g,8, 6,e,9,c, 5,3,a,f, 1,2,d,4],
[8,g,b,4, d,f,e,9, 2,5,7,3, c,1,a,6],
[1,e,6,d, c,8,4,5, a,g,9,b, 2,3,7,f],
[a,f,5,3, 2,1,6,n, 4,c,e,8, 9,b,7,d],
[c,2,7,9, b,3,g,a, f,d,6,1, 4,8,e,5],
[9,4,1,a, e,2,3,d, b,f,c,6, 7,g,5,8],
[5,8,e,g, 7,9,1,6, 3,4,d,a, b,c,f,2],
[7,3,f,6, g,b,c,4, 8,2,5,9, e,d,1,a],
[d,c,2,b, a,5,8,f, 7,1,g,e, 3,6,4,9],
[f,9,8,2, 4,c,7,3, 1,a,b,d, 5,e,6,g],
[6,d,c,5, 9,g,f,1, e,8,4,2, a,7,b,3],
[g,b,4,7, 8,d,a,e, c,6,3,5, f,9,2,1],
[3,1,a,e, 5,6,2,b, 9,7,f,g, d,4,8,c]
]
# 16x16 fixture whose conflict sits inside a 4x4 box: the box at rows 13-16,
# columns 9-12 holds 4 twice (row 14 column 11 and row 15 column 10).
# The cells at row 10 column 10 and row 15 column 3 are `n`, so column 10
# and row 15 each contain 4 only once.
# NOTE(review): `n` and the letters a-g are defined outside this chunk;
# presumably None and the values 10-16 -- confirm.
GRID_BIG_INVALID_SUBGRID = [[4,a,9,f, 1,7,d,8, 6,e,2,c, g,5,3,b],
[2,5,3,1, f,4,b,g, d,9,8,7, 6,a,c,e],
[e,6,d,c, 3,a,5,2, g,b,1,4, 8,f,9,7],
[b,7,g,8, 6,e,9,c, 5,3,a,f, 1,2,d,4],
[8,g,b,4, d,f,e,9, 2,5,7,3, c,1,a,6],
[1,e,6,d, c,8,4,5, a,g,9,b, 2,3,7,f],
[a,f,5,3, 2,1,6,7, 4,c,e,8, 9,b,g,d],
[c,2,7,9, b,3,g,a, f,d,6,1, 4,8,e,5],
[9,4,1,a, e,2,3,d, b,f,c,6, 7,g,5,8],
[5,8,e,g, 7,9,1,6, 3,n,d,a, b,c,f,2],
[7,3,f,6, g,b,c,4, 8,2,5,9, e,d,1,a],
[d,c,2,b, a,5,8,f, 7,1,g,e, 3,6,4,9],
[f,9,8,2, 4,c,7,3, 1,a,b,d, 5,e,6,g],
[6,d,c,5, 9,g,f,1, e,8,4,2, a,7,b,3],
[g,b,n,7, 8,d,a,e, c,4,3,5, f,9,2,1],
[3,1,a,e, 5,6,2,b, 9,7,f,g, d,4,8,c]
]
# Every 16x16 fixture in one list: the three "valid" grids first, then the
# row / column / subgrid conflict grids.
GRIDS_BIG = [
    GRID_BIG_VALID,
    GRID_BIG_VALID_NONE,
    GRID_BIG_VALID_NONE_2,
    GRID_BIG_INVALID_ROW,
    GRID_BIG_INVALID_COLUMN,
    GRID_BIG_INVALID_SUBGRID,
]

Issue in splitting into list in python

This is my python code. In this code, the str1 field contains the output of the user command (w).
The last field(WHAT) can contain one or more spaces.
I am trying to create a list from the output of the command (w).
Issue: in the second row the FROM field is empty, so that row is split into one field fewer than the others and I do not get the expected output. When the FROM field contains a value, the output is as expected.
Code:
import re
# Tokenise the captured output of the `w` command into one list per user row.
# Because every run of spaces is squeezed to a single space before splitting,
# a row with an empty column simply yields one field fewer.
list_final = []
str1 = '''17:36:34 up 17 days, 1:48, 3 users, load average: 6.33, 5.37, 5.46
USER TTY FROM LOGIN# IDLE JCPU PCPU WHAT
Arul abc/0 10.XXX.XX.X1 14:41 2:31m 2.04s 1.97s ssh centos#ABC.DEF.H.I
Peter abc/1 17:36 1.00s 0.08s 0.02s w
Joe abc/3 10.XXX.XX.X3 13:59 2:41m 4:38 0.08s /opt/ google / chrome/chrome'''
# Squeeze each run of spaces down to one space.
str1 = re.sub(' +', ' ', str1)
# Skip the uptime banner and the column-header line.
lista = str1.splitlines()[2:]
# At most 7 splits, so the last field keeps its embedded spaces.
listb = []
for row in lista:
    listb.append(row.split(" ", 7))
print(listb)
The above code produces the following result:
Result:
[['Arul', 'abc/0', '10.XXX.XX.X1', '14:41', '2:31m', '2.04s', '1.97s', 'ssh centos#ABC.DEF.H.I'],
['Peter', 'abc/1', '17:36', '1.00s', '0.08s', '0.02s', 'w'],
['Joe', 'abc/3', '10.XXX.XX.X3', '13:59', '2:41m', '4:38', '0.08s', '/opt/ google / chrome/chrome']]
I am expecting the result below: if a field (here FROM) is empty, that empty value should still appear at its position in the output list.
Expected result:
[['Arul', 'abc/0', '10.XXX.XX.X1', '14:41', '2:31m', '2.04s', '1.97s', 'ssh centos#ABC.DEF.H.I'],
['Peter', 'abc/1', ' ', '17:36', '1.00s', '0.08s', '0.02s', 'w'],
['Joe', 'abc/3', '10.XXX.XX.X3', '13:59', '2:41m', '4:38', '0.08s', '/opt/ google / chrome/chrome']]
# Split the output of the `w` command into fixed-width columns.
#
# The parsing is positional: every field is cut out of a row using the column
# at which the corresponding header name starts. The sample text therefore
# has to keep its column alignment -- each value must begin in the same
# column as its header, and an empty field must be padded with spaces.
lista, listb, list_final = [], [], []
str1 = '''17:36:34 up 17 days, 1:48, 3 users, load average: 6.33, 5.37, 5.46
USER     TTY      FROM             LOGIN#   IDLE   JCPU   PCPU   WHAT
Arul     abc/0    10.XXX.XX.X1     14:41    2:31m  2.04s  1.97s  ssh centos#ABC.DEF.H.I
Peter    abc/1                     17:36    1.00s  0.08s  0.02s  w
Joe      abc/3    10.XXX.XX.X3     13:59    2:41m  4:38   0.08s  /opt/ google / chrome/chrome'''
lista = str1.splitlines()
first_row = lista[1]

# Start column of every header name. The search resumes after the previous
# match so a name that also occurs inside an earlier header cannot be
# matched at the wrong position.
indices = []
search_from = 0
for header in first_row.split():
    start = first_row.index(header, search_from)
    indices.append(start)
    search_from = start + len(header)

lista = lista[2:]

# Split according to the header line: each column runs from its own start to
# the start of the next column; the last column runs to the end of the row.
bounds = list(zip(indices, indices[1:] + [None]))
lines_splitted = [[line[lo:hi] for lo, hi in bounds] for line in lista]

# Normalise whitespace column by column: a blank field becomes a run of
# spaces as wide as the longest value in that column, every other field is
# stripped of its padding.
for index in range(len(indices)):
    max_length = max(
        (len(row[index].strip()) for row in lines_splitted), default=0
    )
    for line_splitted in lines_splitted:
        cell = line_splitted[index]
        if cell.isspace():
            line_splitted[index] = ' ' * max_length
        else:
            line_splitted[index] = cell.strip()
print(*lines_splitted, sep='\n')

Convert list of IPs to list of corresponding IP Ranges (python)

I want to convert a list of IPs to a list of corresponding IP ranges.
For example:
iplist = ['137.226.161.121', '134.130.4.1', '137.226.161.149', '137.226.161.221', '137.226.161.240', '137.226.161.237', '8.8.8.8', '8.8.4.4', '137.226.161.189', '137.226.161.245', '137.226.161.172', '137.226.161.241', '137.226.161.234', '137.226.161.236', '134.130.5.1']
to
ipranges = ['137.226.161.0/24', '134.130.4.0/24', '8.8.8.0/24', '8.8.4.0/24', '134.130.5.0/24']
What is the most efficient way of doing this? I haven't found a module that offers a function like that. The reason for this function is that a long list of IPs (over 1000 ips) should be converted into a list of subnets to improve readability.
Thank you
If I understand you correctly, you only want to match based on the first 24 bits (/24) being identical. For these tasks I recommend a set:
iplist = ['137.226.161.121', '134.130.4.1', '137.226.161.149', '137.226.161.221', '137.226.161.240', '137.226.161.237', '8.8.8.8', '8.8.4.4', '137.226.161.189', '137.226.161.245', '137.226.161.172', '137.226.161.241', '137.226.161.234', '137.226.161.236', '134.130.5.1']
# Reduce every address to its first three octets; collecting them in a set
# keeps each /24 prefix only once.
ipset = {".".join(ip.split(".")[:-1]) for ip in iplist}
# Turn each distinct prefix into CIDR notation. A set has no defined order,
# so the order of the resulting list may differ between runs.
ipranges = [p + ".0/24" for p in ipset]
print(ipranges)
This prints:
['134.130.5.0/24', '8.8.4.0/24', '8.8.8.0/24', '134.130.4.0/24', '137.226.161.0/24']
So what does this code do?
First, we iterate through the list and cut off the last segment of each IP:
# Worked example for a single address: split it into octets, drop the last
# octet, and join the remaining three back together.
segments = "8.8.8.8".split(".") # segments == ["8", "8", "8", "8"]
segments_cut = segments[:-1] # segments_cut == ["8", "8", "8"]
prefix = ".".join(segments_cut) # prefix == "8.8.8"
Now we add these prefixes to the set. A Python set only allows unique elements. This results in: `ipset == {'134.130.5', '8.8.4', '8.8.8', '134.130.4', '137.226.161'}`
Finally we iterate through the set and append the suffix ".0/24" to denote a subnet.
Edit: About "efficiency"
I like the answer by darkless, but just know that my solution is significantly faster (1.2 s vs 0.09 s):
>>> import timeit
>>> # darkless' ipaddress solution
>>> timeit.timeit("[str(ipaddress.ip_network('{}/24'.format(ip), strict=False)) for ip in iplist]", setup="import ipaddress;iplist = ['137.226.161.121', '134.130.4.1', '137.226.161.149', '137.226.161.221', '137.226.161.240', '137.226.161.237', '8.8.8.8', '8.8.4.4', '137.226.161.189', '137.226.161.245', '137.226.161.172', '137.226.161.241', '137.226.161.234', '137.226.161.236', '134.130.5.1']", number=10000)
1.186...
>>> # My solution
>>> timeit.timeit("[p + '.0/24' for p in {'.'.join(i.split('.')[:-1]) for i in iplist}]", setup="import ipaddress;iplist = ['137.226.161.121', '134.130.4.1', '137.226.161.149', '137.226.161.221', '137.226.161.240', '137.226.161.237', '8.8.8.8', '8.8.4.4', '137.226.161.189', '137.226.161.245', '137.226.161.172', '137.226.161.241', '137.226.161.234', '137.226.161.236', '134.130.5.1']", number=10000)
0.096...
As Hampus Larsson mentioned, you can use python ipaddress module:
import ipaddress

iplist = ['137.226.161.121', '134.130.4.1', '137.226.161.149', '137.226.161.221', '137.226.161.240', '137.226.161.237', '8.8.8.8', '8.8.4.4', '137.226.161.189', '137.226.161.245', '137.226.161.172', '137.226.161.241', '137.226.161.234', '137.226.161.236', '134.130.5.1']
# Map every address to its enclosing /24 network; strict=False lets
# ip_network() accept a host address and mask off the host bits.
# Many addresses share a network, so dict.fromkeys() is used to drop the
# repeats while keeping the order in which each network first appears.
ipranges = list(dict.fromkeys(
    str(ipaddress.ip_network('{}/24'.format(ip), strict=False))
    for ip in iplist
))
print(ipranges)
# ['137.226.161.0/24', '134.130.4.0/24', '8.8.8.0/24', '8.8.4.0/24', '134.130.5.0/24']

Categories

Resources