Python: Remove substrings from string starting and ending with certain words - python
I have a string of the following kind:
import re
s = 'T [90] Call: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, proof{ \
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"] \
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _41204, _41234)\
T [90] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, proof{\
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"]\
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _41204, _41234)\
T [81] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q∧([temp(p∨q∨r), p, ¬q], [q])⊢(¬q), _38484, _38486, proof{\
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"]\
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _31664, 0)'
My aim is to remove every substring that starts with proof{ and ends with }, so that the desired output looks like this:
result = 'T [90] Call: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, , _41204, _41234)\
T [90] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, , _41204, _41234)\
T [81] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q∧([temp(p∨q∨r), p, ¬q], [q])⊢(¬q), _38484, _38486, , _31664, 0)'
Based on a similar question, I tried something like this:
start = re.escape("proof{")
end = re.escape("}")
result = re.search('%s(.*)%s' % (start, end), s).group(1)
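But it doesn't do what I want: the greedy (.*) captures everything between the first proof{ and the last }, and group(1) returns that captured text instead of removing the substrings.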
I think I found it:
re.sub("proof{.*?}",'', s)
Related
What is the correct way to print this image?
I'm trying to print the following image via python print(""" ____ .\ / |\\ //\ / \\// \ / / \ \ / / \ \ / / \ \ / /______^ \ \ / ________\ \ \ / / \ \ /\\ / \ //\ /__\\_\ /_//__\ """) input() output ____ .\ / |\ // / \// / / \ / / \ / / \ \ / /______^ \ / ________\ \ / / \ /\ / \ ///__\_\ /_//__ hope someone can help me solve this problem
Backslashes escape the newlines, change it to a raw string with r"...": print(r""" ____ .\ / |\\ //\ / \\// \ / / \ \ / / \ \ / / \ \ / /______^ \ \ / ________\ \ \ / / \ \ /\\ / \ //\ /__\\_\ /_//__\ """) input()
How to cut bytearray?
I have a bytearray, and when I list the array, I get the following data: (b'v10 \ xc73 \ x9a & \ x9edv \ x19 \ xc3B \ xbf \ x95 \ xc8 \ xd8 \ x9dN \ x8f \ xe9 \ x90J \ xax> r1 \ x1d \ xa7 \ x1fU \ x90 \ XE2 (| p \ XF1 \ x02 \ xbdw \ XB8 \ xb9 \ xf3 \ x0e \ xb2n \ xc7 ',). I need to decrypt this data, but the decryption function only accepts the bare bytes value, for example, b'v10 \ xc73 \ x9a & \ x9edv \ x19 \ xc3B \ xbf \ x95 \ xc8 \ xd8 \ x9dN \ x8f \ xe9 \ x90J \ xax> r1 \ x1d \ xa7 \ x1fU \ x90 \ xe2 (| p \ xf1 \ x02 \ xbdw \ xb8 \ xb9 \ xf3 \ x0e \ xb2n \ xc7', without the surrounding parentheses and the trailing comma. What can I do?
Supposing we have data = (b'foo',) then this data is not a bytearray, nor is it a bytes object: >>> type(data) <class 'tuple'> Because it is a tuple, we may extract that element: >>> data[0] b'foo' >>> type(data[0]) <class 'bytes'>
Python method not recognized
I am learning python so I have worked on this one thing a long time. I still can't find the answer. Interpreter says there is no method called _set_icon() code: import pyodbc as db import pandas as pd import Globals class BatchNodeData(object): """support batch node of the tree. Contains what it needs to do that""" def __init__(self): pass def _set_icon(): sql_conn = db.connect(Globals.SQL_CONN_STRING) b_query = " \ SELECT top 1 * \ FROM dbo.ETLBatchRun a \ Where b.BatchID = " + str(batchid) + \ "Order by a.StatusDT desc" df_icon = pd.read_sql(b_query, sql_conn) if not df_icon.empty: self.last_status = df_icon['StatusID'].iloc[0] def _get_icon_index(): switcher = { 1: 2, 2: 2, 3: 3, 4: 4 } switcher_selected = { 1: 7, 2: 7, 3: 8, 4: 8 } if selected: return switcher_selected.get(statusid, 0) # default 0 (yellow bar) else: return switcher.get(statusid, 0) # default 0 (yellow bar) def __init__(self, batchid): self.batch_id = None self.batch_name = None self.critical = None self.node_icon_index = None self.last_status = None self.selected = False self.running = False sql_conn = db.connect(Globals.SQL_CONN_STRING) b_query = " \ select b.BatchID \ , b.BatchName \ , c.AttributeValue as Critical \ , noRun.AttributeValue as noRun \ from dbo.ETLBatch b (nolock) \ left join dbo.etlbatchattribute (nolock) c \ on c.batchid = b.batchid \ and c.AttributeName = 'Critical' \ and c.AttributeValue = '1' \ left join dbo.etlbatchattribute (nolock) noRun \ on noRun.batchid = b.batchid \ and noRun.AttributeName = 'NotRunnableInETLMonitor' \ and noRun.AttributeValue = '1' \ Where b.BatchID = " + str(batchid) df_batch = pd.read_sql(b_query, sql_conn) for index, row in df_batch.iterrows(): batch_id = row['BatchID'] batch_name = row['BatchName'] critical = row['Critical'] _set_icon() self.node_icon_index = _get_icon_index()
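Since you've declared _set_icon() as a method bound to the class, you should be able to call it as: BatchNodeData._set_icon() Note, however, that the method body references self without declaring it as a parameter, so in practice it should be declared as def _set_icon(self): and called as self._set_icon()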
What's wrong with my conditions? Using np.where to flag rows in my pandas DataFrame
The function I am using keeps applying the red filter condition in cases where it should not apply. Here is the function I am using: tolerance = 5 def rag(data): red_filter = ((data.SHIPMENT_MOT_x == 'VESSEL') & \ ((data.latedeliverydate + pd.to_timedelta(tolerance,unit='D')) < data.m6p)) | \ ((data.SHIPMENT_MOT_x == 'AIR') & (data.latedeliverydate < data.m6p)) green_filter = (data.SHIPMENT_MOT_x == 'VESSEL') & \ (data.M6_proposed == data.m6p) & \ ((data.latedeliverydate + pd.to_timedelta(tolerance,unit='D')) >= data.m6p) | \ ((data.SHIPMENT_MOT_x == 'AIR') & (data.latedeliverydate >= data.m6p)) amber_filter = (data.SHIPMENT_MOT_x == 'VESSEL') & \ (data.M6_proposed != data.m6p) & \ ((data.latedeliverydate + pd.to_timedelta(tolerance,unit='D')) >= data.m6p) | \ ((data.SHIPMENT_MOT_x == 'AIR') & (data.latedeliverydate >= data.m6p)) data['RAG'] = np.where(green_filter, 'G', np.where(amber_filter, 'A', np.where(red_filter, 'R', '')))
Here is the solution, in case anyone is interested. np.where is useful, but I would not recommend it when there are multiple conditions: def pmm_rag(data): if ((data.MOT== 'VESSEL') & ((data.m0p + pd.to_timedelta(tolerance,unit='D')) < data.m6p)) | ((data.SHIPMENT_MOT_x == 'AIR') & (data.m0p < data.m6p)): return 'R' elif (data.MOT== 'VESSEL') & (data.M6_proposed == data.m6p) & ((data.m0p + pd.to_timedelta(tolerance,unit='D')) >= data.m6p) | ((data.MOT== 'AIR') & (data.m0p >= data.m6p)): return 'G' elif (data.MOT== 'VESSEL') & (data.M6_proposed != data.m6p) & ((data.m0p + pd.to_timedelta(tolerance,unit='D')) >= data.m6p) | ((data.MOT== 'AIR') & (data.m0p >= data.m6p)): return 'A' else: return ''
Efficient way to loop through Pandas dataframe rows
I am creating a population model featuring education. I start with initial picture of the population that gives the number of people for each age group (0 to 95), and each level of education (0 - No education, to 6 - University). This picture is treated as a column of a dataframe, that will iteratively be populated for each new year as a forecast. In order to be populated there will be assumptions or things such as mortality rate of each age group, enrollment rates and success rates of each education level and so on. The way I solved the problem is by adding a new column and iterate through the rows by using the value for age-1 from the previous year in order to compute the new value (eg. number of males with age 5 is the number of males with age 4 at year-1 less the ones that died) The problem with this solution is that iterating through pandas dataframe rows using for loops and .loc is very inefficient and it takes a lot of time to compute the forecast def add_year_temp(pop_table,time, old_year,new_year, enrollment_rate_primary, success_rate_primary, enrollment_rate_1st_cycle, success_rate_1st_cycle, enrollment_rate_2nd_cycle, success_rate_2nd_cycle, enrollment_rate_3rd_cycle, success_rate_3rd_cycle, enrollment_rate_university, success_rate_university, mortality_rate_0_1, mortality_rate_2_14, mortality_rate_15_64, mortality_rate_65, mortality_mf_ratio, enrollment_mf_ratio, success_mf_ratio): temp_table = pop_table temp_table['year_ts'] = pd.to_datetime(temp_table[time]) temp_table['lag']= temp_table.groupby(['sex','schooling'])[old_year].shift(+1) temp_table = temp_table.fillna(0) for age in temp_table['age'].unique(): for sex in temp_table['sex'].unique(): mortality_mf_ratio_temp = 1 enrollment_mf_ratio_temp = 1 success_mf_ratio_temp = 1 if sex == 'F': mortality_mf_ratio_temp = mortality_mf_ratio enrollment_mf_ratio_temp = enrollment_mf_ratio success_mf_ratio_temp = success_mf_ratio if age <= 1: for schooling in [0]: temp_table.loc[(temp_table['age']==age) \ & 
(temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)]['lag']) \ * (1 - mortality_rate_0_1 * mortality_mf_ratio_temp) elif 1 < age <= 5: for schooling in [0]: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)]['lag']) \ * (1 - mortality_rate_2_14 * mortality_mf_ratio_temp) a lot of lines later you can see how for example I define the people that finish high-school and enter university... elif 15 < age <= 17: for schooling in [0 ,1 ,2 ,3 ,4]: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==age-1) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) elif age == 18: for schooling in [0 ,1 ,2, 3, 4, 5]: if schooling == 0: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)]['lag']) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) elif schooling == 1: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==(age-1)) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) elif schooling == 2: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==(age-1)) \ & 
(temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) elif schooling == 3: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==(age-1)) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) elif schooling == 4: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==(age-1)) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) \ * (1 - enrollment_rate_3rd_cycle * enrollment_mf_ratio_temp \ * success_rate_3rd_cycle * success_mf_ratio_temp) elif schooling == 5: temp_table.loc[(temp_table['age']==age) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling),'lag'] = \ float(temp_table[(temp_table['age']==(age-1)) \ & (temp_table['sex']== sex) \ & (temp_table['schooling']== schooling-1)][old_year]) \ * (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) \ * (enrollment_rate_3rd_cycle * enrollment_mf_ratio_temp \ * success_rate_3rd_cycle * success_mf_ratio_temp) And this continues for all age groups As I said, it does work, but this is neither elegant nor fast...
Without a minimal, complete and verifiable example (https://stackoverflow.com/help/mcve) it is hard to be specific, but you can either use: temp_table['mortality_mf_ratio'] = temp_table.apply(lambda row: some_function_per_row(row), axis=1) Or you could use np.where https://docs.scipy.org/doc/numpy/reference/generated/numpy.where.html temp_table['mortality_mf_ratio'] = np.where(temp_table['sex'] == 'F', 1, 0)