I have a string of the following form:
import re
s = 'T [90] Call: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, proof{ \
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"] \
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _41204, _41234)\
T [90] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, proof{\
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"]\
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _41204, _41234)\
T [81] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q∧([temp(p∨q∨r), p, ¬q], [q])⊢(¬q), _38484, _38486, proof{\
-5: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([q,¬q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∧ ¬q])","rule([∧I])","d0(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","d1(([temp(p∨q∨r),p,¬q],[])⊢q∧([temp(p∨q∨r),p,¬q],[q])⊢(¬q))","step(9)"]\
\
-4: ["assumptions([p,¬q,p∨q∨r])","premisses_origin([¬q,q∧ ¬q])","premisses_no_origin([])","premisses_exc_origin([¬q])","conclusion([q])","rule([¬E])","d0(([temp(p∨q∨r),p],[])⊢q)","d1(([temp(p∨q∨r),p,¬q],[])⊢(q∧ ¬q))","step(8)"]\
\
-3: ["assumptions([p,p∨q∨r])","premisses_origin([q])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨I])","d0(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢q)","step(6)"]\
\
-2: ["assumptions([p,p∨q∨r])","premisses_origin([q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([p])","conclusion([p→q∨p∨r])","rule([→I])","d0(([temp(p∨q∨r)],[])⊢(p→q∨p∨r))","d1(([temp(p∨q∨r),p],[])⊢(q∨p∨r))","step(5)"]\
\
-1: ["assumptions([p∨q∨r])","premisses_origin([p→q∨p∨r,q∨r→q∨p∨r])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([(p→q∨p∨r)∧(q∨r→q∨p∨r)])","rule([∧I])","d0(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","d1(([temp(p∨q∨r)],[])⊢(p→q∨p∨r)∧([temp(p∨q∨r)],[p→q∨p∨r])⊢(q∨r→q∨p∨r))","step(3)"]\
\
0: ["assumptions([p∨q∨r])","premisses_origin([p∨q∨r,(p→q∨p∨r)∧(q∨r→q∨p∨r)])","premisses_no_origin([])","premisses_exc_origin([])","conclusion([q∨p∨r])","rule([∨E])","d0(([p∨q∨r],[])⊢(q∨p∨r))","d1(([temp(p∨q∨r)],[])⊢((p→q∨p∨r)∧(q∨r→q∨p∨r)))","step(2)"]\
\
1: ["p∨q∨r","step(1)"]\
\
2: ["p","step(4)"]\
\
3: ["¬q","step(7)"]\
\
}, _31664, 0)'
My aim now is to remove all substrings that start with proof{ and end with }, so that the desired output looks something like this:
result = 'T [90] Call: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, , _41204, _41234)\
T [90] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q, _41226, _41228, , _41204, _41234)\
T [81] Fail: proof(([temp(p∨q∨r), p, ¬q], [])⊢q∧([temp(p∨q∨r), p, ¬q], [q])⊢(¬q), _38484, _38486, , _31664, 0)'
Based on a similar question, I tried something like this:
# Build a pattern that matches the literal "proof{", captures what follows,
# and ends at a literal "}".
start = re.escape("proof{")
end = re.escape("}")
# NOTE: `(.*)` is greedy, so the match runs to the last "}" it can reach, and
# search(...).group(1) returns the captured text itself rather than `s` with
# that text removed. If nothing matches, search() returns None and .group fails.
result = re.search('%s(.*)%s' % (start, end), s).group(1)
but it doesn't do what I want.
I think I found it:
# re.sub returns a new string (str is immutable), so keep the return value.
# The lazy `.*?` stops at the first "}" after each "proof{", removing every
# block separately; DOTALL lets a block span real newlines as well.
result = re.sub(r"proof\{.*?\}", "", s, flags=re.DOTALL)
I am creating a population model featuring education.
I start with an initial picture of the population that gives the number of people for each age (0 to 95) and each level of education (0 - no education, up to 6 - university).
This picture is stored as a column of a dataframe, which is then populated iteratively with one new column per forecast year.
To populate it, I use assumptions such as the mortality rate of each age group, the enrollment and success rates of each education level, and so on.
The way I solved the problem is by adding a new column and iterating through the rows, using the value for age-1 from the previous year to compute the new value (e.g. the number of males aged 5 is the number of males aged 4 in year-1, less the ones that died).
The problem with this solution is that iterating through pandas dataframe rows with for loops and .loc is very inefficient, and it takes a long time to compute the forecast.
# add_year_temp: fill a 'lag' column with last year's counts and scale it by
# survival factors, one (age, sex, schooling) cell at a time.
# NOTE: excerpt only - the remaining age branches and the end of the function
# (including whatever it returns) are not shown here.
#
# Parameters, as used in the lines shown:
#   pop_table - dataframe; its 'age', 'sex', 'schooling', `time` and `old_year`
#               columns are read.
#   time      - name of the column parsed into 'year_ts' with pd.to_datetime.
#   old_year  - name of the column whose values are shifted into 'lag'.
#   new_year  - not used in the lines shown.
#   mortality_rate_0_1, mortality_rate_2_14 - multiplied into the survival
#               factor (1 - rate * ratio) for age <= 1 and 1 < age <= 5.
#   *_mf_ratio - applied only when sex == 'F'; every other sex value uses 1.
#   The other rate parameters are not used in the lines shown.
def add_year_temp(pop_table,time,
old_year,new_year,
enrollment_rate_primary,
success_rate_primary,
enrollment_rate_1st_cycle,
success_rate_1st_cycle,
enrollment_rate_2nd_cycle,
success_rate_2nd_cycle,
enrollment_rate_3rd_cycle,
success_rate_3rd_cycle,
enrollment_rate_university,
success_rate_university,
mortality_rate_0_1,
mortality_rate_2_14,
mortality_rate_15_64,
mortality_rate_65,
mortality_mf_ratio,
enrollment_mf_ratio,
success_mf_ratio):
# NOTE: this binds the caller's dataframe rather than copying it, so the two
# column assignments below also add 'year_ts' and 'lag' to pop_table.
temp_table = pop_table
temp_table['year_ts'] = pd.to_datetime(temp_table[time])
# 'lag' = old_year value of the preceding row within each (sex, schooling)
# group; presumably rows are ordered by age, making this the age-1 count -
# TODO confirm ordering.
temp_table['lag']= temp_table.groupby(['sex','schooling'])[old_year].shift(+1)
# fillna(0) returns a new frame; NaNs (e.g. the first row of each shifted
# group) become 0.
temp_table = temp_table.fillna(0)
for age in temp_table['age'].unique():
for sex in temp_table['sex'].unique():
# Sex-specific multipliers: 1 unless sex == 'F'.
mortality_mf_ratio_temp = 1
enrollment_mf_ratio_temp = 1
success_mf_ratio_temp = 1
if sex == 'F':
mortality_mf_ratio_temp = mortality_mf_ratio
enrollment_mf_ratio_temp = enrollment_mf_ratio
success_mf_ratio_temp = success_mf_ratio
# age <= 1, schooling 0 only: scale the cell's existing 'lag' value by
# (1 - mortality_rate_0_1 * ratio).
if age <= 1:
for schooling in [0]:
# float(...) expects the mask to select exactly one row.
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)]['lag']) \
* (1 - mortality_rate_0_1 * mortality_mf_ratio_temp)
# 1 < age <= 5, schooling 0 only: same update with mortality_rate_2_14.
elif 1 < age <= 5:
for schooling in [0]:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)]['lag']) \
* (1 - mortality_rate_2_14 * mortality_mf_ratio_temp)
Many lines later, you can see, for example, how I handle the people who finish high school and enter university:
# 15 < age <= 17, schooling 0-4: 'lag' = old_year count of the same sex and
# schooling at age-1, times (1 - mortality_rate_15_64 * ratio).
elif 15 < age <= 17:
for schooling in [0 ,1 ,2 ,3 ,4]:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==age-1) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp)
# age == 18: one branch per schooling level 0-5.
elif age == 18:
for schooling in [0 ,1 ,2, 3, 4, 5]:
# Level 0 scales the cell's existing 'lag' value; the other levels below
# read old_year at age-1 directly.
if schooling == 0:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)]['lag']) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp)
# Levels 1-3: old_year count at age-1, same level, times the survival factor.
elif schooling == 1:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==(age-1)) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp)
elif schooling == 2:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==(age-1)) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp)
elif schooling == 3:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==(age-1)) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp)
# Level 4: survivors from level 4 at age-1, times
# (1 - enrollment * success of the 3rd cycle) - the complement of the
# factor used for level 5 below.
elif schooling == 4:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==(age-1)) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) \
* (1 - enrollment_rate_3rd_cycle * enrollment_mf_ratio_temp \
* success_rate_3rd_cycle * success_mf_ratio_temp)
# Level 5: reads level schooling-1 (i.e. 4) at age-1 and keeps the
# enrollment * success share of its survivors.
elif schooling == 5:
temp_table.loc[(temp_table['age']==age) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling),'lag'] = \
float(temp_table[(temp_table['age']==(age-1)) \
& (temp_table['sex']== sex) \
& (temp_table['schooling']== schooling-1)][old_year]) \
* (1 - mortality_rate_15_64 * mortality_mf_ratio_temp) \
* (enrollment_rate_3rd_cycle * enrollment_mf_ratio_temp \
* success_rate_3rd_cycle * success_mf_ratio_temp)
And this continues for all age groups.
As I said, it does work, but it is neither elegant nor fast...
Without a minimal, verifiable example (https://stackoverflow.com/help/mcve) I cannot test this, but you can either use apply:
# Row-wise: call some_function_per_row once per row (axis=1) and store its result.
temp_table['mortality_mf_ratio'] = temp_table.apply(some_function_per_row, axis=1)
Or you could use np.where (https://docs.scipy.org/doc/numpy/reference/generated/numpy.where.html), which is vectorized:
# Vectorized form of the per-sex multiplier: rows with sex == 'F' get
# mortality_mf_ratio, every other row gets the neutral factor 1 (a 0 here
# would cancel the mortality rate for those rows).
temp_table['mortality_mf_ratio'] = np.where(temp_table['sex'] == 'F', mortality_mf_ratio, 1)