Hiya,
I have written a function that fits a model for every combination of features, and I have cached it.
However, each time I change how I want to select the best model (that selection code sits outside the function), the function re-runs even though it is cached.
I don't know why this is happening. Can someone help?
Below is my code:
@st.cache(suppress_st_warning=True)
def create_model(input_df):
    """Fit one logit model per feature combination and tabulate the results.

    For every combination of 1 .. len(feat_importances) - 1 entries of
    ``feat_importances.index`` a formula ``"<option> ~ 1  + a + b ..."`` is
    built and fitted with ``smf.logit`` on ``input_df``.  Each model that
    fits successfully contributes one row holding its coefficients plus the
    summary columns ``modelNumber``, ``pRSQ``, ``precision``, ``recall``,
    ``accuracy``, ``truepos``, ``trueneg``, ``falsepos``, ``falseneg``,
    ``variableString`` and ``NumVars``.

    Args:
        input_df: data passed to ``smf.logit`` as ``data=``.

    Returns:
        A DataFrame with one row per successfully fitted model (every row
        has the index label ``'Values'``), or an empty DataFrame when no
        model could be fitted.

    NOTE(review): this function reads the module-level names
    ``feat_importances`` and ``option``.  Per the Streamlit docs, st.cache
    also hashes external variables the function references, so a change to
    either of them is expected to invalidate the cache and re-run the
    function -- confirm against the Streamlit version in use.
    """
    rows = []
    modelNumber = 1
    placeholder_text = st.empty()

    # NOTE(review): range() stops at len(feat_importances) - 1, so the
    # combination containing every feature is never fitted -- confirm intended.
    for i in range(1, len(feat_importances)):
        for c in combinations(feat_importances.index, i):
            print('-------------> Model Number:', modelNumber)

            # Formula text is kept exactly as before because it is stored in
            # the 'variableString' column of the result.
            variable_string = str(option + ' ~ 1 ') + ''.join(
                ' + ' + str(j) for j in c
            )
            placeholder_text.text(
                f'Model Number: {modelNumber} Model Variables: {variable_string}'
            )

            try:
                result = smf.logit(formula=variable_string, data=input_df).fit()
                params = result.params
                predTable = result.pred_table()
                prsq = result.prsquared
            except Exception as exc:
                # Best effort: a combination that cannot be fitted is skipped,
                # but the reason is reported instead of being silently
                # discarded.  KeyboardInterrupt/SystemExit are no longer
                # swallowed.
                print('Skipping model:', variable_string, '->', exc)
                continue

            tp = predTable[1, 1]
            tn = predTable[0, 0]
            fp = predTable[0, 1]
            fn = predTable[1, 0]
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            accuracy = (tp + tn) / (tp + tn + fp + fn)

            coeffs = pd.DataFrame(
                {'Variable': params.index, 'Values': params.values}
            )
            metrics = pd.DataFrame([
                {'Variable': 'modelNumber', 'Values': modelNumber},
                {'Variable': 'pRSQ', 'Values': prsq},
                {'Variable': 'precision', 'Values': precision},
                {'Variable': 'recall', 'Values': recall},
                {'Variable': 'accuracy', 'Values': accuracy},
                {'Variable': 'truepos', 'Values': tp},
                {'Variable': 'trueneg', 'Values': tn},
                {'Variable': 'falsepos', 'Values': fp},
                {'Variable': 'falseneg', 'Values': fn},
                {'Variable': 'variableString', 'Values': variable_string},
                {'Variable': 'NumVars', 'Values': len(c)},
            ])

            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent.  Transposing turns the long
            # (Variable, Values) table into a single wide row.
            model_row = pd.concat([coeffs, metrics], ignore_index=True)
            rows.append(model_row.set_index('Variable').T)
            modelNumber += 1

    placeholder_text.text('Finished')
    placeholder_text.text('')

    # Concatenate once at the end instead of growing the frame per model;
    # pd.concat raises on an empty list, so fall back to an empty frame.
    output = pd.concat(rows) if rows else pd.DataFrame()
    output.name = 'ModelOutput'
    return output
output_df = create_model(uploaded_df)
# Every row of output_df carries the same index label ('Values'), so the
# top-ranked row has to be taken by position; integer [] on a label-indexed
# Series relies on a deprecated positional fallback.
best_model = output_df.sort_values(by='accuracy', ascending=False)['modelNumber'].iloc[0]
Is it happening because I am sorting output_df, which depends on the output of the create_model function?
Bilal