Summary
Hello! I'm trying to create a single-page app that runs 2 ML models on the same input data. For example, 2 Random Forest models, each with a different number of trees. When I execute the predict() method for both models and print the results, the results for the first model don't match what I get when running the program in my Python console.
Steps to reproduce
Code snippet:
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
@st.cache_data
def load_data():
    """Load the California housing data and return a train/test split.

    The target is multiplied by 100000 before splitting. 20% of the rows
    are held out for testing, with a fixed ``random_state`` of 0 so the
    split is reproducible.

    Returns:
        A 4-tuple ``(x_train, x_test, y_train, y_test)``.
    """
    dataset = fetch_california_housing()
    features = dataset.data
    scaled_target = dataset.target * 100000
    split = train_test_split(
        features,
        scaled_target,
        test_size=0.2,
        random_state=0,
    )
    # train_test_split returns a list; hand back a tuple like before.
    return tuple(split)
# Module-level split: the slider bounds and the model fitting further down
# read X_train / Y_train directly from these globals.
X_train, X_test, Y_train, Y_test = load_data()
# (slider label, DataFrame column name) for each model input, listed in the
# same order as the columns of X_train.
_FEATURES = (
    ("Median Income", "MedInc"),
    ("House Age", "HouseAge"),
    ("Avg Rooms", "AveRooms"),
    ("Avg Bedrooms", "AveBedrms"),
    ("Population", "Population"),
    ("Average Occupancy", "AveOccup"),
    ("Latitude", "Latitude"),
    ("Longitude", "Longitude"),
)


@st.cache_resource
def load_model(num_trees, random_state):
    """Fit a random forest on the module-level training split.

    Defined at module level so the function is decorated once per script
    run instead of being re-created inside the button branch on each click.
    ``st.cache_resource`` keys the cache on the arguments, so each
    ``(num_trees, random_state)`` pair gets its own fitted model.

    Args:
        num_trees: Number of trees (``n_estimators``) in the forest.
        random_state: Seed passed to ``RandomForestRegressor``.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    model = RandomForestRegressor(n_estimators=num_trees, random_state=random_state)
    model.fit(X_train, Y_train)
    return model


def show_predict_page():
    """Render the input sliders and, on click, show both models' predictions.

    One integer slider is drawn per feature, bounded by the truncated
    min/max of the corresponding ``X_train`` column. When "Predict Price"
    is pressed, the slider values are fed to a 50-tree and a 100-tree
    random forest and each prediction is written to the page.
    """
    st.title("California Housing Price Prediction")
    st.write("""### Enter House Features""")

    # NOTE(review): int() truncates toward zero, so the sliders are
    # integer-valued and their range can differ slightly from the real
    # column range (e.g. for negative longitudes). Inputs typed as floats
    # elsewhere will not be reproducible from these sliders - confirm that
    # the console run used the same integer inputs.
    inputs = {}
    for column, (label, key) in enumerate(_FEATURES):
        values = X_train[:, column]
        inputs[key] = st.slider(
            label,
            min_value=int(np.min(values)),
            max_value=int(np.max(values)),
        )

    if not st.button("Predict Price"):
        return

    # Single-row frame; only its values are passed on, in _FEATURES order.
    pred_json_df = pd.DataFrame([inputs])
    row = pred_json_df.values

    rf_50_trees = load_model(50, 0)
    rf_50_trees_price = rf_50_trees.predict(row)
    # Author's expected value from their console run: 293976.22.
    st.write("Random Forest (50 Trees)", rf_50_trees_price[0])

    rf_100_trees = load_model(100, 0)
    rf_100_trees_price = rf_100_trees.predict(row)
    # Author's expected value from their console run: 285688.19.
    st.write("Random Forest (100 Trees)", rf_100_trees_price[0])
I've experimented with different caching options (like @st.cache_resource) when loading the 2 models but can't get it to work. Could you please provide an example of how to run two ML models within the same single-page app?
Thank you so much! Streamlit is an absolutely AMAZING framework!