Here’s an example of how to save and load a scikit-learn model using joblib, which is the library commonly recommended for serializing scikit-learn objects.
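For a fitted estimator, the round trip is simply joblib.dump and joblib.load on the estimator object. Here is a minimal sketch; the DecisionTreeClassifier, the iris data, and the 'model.pkl' filename are illustrative choices, not taken from the original script:

import joblib
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Fit a small model; any fitted estimator can be serialized the same way
X, y = load_iris(return_X_y=True)
model = DecisionTreeClassifier(max_depth=3).fit(X, y)

# Save the fitted model to disk
joblib.dump(model, 'model.pkl')

# Load it back and use it exactly like the original
restored = joblib.load('model.pkl')
print(restored.predict(X[:5]))

The script below applies the same dump/load pattern to a NumPy structured array laid out like the internal node array of a decision tree, then displays the result in Streamlit: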
import numpy as np
import joblib
import pandas as pd
import streamlit as st
# Sample node data: the fields mirror the node array of a scikit-learn decision tree
data = {
    'left_child': [0, 0, 2, -1, -1, 5, -1, -1, 8, -1, -1],
    'right_child': [1, 4, 3, -1, -1, 6, 7, -1, 9, 10, -1],
    'feature': [2, 3, 2, -2, -2, 3, -2, -2, 3, -2, -2],
    'threshold': [0.800000011920929, 1.75, 4.949999809265137, -2.0, -2.0, 1.6500000953674316, -2.0, -2.0, 1.5499999523162842, -2.0, -2.0],
    'impurity': [0.6666666666666666, 0.6666666666666666, 0.5, -2.0, -2.0, 0.1680384087791495, -2.0, -2.0, 0.4444444444444444, -2.0, -2.0],
    'n_node_samples': [3, 2, 1, -2, -2, 4, -2, -2, 3, -2, -2],
    'weighted_n_node_samples': [3.0, 2.0, 1.0, -2.0, -2.0, 4.0, -2.0, -2.0, 3.0, -2.0, -2.0],
    'missing_go_to_left': [1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0]
}
# Structured dtype describing the expected memory layout of each node record
expected_dtype = np.dtype({
    'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'],
    'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'],
    'offsets': [0, 8, 16, 24, 32, 40, 48, 56],
    'itemsize': 64
})
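# Note: these field names and formats are modeled on the node record layout used
# internally by scikit-learn's Tree object (e.g. tree_.__getstate__()['nodes']);
# the exact layout can differ between scikit-learn versions, so treat it as illustrative.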
# Build a structured array from the dictionary (one record per node)
node_array = np.array(list(zip(*[data[key] for key in expected_dtype.names])), dtype=expected_dtype)
# Serialize the array to disk with joblib
joblib.dump(node_array, 'loaded_data.pkl')
# Load the data
reloaded_data = joblib.load('loaded_data.pkl')
# Convert the reloaded array to a pandas DataFrame for easier display
df = pd.DataFrame(reloaded_data)
# Display the DataFrame in Streamlit
st.write(df)
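If you want to confirm that nothing was lost in the round trip, a quick sanity check (an optional addition, not part of the original script) is to compare the arrays field for field:

# Optional check: the reloaded array should match the original exactly
assert reloaded_data.dtype == expected_dtype
assert np.array_equal(node_array, reloaded_data)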