Unable to get correct ML prediction with saved JSON

sakshams1990 · May 11, 2022, 9:24am

The ML model requires a JSON as an input containing categorical and numerical features.

On importing the JSON through file uploader, I am passing the read JSON file (fraud file) for the ML prediction to get the output.

I am also storing each feature name in uploaded JSON to a new dictionary (streamlit_dict) and using that as an input for ML prediction.
On using these two different JSON/dictionary for ML prediction, the output of ML model is different.

import streamlit as st
import json
from single_record_prediction import predict_single_json_record
import pandas as pd
import plotly.express as px


def plotly_barplot_for_causality(causality_dict):
    new_dic = causality_dict['fraud_reported_n'] | causality_dict["fraud_reported_y"]
    df = pd.DataFrame(list(new_dic.items()), columns=['features', 'shap_value'])
    df['fraud_reported'] = ['fraud_reported_y' if float(x) < 0 else 'fraud_reported_n' for x in df['shap_value']]
    df.sort_values(by='shap_value', ascending=False, inplace=True)
    fig = px.bar(df, y=df['features'], x=df['shap_value'], orientation='h', color=df['fraud_reported'],
                 labels=df['fraud_reported'])
    fig.update_layout(showlegend=True)
    return fig


def app():
    streamlit_dict = {}
    json_file = st.file_uploader('INPUT JSON FILE!!', type=["json"])
    fraud_file = None
    if json_file is not None:
        json_file.seek(0)
        fraud_file = json.load(json_file)
        st.json(fraud_file)

    if fraud_file:
        col1, col2, col3, col4 = st.columns([1, 1, 1, 1])

        with col1:
            policy_annual_premium = st.number_input("Annual Policy Premium", min_value=400.00, max_value=2500.00,
                                                    value=fraud_file['policy_annual_premium'], format="%.2f")
            streamlit_dict['policy_annual_premium'] = policy_annual_premium


            #############################################################################
            capital_loss = st.number_input("Capital Loss", min_value=-111100, max_value=0,
                                           value=fraud_file['capital-loss'])
            streamlit_dict['capital-loss'] = capital_loss

            #############################################################################
            bodily_injuries = st.number_input("Number of People Injured", min_value=0, max_value=3,
                                              value=fraud_file['bodily_injuries'])
            streamlit_dict["bodily_injuries"] = bodily_injuries

            #############################################################################
            vehicle_claim = st.number_input("Vehicle Claim", min_value=70, max_value=79560,
                                            value=fraud_file['vehicle_claim'])
            streamlit_dict["vehicle_claim"] = vehicle_claim

            #############################################################################
            occupation_values = ["adm-clerical", "armed-forces", "craft-repair",
                                 "exec-managerial", "farming-fishing",
                                 "handlers-cleaners", "machine-op-inspct",
                                 "other-service", "priv-house-serv",
                                 "prof-specialty",
                                 "protective-serv", "sales", "tech-support",
                                 "transport-moving"]
            insured_occupation = st.selectbox("Insured Occupation", occupation_values,
                                              index=occupation_values.index(fraud_file['insured_occupation']))
            streamlit_dict["insured_occupation"] = insured_occupation

            ##############################################################################
            collision_values = ["Front Collision", "Rear Collision", "Side Collision", "UNKNOWN"]
            collision_type = st.selectbox("Collision Type", collision_values,
                                          index=collision_values.index(fraud_file['collision_type']))
            streamlit_dict["collision_type"] = collision_type

            ##############################################################################
            property_damage_value = ["YES", "NO", "UNKNOWN"]
            property_damage = st.selectbox("Property Damage", property_damage_value,
                                           index=property_damage_value.index(fraud_file['property_damage']))
            streamlit_dict["property_damage"] = property_damage
            ##############################################################################
            age = st.number_input("Age", min_value=19, max_value=64, step=1, value=fraud_file['age'])
            streamlit_dict["age"] = age

        with col2:
            umbrella_limit = st.number_input("Umbrella Limit", min_value=0, max_value=10000000, step=1000000,
                                             value=fraud_file['umbrella_limit'])
            streamlit_dict["umbrella_limit"] = umbrella_limit
            ##############################################################################

            incident_severity_values = ['Trivial Damage', 'Minor Damage', 'Major Damage', 'Total Loss']
            incident_severity = st.selectbox("Incident Severity", incident_severity_values,
                                             index=incident_severity_values.index(fraud_file['incident_severity']))
            streamlit_dict["incident_severity"] = incident_severity

            ##############################################################################

            witnesses = st.number_input("Witnesses", min_value=0, max_value=3, value=fraud_file['witnesses'])
            streamlit_dict["witnesses"] = witnesses
            ##############################################################################

            auto_make_values = ["Accura", "Audi", "BMW", "Chevrolet", "Dodge", "Ford", "Honda", "Jeep",
                                "Mercedes", "Nissan", "Saab", "Suburu", "Toyota", "Volkswagen"]
            auto_make = st.selectbox("Automobile Model", auto_make_values,
                                     index=auto_make_values.index(fraud_file['auto_make']))
            streamlit_dict["auto_make"] = auto_make
            ###############################################################################
            insured_hobbies_values = ["base-jumping", "basketball", "board-games",
                                      "bungie-jumping", "camping", "chess", "cross-fit",
                                      "dancing", "exercise", "golf", "hiking", "kayaking",
                                      "movies", "paintball", "polo", "reading", "skydiving",
                                      "sleeping", "video-games", "yachting"]

            insured_hobbies = st.selectbox("Insured Hobbies", insured_hobbies_values,
                                           index=insured_hobbies_values.index(fraud_file['insured_hobbies']))
            streamlit_dict["insured_hobbies"] = insured_hobbies
            ##############################################################################

            authorities_contacted_values = ["Ambulance", "Fire", "None", "Other", "Police"]
            authorities_contacted = st.selectbox("Authorities Contacted", authorities_contacted_values,
                                                 index=authorities_contacted_values.index(
                                                     fraud_file['authorities_contacted']))
            streamlit_dict["authorities_contacted"] = authorities_contacted
            ##############################################################################

            police_report_available_values = ["YES", "NO", "UNKNOWN"]
            police_report_available = st.selectbox("Police Report Available", police_report_available_values,
                                                   index=police_report_available_values.index(
                                                       fraud_file['police_report_available']))
            streamlit_dict["police_report_available"] = police_report_available
            ##############################################################################
            policy_deductable_values = ["500", "1000", "2000"]
            policy_deductable = st.selectbox("Policy Deductable", policy_deductable_values,
                                             index=policy_deductable_values.index(fraud_file['policy_deductable']))
            streamlit_dict["policy_deductable"] = policy_deductable

        with col3:
            insured_education_level_values = ["High School", "College", "Associate", "JD", "MD", "Masters", "PhD"]
            insured_education_level = st.selectbox("Education level", insured_education_level_values,
                                                   index=insured_education_level_values.index(
                                                       fraud_file['insured_education_level']))
            streamlit_dict["insured_education_level"] = insured_education_level

            incident_hour_of_the_day = st.number_input("Hour of the day", min_value=0, max_value=24, step=1,
                                                       value=fraud_file['incident_hour_of_the_day'])
            streamlit_dict["incident_hour_of_the_day"] = incident_hour_of_the_day

            injury_claim = st.number_input("Injury Claim", min_value=0, max_value=21450, step=1,
                                           value=fraud_file['injury_claim'])
            streamlit_dict["injury_claim"] = injury_claim

            policy_state_values = ["IL", "IN", "OH"]
            policy_state = st.selectbox("Policy State", policy_state_values,
                                        index=policy_state_values.index(fraud_file['policy_state']))
            streamlit_dict["policy_state"] = policy_state

            insured_relationship_values = ["husband", "not-in-family", "other-relative",
                                           "own-child", "unmarried", "wife"]
            insured_relationship = st.selectbox("Insured Relationship", insured_relationship_values,
                                                index=insured_relationship_values.index(
                                                    fraud_file['insured_relationship']))
            streamlit_dict["insured_relationship"] = insured_relationship

            incident_city_values = ["Arlington", "Columbus", "Hillsdale",
                                    "Northbend", "Northbrook", "Riverwood", "Springfield"]
            incident_city = st.selectbox("Incident City", incident_city_values,
                                         index=incident_city_values.index(fraud_file['incident_city']))
            streamlit_dict["incident_city"] = incident_city

            auto_year_values = ["1995", "1996", "1997", "1998", "1999", "2000", "2001",
                                "2002", "2003", "2004", "2005", "2006", "2007", "2008",
                                "2009", "2010", "2011", "2012", "2013", "2014", "2015"]
            auto_year = st.selectbox("Auto Make Year", auto_year_values,
                                     index=auto_year_values.index(fraud_file['auto_year']))
            streamlit_dict["auto_year"] = auto_year

        with col4:
            capital_gains = st.number_input("Capital Gains", min_value=0, max_value=105000,
                                            value=fraud_file['capital-gains'])
            streamlit_dict["capital-gains"] = capital_gains

            number_of_vehicles_involved = st.number_input("Number of Vehicles", min_value=1, max_value=4,
                                                          value=fraud_file['number_of_vehicles_involved'])
            streamlit_dict["number_of_vehicles_involved"] = number_of_vehicles_involved

            property_claim = st.number_input("Property Claim", min_value=0, max_value=23670, step=1,
                                             value=fraud_file['property_claim'])
            streamlit_dict["property_claim"] = property_claim

            policy_csl_values = ["100/300", "250/500", "500/1000"]
            policy_csl = st.selectbox("Policy CSL", policy_csl_values,
                                      index=policy_csl_values.index(fraud_file['policy_csl']))
            streamlit_dict["policy_csl"] = policy_csl

            incident_type_values = ["Multi-vehicle Collision", "Parked Car", "Single Vehicle Collision",
                                    "Vehicle Theft"]
            incident_type = st.selectbox("Incident Type", incident_type_values,
                                         index=incident_type_values.index(fraud_file['incident_type']))
            streamlit_dict["incident_type"] = incident_type

            incident_state_values = ["NC", "NY", "OH", "PA", "SC", "VA", "WV"]
            incident_state = st.selectbox("Incident State", incident_state_values,
                                          index=incident_state_values.index(fraud_file['incident_state']))
            streamlit_dict["incident_state"] = incident_state

            months_as_customer = st.number_input("Months As Customer", min_value=0, max_value=479, step=1,
                                                 value=fraud_file['months_as_customer'])
            streamlit_dict["months_as_customer"] = months_as_customer

        predict_button = st.sidebar.button(label="PREDICT")

        print(streamlit_dict == fraud_file)
        if predict_button:
            response = predict_single_json_record(fraud_file)  # response = predict_single_json_record(streamlit_dict)
            st.text_area(label="Fraud Score", value=response['fraudScore'], height=10)
            st.text_area(label="Fraud Status", value=response['fraudStatus'], height=10)
            st.json(response['causality'])

            fig = plotly_barplot_for_causality(response['causality'])
            st.write(fig)

            fraud_explanation_response = list(">\t" + i for i in response['explanation']['fraud_explanation'])
            fraud_explanation_response = "\n".join(fraud_explanation_response)
            st.text_area(label="Fraud Explanation", value=fraud_explanation_response, height=175,
                         key="Fraud Explanation")

            non_fraud_explanation_response = list(">\t" + i for i in response['explanation']['non_fraud_explanation'])
            non_fraud_explanation_response = "\n".join(non_fraud_explanation_response)
            st.text_area(label="Non Fraud Explanation", value=non_fraud_explanation_response,
                         height=150)


if __name__ == '__main__':
    app()

The response for fraud_file and streamlit_dict are different whereas if I compare the two dictionaries, they are exactly the same.

Could someone please highlight what could be the issue?

Topic		Replies	Views
Streamlit refreshes automatically Using Streamlit	5	1016	December 3, 2023
Streamlit UploadFile Using Streamlit	1	2807	August 20, 2023
Running 2 ML models in the same page Using Streamlit	7	1362	March 30, 2024
Need help with Streamlit code and error message Using Streamlit	9	1043	April 12, 2023
Expected str, bytes or os.PathLike object, not UploadedFile, Error Using Streamlit	6	10123	May 13, 2022

Unable to get correct ML prediction with saved JSON

Related topics

Hello there 👋🏻

Cookie settings

Strictly necessary cookies

Performance cookies

Functional cookies

Targeting cookies