ValueError: could not convert string to float: 'Male'

#Core pkgs

import streamlit as st

import streamlit.components.v1 as stc 

from streamlit_pandas_profiling import st_profile_report

from pandas_profiling import ProfileReport

#EDA pkgs

import pandas as pd

import numpy as np

import codecs 

#Utils

import os

import joblib

# Images

from PIL import Image 

from xgboost import XGBClassifier

from lightgbm import LGBMClassifier

from catboost import CatBoostClassifier

from gmpy2 import nan

 

def main():

    """

    docstring

    """

    menu = ["Home","EDA","Predition","About"]

    choice = st.sidebar.selectbox("Menu",menu)

    

    if choice =="Home":

        st.subheader("UmojaHack")

    elif choice == "EDA":

        st.subheader("Automated EDA with pandas_profiling")

        data_file=st.file_uploader("upload your dataset")

        if data_file is not None:

          df=pd.read_csv(data_file)

          st.dataframe(df.head())

          profile=ProfileReport(df)

          st_profile_report(profile)

    

    elif choice == "Predition":

        st.subheader("Predition")

        stc.html("""

    <div style="background-color:tomato;padding:10px;border-radius:10px">

    <h1 style="color:white;text-align:center;"> Prediction</h1>

    </div>  """)

       

        def get_value(val,my_dict):

          for key ,value in my_dict.items():

            if val == key:

              return value

        

        col1,col2= st.beta_columns(2)

        

        with col1:

          policy_start_date_day= st.number_input("Policy Start Data by Day",1,30)

          policy_start_date_month= st.number_input("Policy Start Data by month",1,12)

          policy_start_date_quarter= st.number_input("Policy Start Data by quarter",1,5)

          

          policy_end_date_day= st.number_input("Policy End Data by Day",1,30)

          policy_end_date_month= st.number_input("Policy End Data by month",1,12)

          policy_end_date_quarter= st.number_input("Policy End Data by quarter",1,5)

          

          first_transaction_date_day= st.number_input("First Transaction by Day",1,30)

          first_transaction_date_month= st.number_input("First Transaction by month",1,12)

                         

        with col2:

          age = st.slider("Select Age",1,300,30)

          

          gender_dict = {"Male":2,"Female":4,"others":6}

          gender= st.selectbox("Please Select your gender",tuple(gender_dict.keys()))

          result_gender = get_value(gender,gender_dict)

          # st.write(result_gender)

                   

          product_name_dict = {'Car Classic':1, 'CarSafe':5, 'Muuve':8, 'CVTP':0, 'Car Plus':2,'Motor Cycle':7,

                         'Customized Motor':6, 'CarFlex':4, 'Car Vintage':3}

          product_name= st.selectbox("Please Select the ProductName",tuple(product_name_dict.keys()))

          result_product_name = get_value(product_name,product_name_dict)

          # st.text(result_productname)

                  

          Car_Category_dict = { "Saloon":9, "JEEP":2, nan:6, "Motorcycle":5, "Truck":14, "Bus":0, "Mini Bus":3,

                                "Pick Up":7, "Mini Van":4, "Van":15, "Pick Up ":8, "CAMRY CAR HIRE":1,

                                "Wagon":16, "Shape Of Vehicle Chasis":11, "Sedan":10, "Station 4 Wheel":12,

                                "Tipper Truck":13}

          Car_Category= st.selectbox("Please Select Car Category ",tuple(Car_Category_dict.keys()))

          result_Car_Category= get_value(Car_Category,Car_Category_dict)

          # st.text(result_Car_Category)

         

          no_pol_prod_name =st.number_input("Number of Policy/ Product Name",1,1000)

          Date_diff =st.number_input("Data Difference",1,400)

          no_pol=st.number_input("Number of Policy",1,400,30)

          first_transaction_date_quarter= st.number_input("First Transaction by quarter",1,5)

        # st.write(result_gender)

        

          # Result and in json format

        results = [gender, age, no_pol, Car_Category, product_name, Date_diff, policy_start_date_day, policy_start_date_month, policy_start_date_quarter,

                   policy_end_date_day, policy_end_date_month, policy_end_date_quarter, first_transaction_date_day, first_transaction_date_month,

                   first_transaction_date_quarter, no_pol_prod_name]

        

        displayed_results = [gender, age, no_pol, Car_Category, product_name, Date_diff, policy_start_date_day, policy_start_date_month, policy_start_date_quarter,

                   policy_end_date_day, policy_end_date_month, policy_end_date_quarter, first_transaction_date_day, first_transaction_date_month,

                   first_transaction_date_quarter, no_pol_prod_name]

                

        prettified_result = {

          "gender" :gender, 

          "Age":age, 

          "no_pol":no_pol, 

          "Car_Category":Car_Category, 

          "product_name":product_name, 

          "Date_diff":Date_diff, 

          "policy_start_date_day":policy_start_date_day, 

          "policy_start_date_month":policy_start_date_month,

          "policy_start_date_quarter":policy_start_date_quarter,

          "policy_end_date_day":policy_end_date_day, 

          "policy_end_date_month":policy_end_date_month, 

          "policy_end_date_quarter":policy_end_date_quarter, 

          "first_transaction_date_day":first_transaction_date_day, 

          "first_transaction_date_month":first_transaction_date_month,

          "first_transaction_date_quarter":first_transaction_date_quarter, 

          "no_pol_prod_name":no_pol_prod_name

        }

        sample_data = np.array(results).reshape(1, -1)

                

        if st.checkbox("Your Inputs Summary"):

          st.json(prettified_result)

          st.text("Vectorized as ::{}".format(results))

          st.subheader("Prediction")

              

        if st.checkbox("Select Model for Prediction"):

          all_ml_dict= {

            "xgboost":XGBClassifier(),

            "Catboost": CatBoostClassifier(),

            "LGBM": LGBMClassifier()}

        

        # Find the Key From Dictionary

          def get_key(val,my_dict):

            for key ,value in my_dict.items():

              if val == value:

                return key

            

        # Load Models

          def load_model_n_predict(model_file):

            loaded_model = joblib.load(open(os.path.join(model_file),"rb"))

            return loaded_model

          

          # Model Selection

          model_choice = st.selectbox('Model Choice',list(all_ml_dict.keys()))

          prediction_label = {"Would not Claim Insurance": 0,"Would Claim Insurance": 1}

          if st.button("Predict"):

            # if model_choice == 'Catboost':

            #   loaded_model = joblib.load(open("models/catboost3_model.pickle","rb"))

            #   prediction = loaded_model.predict(sample_data)

            #   # final_result = get_key(prediction,prediction_label)

            #   # # st.info(final_result)

            if model_choice == 'LGBM':

              model_predictor = load_model_n_predict("models/lgbm_model4.pickle")

              prediction = model_predictor.predict(sample_data)

              # st.text(prediction)

            elif model_choice == 'xgboost':

              model_predictor = load_model_n_predict("models/xgboost_model4.pickle")

              prediction = model_predictor.predict(sample_data)

              # st.text(prediction)

                

              final_result = get_key(prediction,prediction_label)

              st.success(final_result)

                                                                      

    else:

        st.subheader("About Me")

 
                    

       

if __name__ == "__main__":

    main()

Here is the code

Here is the errror, please help where i went wrong

HI @seyirex!

First, Welcome to the Streamlit Community! :partying_face: :partying_face: :partying_face: :tada: :tada: :tada: :tada: :tada: :grin:

Can you create a minimum working example (minimum amount of code) that reproduces this problem??? I’m just having a bit of trouble reproducing your issue!

From what I can see in your error, it looks like the error may stem from passing a data set to your model_predictor that has strings in the gender section and not numbers. In my experience, these would all need to be converted into numbers in the data set.

Let me know if this helps! If not post that minimum working example and I will see if I can help.

Cheers,
Marisa

HI @Marisa_Smith

Thanks for the reply, i encoded cat feature using labeling encoding and mapping when training my dataset might not be the issue here it might be the model_predictor and please am kind of new here what does minimum working example mean so i can put it into effect.

Thanks,
Seyi.

Hey @seyirex,

NO problem!! Everyone starts as beginner! :nerd_face:

A minimum working example (sometimes abbreviated as MWE) is the least number of lines of code that produces the error you see. This would mean eliminating all un-necessary code and packages (especially packages that are modules that you wrote yourself) are removed in the example.

What is left are just the lines of code that produce the problem, this makes it easier for someone on the forum, to try and help solve the problem. So people can copy your code and be able to get an example working locally to help you debug.

Also, if your working with a particular data set (like I think you are) it might be helpful to upload a sample of that (or a link to your github!!).

Also, I dont think its your training set that might be the problem. It looks like the set your having trouble with is the sample_data set on line 353. I think that hasn’t been encoded properly and your model is trying to run prediction with ‘male’ instead of the encoded version!

cheers,
Marisa

Hey @Marisa_Smith

Thanks alot for the explanation :100:

I found another method, I used pycaret to recreate my pipeline again it was easy, fast and less complicated to implement.

I want to ask how can email or text message notification be implemented in streamlit.