Hello,
I am having hard time with filter error. I am building an app that imports a data csv file, do some calculations, and creates a new dataframe that contains a summary of the analysis. Then using job_filter=st.selectbox(“select”,pd.unique(df[‘test]) where I got an error (attached)
Any advice ? Many thanks in advance
import pandas as pd # read csv, df manipulation
import plotly.express as px # interactive charts
import streamlit as st # data web application development
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV # Alpha of Lasso
from sklearn import linear_model # Ridge Regression
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import RepeatedKFold
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
#from tabulate import tabulate
from numpy import arange
st.set_page_config(
page_title=“Real-Time Data Science Dashboard”,
page_icon=“Active”,
layout=“wide”,
)
st.title(‘Macroeconomic Analysis’)
st.markdown(‘This dashboard will help you get more information about the GDP and Reserves and plots the results’)
file=st.file_uploader(“Upload CSV data file”, type=[‘csv’])
if file is not None:
data= pd.read_csv(file)
st.write(data)
data[‘GDP’] = data[‘GDP’].diff()
data.dropna(inplace=True)
data.reset_index(inplace=True, drop=True)
y = data[‘GDP’]
x = data.iloc[:, 2:]
# Split the Sample at hand
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
Reg = LinearRegression()
Reg.fit(x_train, y_train)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# k = 10 folds and repeat the cross-validation process 3 times.
model = RidgeCV(alphas=arange(0.1, 10, 1), cv=cv, scoring='neg_mean_absolute_error')
model.fit(x_train, y_train)
model.alpha_
Ridge = linear_model.Ridge(alpha=model.alpha_)
Ridge.fit(x_train, y_train)
Polynomial = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
Polynomial.fit(x_train, y_train)
# ----- In Sample predictions with each model & Corresponding tests
y_pred = Reg.predict(x_test)
Reg_r2 = r2_score(y_test, y_pred)
Reg_MSE = mean_squared_error(y_test, y_pred)
# mean_absolute_error(y_test, y_pred)
y_pred = Ridge.predict(x_test)
Ridge_r2 = r2_score(y_test, y_pred)
Ridge_MSE = mean_squared_error(y_test, y_pred)
model_pred = {
"Linear Regression": {'Reg_MSE': Reg_MSE, 'Reg_r2': Reg_r2},
"Ridge regression ": {'Ridge_MSE': Ridge_MSE, 'Ridge_r2': Ridge_r2},
# "Polynomial regression": poly_MSE,
# "Support Vector Machine": SVR_Prediction,
# "Random forest regression":Forest_Prediction,
}
Table = [['Test', 'Regression', 'Ridge', 'Polynomial'],
['MSE', round(Reg_MSE, 2), round(Ridge_MSE, 2), round(Reg_MSE, 2)],
['R-Squared', round(Reg_r2, 2), round(Ridge_r2, 2), round(Reg_r2, 2)]]
del data
df = pd.DataFrame(Table)
st.write(df)
# Create the Filter first:
# top-level filters
job_filter = st.selectbox("Select the Job", pd.unique(df['Test']))
# dataframe filter
df = df[df['Test'] == job_filter]
period=df['date'].iloc[-1]
# KPI
col1, col2, col3=st.columns(3)
with col1:
st.metric(
label = 'GDP',
value = round (df['Ridge'].mean()) # Rounding here eliminates decimals.
)
with col2:
st.metric(
label = 'Reserves',
value = int(df['Regression'].mean())
)
with col3:
st.metric(
label = 'FDI',
value = f'$ {period}'
)
#job_filter = st.selectbox (‘Select the status’, pd.unique (df[‘Polynomial’]))
#d= df [df [‘Polynomial’] == status_filter]
df[‘date’] = pd.to_numeric(df[‘date’])
#df = df.astype({‘date’:‘int’})
fig_col1, fig_col2,fig_col3= st.columns([0.45,0.45,0.1])
# width and length 400 default.
with fig_col1:
st.markdown("First Chart")
fig1 = px.histogram(data_frame=df, x="Ridge")
st.write(fig1)
with fig_col2:
st.markdown("## Second Chart")
fig2 = px.histogram(data_frame=df, x="Regression")
st.write(fig2)
with fig_col1:
st.markdown("Graph")
fig3 = px.line(df, x='date', y=df.columns[2:], width=1200)
st.write(fig3)