import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

#=================Project Overview=========================#
st.title("Real Estate Price Prediction")
st.write("""
### Project Overview
This project involves predicting real estate prices based on various features such as house age, distance to the nearest MRT station, and the number of nearby convenience stores.
We will perform data cleaning, feature engineering, scaling, and visualization to prepare the data for modeling.
""")

#=================1: Data Cleaning and Structuring=========================#
# Load the datasets.
# NOTE(review): hard-coded absolute Windows paths make the app non-portable;
# consider a config value or st.file_uploader instead.
train_data_path = 'C:/Users/71591/Desktop/dataset/Train Real estate.csv'
test_data_path = 'C:/Users/71591/Desktop/dataset/Test Real estate.csv'

train_df = pd.read_csv(train_data_path)
test_df = pd.read_csv(test_data_path)

# Show the raw frames so the reader can sanity-check the columns.
st.write("### Train Data Overview")
st.write(train_df.head())

st.write("### Test Data Overview")
st.write(test_df.head())


def convert_transaction_date(date):
    """Convert a fractional-year transaction date (e.g. 2013.250) to a Timestamp.

    The integer part is the calendar year; the fractional part appears to
    encode the month in twelfths of a year (0.0 -> January) -- TODO confirm
    this encoding against the dataset's documentation.

    FIX: the original used pd.to_datetime(date, format='%Y.%f'), which parses
    the fraction as *microseconds*, collapsing every row to January and making
    the derived 'transaction_month' feature constant.  The bare ``except:``
    also hid any parse failure; only conversion errors are caught now, and
    unparseable values yield NaT instead of a bogus date.
    """
    try:
        value = float(date)
    except (TypeError, ValueError):
        return pd.NaT
    year = int(value)
    # Map the fractional year onto a 1-12 month, clamped against rounding noise.
    month = min(int(round((value - year) * 12)) + 1, 12)
    return pd.Timestamp(year=year, month=month, day=1)


# Apply the conversion for train and test datasets.
train_df['transaction_date'] = train_df['X1 transaction date'].apply(convert_transaction_date)
test_df['transaction_date'] = test_df['X1 transaction date'].apply(convert_transaction_date)

# Extract year and month features from 'transaction_date'.
train_df['transaction_year'] = train_df['transaction_date'].dt.year
train_df['transaction_month'] = train_df['transaction_date'].dt.month

test_df['transaction_year'] = test_df['transaction_date'].dt.year
test_df['transaction_month'] = test_df['transaction_date'].dt.month

# Drop the raw column now that it is fully encoded.
train_df = train_df.drop(columns=['X1 transaction date'])
test_df = test_df.drop(columns=['X1 transaction date'])

st.write("### Processed Train Data with Date Features")
st.write(train_df.head())

st.write("### Processed Test Data with Date Features")
st.write(test_df.head())

#=================Scaling Numerical Features=============================#
# Standardize the continuous predictors (zero mean, unit variance).
scaler = StandardScaler()
features_to_scale = ['X2 house age', 'X3 distance to the nearest MRT station', 'X4 number of convenience stores']
# Fit the scaler on the training data only, then reuse it on the test data so
# no information leaks from the test set into the transformation.
train_df[features_to_scale] = scaler.fit_transform(train_df[features_to_scale])
test_df[features_to_scale] = scaler.transform(test_df[features_to_scale])

st.write("### Scaled Train Data")
st.write(train_df.head())

st.write("### Scaled Test Data")
st.write(test_df.head())

#=================Data Visualization=========================#
st.write("### Data Visualizations")

# 1. Correlation Heatmap
st.write("#### Correlation Heatmap")
# FIX: restrict corr() to numeric columns -- the frame contains the
# datetime64 'transaction_date' column, which DataFrame.corr() cannot handle
# with pandas 2.x defaults (numeric_only=False).
corr_matrix = train_df.corr(numeric_only=True)
# FIX: draw on an explicit figure; st.pyplot(plt) relies on deprecated
# matplotlib global-figure state in Streamlit.
fig_corr, ax_corr = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax_corr)
st.pyplot(fig_corr)

# 2. Scatter plots of each predictor against the target.
st.write("#### Scatter Plots")
fig, ax = plt.subplots(1, 3, figsize=(18, 6))

sns.scatterplot(x='X2 house age', y='Y house price of unit area', data=train_df, ax=ax[0])
ax[0].set_title('House Age vs House Price')

sns.scatterplot(x='X3 distance to the nearest MRT station', y='Y house price of unit area', data=train_df, ax=ax[1])
ax[1].set_title('Distance to MRT vs House Price')

sns.scatterplot(x='X4 number of convenience stores', y='Y house price of unit area', data=train_df, ax=ax[2])
ax[2].set_title('Number of Convenience Stores vs House Price')

st.pyplot(fig)

# 3. Histograms for Key Features
st.write("#### Histograms")
fig, ax = plt.subplots(1, 3, figsize=(18, 6))

sns.histplot(train_df['Y house price of unit area'], bins=20, kde=True, ax=ax[0])
ax[0].set_title('House Price Distribution')

sns.histplot(train_df['X2 house age'], bins=20, kde=True, ax=ax[1])
ax[1].set_title('House Age Distribution')

sns.histplot(train_df['X3 distance to the nearest MRT station'], bins=20, kde=True, ax=ax[2])
ax[2].set_title('Distance to MRT Distribution')

st.pyplot(fig)

# 4. Box plots of price by transaction year / month.
st.write("#### Box Plots")

fig, ax = plt.subplots(1, 2, figsize=(18, 6))
sns.boxplot(x='transaction_year', y='Y house price of unit area', data=train_df, ax=ax[0])
ax[0].set_title('House Price by Transaction Year')

sns.boxplot(x='transaction_month', y='Y house price of unit area', data=train_df, ax=ax[1])
ax[1].set_title('House Price by Transaction Month')

st.pyplot(fig)

#=================Model Building: Linear Regression=========================#
st.write("### Model Building: Linear Regression")

# One shared feature list keeps the train and test design matrices aligned.
feature_columns = ['X2 house age', 'X3 distance to the nearest MRT station', 'X4 number of convenience stores', 'transaction_year', 'transaction_month']
X_train = train_df[feature_columns]
y_train = train_df['Y house price of unit area']

X_test = test_df[feature_columns]
y_test = test_df['Y house price of unit area']

# Fit a plain OLS baseline on the training split.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Hold-out predictions consumed by the evaluation section below.
y_pred_lr = lr_model.predict(X_test)
#=================Model Evaluation=========================#
# Evaluation metrics for the Linear Regression baseline.
mae_lr = mean_absolute_error(y_test, y_pred_lr)
mse_lr = mean_squared_error(y_test, y_pred_lr)
# FIX: mean_squared_error(..., squared=False) was deprecated in scikit-learn
# 1.4 and removed in 1.6; take the square root of the MSE instead.
rmse_lr = mse_lr ** 0.5
r2_lr = r2_score(y_test, y_pred_lr)

st.write(f"### Linear Regression Model Evaluation")
st.write(f"Mean Absolute Error (MAE): {mae_lr:.2f}")
st.write(f"Mean Squared Error (MSE): {mse_lr:.2f}")
st.write(f"Root Mean Squared Error (RMSE): {rmse_lr:.2f}")
st.write(f"R-squared (R2): {r2_lr:.2f}")


def _plot_actual_vs_predicted(y_true, y_predicted, title):
    """Render one actual-vs-predicted scatter with the y = x reference line."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_true, y_predicted, alpha=0.5)
    ax.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], color='red', linewidth=2)
    ax.set_title(title)
    ax.set_xlabel("Actual House Price")
    ax.set_ylabel("Predicted House Price")
    st.pyplot(fig)


_plot_actual_vs_predicted(y_test, y_pred_lr, "Actual vs Predicted - Linear Regression")

#=================Model Tuning: Ridge and Lasso=========================#
st.write("### Model Tuning: Ridge and Lasso Regression")

# L2- and L1-regularized variants of the linear baseline.
ridge_model = Ridge(alpha=1.0)
lasso_model = Lasso(alpha=0.1)

ridge_model.fit(X_train, y_train)
lasso_model.fit(X_train, y_train)

y_pred_ridge = ridge_model.predict(X_test)
y_pred_lasso = lasso_model.predict(X_test)

# Evaluate Ridge.
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
st.write(f"Ridge Model MAE: {mae_ridge:.2f}, R2: {r2_ridge:.2f}")
_plot_actual_vs_predicted(y_test, y_pred_ridge, "Actual vs Predicted - Ridge Regression")

# Evaluate Lasso.
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)
st.write(f"Lasso Model MAE: {mae_lasso:.2f}, R2: {r2_lasso:.2f}")
_plot_actual_vs_predicted(y_test, y_pred_lasso, "Actual vs Predicted - Lasso Regression")

#=================Additional Models: Decision Tree, Random Forest, Gradient Boosting=========================#
st.write("### Additional Models: Decision Tree, Random Forest, Gradient Boosting")

# Fixed random_state keeps the tree-based results reproducible across reruns.
dt_model = DecisionTreeRegressor(random_state=42)
rf_model = RandomForestRegressor(random_state=42)
gb_model = GradientBoostingRegressor(random_state=42)

dt_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)

y_pred_dt = dt_model.predict(X_test)
y_pred_rf = rf_model.predict(X_test)
y_pred_gb = gb_model.predict(X_test)

# MAE and R2 for each tree-based model.
mae_dt = mean_absolute_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)

mae_rf = mean_absolute_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

mae_gb = mean_absolute_error(y_test, y_pred_gb)
r2_gb = r2_score(y_test, y_pred_gb)

st.write(f"Decision Tree MAE: {mae_dt:.2f}, R2: {r2_dt:.2f}")
st.write(f"Random Forest MAE: {mae_rf:.2f}, R2: {r2_rf:.2f}")
st.write(f"Gradient Boosting MAE: {mae_gb:.2f}, R2: {r2_gb:.2f}")

# One actual-vs-predicted panel per tree-based model; the red diagonal marks
# perfect predictions.
for model_label, model_predictions in (
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
    ("Gradient Boosting", y_pred_gb),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_test, model_predictions, alpha=0.5)
    ax.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2)
    ax.set_title(f"Actual vs Predicted - {model_label} Regression")
    ax.set_xlabel("Actual House Price")
    ax.set_ylabel("Predicted House Price")
    st.pyplot(fig)

#=================Cross-Validation=========================#
st.write("### Cross-Validation with Random Forest")

# 5-fold CV on the training split; sklearn returns negated MAE, so flip sign.
rf_cv_scores = cross_val_score(rf_model, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
st.write(f"Random Forest Cross-Validation MAE: {(-rf_cv_scores.mean()):.2f}")

# ===================== Time_series.py =====================
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

#============= Streamlit Title and Description ======================================

st.title("Time Series Analysis and Forecasting of Coffee Sales")
st.write("""
### Project Overview:
This analysis aims to study coffee sales data collected from vending machines.
The purpose is to uncover underlying patterns in the data and forecast future sales to assist in inventory and financial planning.
**SARIMA model** was chosen for its ability to handle seasonality in the time series data.

- **Data**: The dataset includes daily coffee sales with features such as payment type, coffee type, and transaction time.
- **Objective**: The primary goal is to predict future coffee sales to optimize inventory and stock management.
""")

#============= 1- Data Preprocessing ======================================

# NOTE(review): absolute Windows path -- non-portable.
file_path = r"C:\Users\71591\Desktop\dataset\Train Coffee Sales.csv"
df = pd.read_csv(file_path)

# Parse both date columns up front so time-based features can be derived later.
df['date'] = pd.to_datetime(df['date'])
df['datetime'] = pd.to_datetime(df['datetime'])

# Forward-fill gaps in the card identifier (presumably cash transactions have
# no card number -- verify against the raw data).
df['card'] = df['card'].ffill()

# Total revenue per coffee type and per payment method, attached back onto
# every transaction row as aggregate features (computed before encoding so the
# original categorical columns are still available as join keys).
coffee_totals = df.groupby('coffee_name')['money'].sum().reset_index()
payment_totals = df.groupby('cash_type')['money'].sum().reset_index()

df = df.merge(coffee_totals, on='coffee_name', how='left', suffixes=('', '_total_by_coffee'))
df = df.merge(payment_totals, on='cash_type', how='left', suffixes=('', '_total_by_payment'))

# One-hot encode the categorical columns for the modelling frame.
df_encoded = pd.get_dummies(df, columns=['cash_type', 'coffee_name'])
#============= Data Visualization ======================================

# FIX throughout this section: every chart previously called st.pyplot(plt),
# handing Streamlit the pyplot *module* (deprecated global-figure usage) and
# never releasing figures, which leaks memory across reruns.  Each chart now
# draws on an explicit figure and closes it after rendering.

# Time series of individual transaction amounts.
st.subheader('Time Series Plot: Coffee Sales Over Time')
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_encoded['datetime'], df_encoded['money'])
ax.set_title('Coffee Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Sales (Money)')
st.pyplot(fig)
plt.close(fig)

# Revenue per coffee type.
st.subheader('Bar Plot: Total Sales by Coffee Type')
fig, ax = plt.subplots(figsize=(8, 5))
df.groupby('coffee_name')['money'].sum().plot(kind='bar', ax=ax)
ax.set_title('Total Sales by Coffee Type')
ax.set_xlabel('Coffee Type')
ax.set_ylabel('Total Sales')
ax.tick_params(axis='x', rotation=45)
st.pyplot(fig)
plt.close(fig)

# Revenue per payment method.
st.subheader('Bar Plot: Total Sales by Payment Method')
fig, ax = plt.subplots(figsize=(8, 5))
df.groupby('cash_type')['money'].sum().plot(kind='bar', ax=ax)
ax.set_title('Total Sales by Payment Method')
ax.set_xlabel('Payment Method')
ax.set_ylabel('Total Sales')
ax.tick_params(axis='x', rotation=45)
st.pyplot(fig)
plt.close(fig)

# Distribution of transaction amounts.
st.subheader('Distribution Plot: Coffee Sales')
fig, ax = plt.subplots(figsize=(8, 5))
df_encoded['money'].plot(kind='hist', bins=20, edgecolor='black', ax=ax)
ax.set_title('Distribution of Coffee Sales')
ax.set_xlabel('Sales Amount (Money)')
ax.set_ylabel('Frequency')
st.pyplot(fig)
plt.close(fig)

#============= 2- Feature Engineering =====================================

# Calendar features derived from the transaction timestamp.
df_encoded['hour'] = df_encoded['datetime'].dt.hour
df_encoded['day_of_week'] = df_encoded['datetime'].dt.dayofweek
df_encoded['month'] = df_encoded['datetime'].dt.month
df_encoded['week_of_year'] = df_encoded['datetime'].dt.isocalendar().week

# Previous-transaction amount and 7-observation rolling mean.
# NOTE(review): these roll over *rows/transactions*, not calendar days --
# confirm that is the intended granularity.
df_encoded['lag_1'] = df_encoded['money'].shift(1)
df_encoded['rolling_mean_7'] = df_encoded['money'].rolling(window=7).mean()

# shift/rolling leave NaNs in the first rows; drop them.
df_encoded.dropna(inplace=True)

#============= Decomposition Plot ======================================
st.subheader("Time Series Decomposition")
st.write("""
To understand the components of our coffee sales data, we decompose the time series into **trend**, **seasonality**, and **residual** components.
This allows us to observe the underlying patterns that are influencing sales performance over time.
""")

# Additive decomposition with a 7-observation period.
# NOTE(review): the series is per-transaction, so period=7 means seven
# observations rather than seven days -- confirm this matches the intent.
decomposition = seasonal_decompose(df_encoded['money'], model='additive', period=7)

# Stack the four components vertically, one axis each.
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
components = (
    (decomposition.observed, 'Observed'),
    (decomposition.trend, 'Trend'),
    (decomposition.seasonal, 'Seasonality'),
    (decomposition.resid, 'Residuals'),
)
for axis, (component, label) in zip(axes, components):
    component.plot(ax=axis, title=label, legend=False)

plt.tight_layout()
st.pyplot(fig)

#============= ACF and PACF Plots ======================================

st.subheader("ACF and PACF Plots")
st.write("""
The **ACF (Autocorrelation Function)** and **PACF (Partial Autocorrelation Function)** plots are used to identify the presence of any autoregressive or moving average components in the time series.
These plots help in selecting the appropriate lags for our SARIMA model.
""")
# Autocorrelation and partial autocorrelation over the first 40 lags.
fig_acf, ax_acf = plt.subplots(1, 1, figsize=(10, 4))
plot_acf(df_encoded['money'], lags=40, ax=ax_acf)
st.pyplot(fig_acf)

fig_pacf, ax_pacf = plt.subplots(1, 1, figsize=(10, 4))
plot_pacf(df_encoded['money'], lags=40, ax=ax_pacf)
st.pyplot(fig_pacf)

#============== 3- Model Building and Evaluation ============================

st.subheader("SARIMA Model Building and Evaluation")

# Training series comes from the preprocessed/encoded frame above.
df_train = df_encoded.copy()

# NOTE(review): absolute Windows path -- non-portable.
df_test = pd.read_csv(r"C:\Users\71591\Desktop\dataset\Test Coffee Sales.csv")

# Index the test data by timestamp for alignment and plotting.
df_test['datetime'] = pd.to_datetime(df_test['datetime'])
df_test.set_index('datetime', inplace=True)

# Target series for fitting and hold-out evaluation.
y_train = df_train['money']
y_test = df_test['money']

# Fit a SARIMA(1,2,1)x(1,1,1,7) model on the training series.
# NOTE(review): d=2 combined with seasonal D=1 is heavy differencing for
# sales data -- worth validating against AIC / residual diagnostics.
model = SARIMAX(y_train, order=(1, 2, 1), seasonal_order=(1, 1, 1, 7))
sarima_fit = model.fit()

st.write(sarima_fit.summary())

# Out-of-sample predictions spanning exactly the test horizon.
y_pred = sarima_fit.predict(start=len(y_train), end=len(y_train) + len(y_test) - 1, dynamic=False)

# Hold-out accuracy metrics.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

st.write(f"Mean Absolute Error: {mae:.2f}")
st.write(f"Root Mean Squared Error: {rmse:.2f}")
st.write(f"R Squared Error: {r2:.2f}")

# Reset the indexes so train and test both expose 'datetime' as a column for
# the combined plot below.
df_train = df_train.reset_index()
df_test = df_test.reset_index()

fig, ax = plt.subplots(figsize=(10, 5))
# Overlay training data, held-out actuals, and model predictions.
ax.plot(df_train['datetime'], y_train, label='Training Data', color='blue')
ax.plot(df_test['datetime'], y_test, label='Test Data', color='green')
ax.plot(df_test['datetime'], y_pred, label='Predictions', color='red')

# Slanted date labels keep the x-axis readable.
plt.gcf().autofmt_xdate()
ax.set_title('SARIMA Model Predictions vs Actual Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales (Money)')
ax.legend()
st.pyplot(fig)

#=======================4-Forecasting========================#
st.subheader("Forecasting Future Coffee Sales")

n_steps = 30  # forecast horizon in steps
forecast = sarima_fit.get_forecast(steps=n_steps)

forecasted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

# NOTE(review): get_forecast() continues from the end of the *training*
# sample, but these labels start the day after the test period -- the
# forecast values and the date labels may be misaligned; confirm before
# relying on this chart.
last_date = df_test['datetime'].max()
forecast_dates = pd.date_range(last_date, periods=n_steps + 1, freq='D')[1:]

# Forecast line with its shaded confidence band.
fig_forecast, ax_forecast = plt.subplots(figsize=(10, 5))
ax_forecast.plot(forecast_dates, forecasted_values, label='Forecasted Sales', color='orange')
ax_forecast.fill_between(forecast_dates, confidence_intervals.iloc[:, 0], confidence_intervals.iloc[:, 1], color='orange', alpha=0.3)

plt.gcf().autofmt_xdate()
ax_forecast.set_title('Forecasted Coffee Sales for Next 30 Days')
ax_forecast.set_xlabel('Date')
ax_forecast.set_ylabel('Sales (Money)')
ax_forecast.legend()
st.pyplot(fig_forecast)

st.write(forecasted_values)