#PRACTICAL 2
# ================================
# IMPORT LIBRARIES
# ================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# ================================
# LOAD DATASET
# ================================
# Salary dataset is read straight from GitHub; point this at a local CSV
# path instead when working offline.
data = pd.read_csv("https://raw.githubusercontent.com/krishnaik06/simple-Linear-Regression/master/Salary_Data.csv")


# ================================
# SIMPLE LINEAR REGRESSION
# (Salary vs Experience)
# ================================
# The single predictor is selected with double brackets so it stays 2-D
# (one column); the target is a plain 1-D array.
X_simple = data[['YearsExperience']].to_numpy()
y_simple = data['Salary'].to_numpy()

# Half of the rows are held out for testing; the fixed seed keeps the
# split reproducible from run to run.
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X_simple,
    y_simple,
    test_size=0.5,
    random_state=0,
)

# Fit the regression on the training half only.
model_simple = LinearRegression().fit(X_train_s, y_train_s)

# Fitted values for the training rows and predictions for the held-out rows.
y_train_pred_s = model_simple.predict(X_train_s)
y_pred_s = model_simple.predict(X_test_s)

# Output
print("=== Simple Linear Regression ===")
print("Predicted Test Values:", y_pred_s)
print("Predicted Train Values:", y_train_pred_s)

# Visualization: training observations with the fitted line through them.
fig, ax = plt.subplots()
ax.scatter(X_train_s, y_train_s, color='red', label='Training Data')
ax.plot(X_train_s, y_train_pred_s, color='blue', label='Regression Line')
ax.set_title("Salary vs Experience")
ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary")
ax.legend()
plt.show()


# ============================================
# IMPORT LIBRARIES
# ============================================
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# ============================================
# LOAD DATASET (STOCK MARKET)
# ============================================
# Read the Apple price history, strip the "AAPL." prefix from the columns
# used later, and discard every row that has a missing value.
df = (
    pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv")
    .rename(
        columns={
            'AAPL.Open': 'Open',
            'AAPL.High': 'High',
            'AAPL.Low': 'Low',
            'AAPL.Close': 'Close',
            'AAPL.Volume': 'Volume',
        }
    )
    .dropna()
)


# ============================================
# FEATURES AND TARGET
# ============================================
# The four price columns are the predictors; traded volume is the response.
feature_columns = ['Open', 'High', 'Low', 'Close']
X = df[feature_columns]
y = df['Volume']


# ============================================
# TRAIN TEST SPLIT
# ============================================
# 80/20 split with a fixed seed so the reported metrics are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# ============================================
# MODEL TRAINING AND PREDICTIONS
# ============================================
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)


# ============================================
# OUTPUT RESULTS
# ============================================
# Pair each fitted coefficient with the column it belongs to.
coefficients = {name: weight for name, weight in zip(X.columns, model.coef_)}

print("=== Multiple Linear Regression ===")
print("Intercept:", model.intercept_)
print("Coefficients:", coefficients)

# Both metrics are computed on the held-out rows.
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))


# ============================================
# GRAPH 1: ACTUAL vs PREDICTED
# ============================================
# Both series are drawn against their position in the test split, not
# against a date axis.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(y_test.values, label="Actual Volume")
ax.plot(y_pred, label="Predicted Volume")

ax.set_xlabel("Sample Index")
ax.set_ylabel("Volume")
ax.set_title("Stock Volume Prediction")

ax.legend()
plt.show()


# ============================================
# GRAPH 2: SCATTER + REGRESSION LINE
# (Volume vs High)
# ============================================
# Separate one-feature model fitted on the full frame: Volume is the
# predictor and High is the response.
volume_frame = df[['Volume']]
model2 = LinearRegression().fit(volume_frame, df[['High']])

# Fitted High value for every observed Volume.
line = model2.predict(volume_frame)

fig, ax = plt.subplots(figsize=(8, 5))

ax.scatter(df['Volume'], df['High'], label="Data Points")
ax.plot(df['Volume'], line, color='red', label="Regression Line")

ax.set_xlabel("Volume")
ax.set_ylabel("High")
ax.set_title("Volume vs High")

ax.legend()
plt.show()


# Practical 4: Demonstration of Logistic Regression

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
data = pd.read_csv("student_exam_real_life_1000.csv")

# Feature (only study_hours, so the fitted probability is one S-shaped curve)
X = data[['study_hours']]
y = data['passed_exam']

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Generate smooth X for the sigmoid curve: 300 evenly spaced values spanning
# the observed study hours.
x_range = np.linspace(data['study_hours'].min(),
                      data['study_hours'].max(), 300).reshape(-1, 1)

# The model was fitted on a DataFrame, so hand it a DataFrame with the same
# column name; a bare array makes scikit-learn warn about feature names.
x_grid = pd.DataFrame(x_range, columns=X.columns)

# Predicted probability of the second class in model.classes_
# (passed_exam == 1, assuming the labels are 0/1 as the filters below imply).
y_prob = model.predict_proba(x_grid)[:, 1]

# Separate actual pass & fail points
fail = data[data['passed_exam'] == 0]
passs = data[data['passed_exam'] == 1]

plt.figure(figsize=(8, 5))

# Sigmoid curve
plt.plot(x_range, y_prob, linewidth=2, label="Sigmoid Curve")

# Red dots (fail), pinned to y = 0
plt.scatter(fail['study_hours'], [0]*len(fail),
            color='red', s=40, label="Fail (0)")

# Green dots (pass), pinned to y = 1; a different colour from the fail
# points so the two classes can be told apart.
plt.scatter(passs['study_hours'], [1]*len(passs),
            color='green', s=40, label="Pass (1)")

# Decision threshold point (0.5 probability): the study-hours value where
# intercept + coef * x == 0. Left as None when the slope is zero, because
# then the curve is flat and never crosses 0.5 at a single point.
slope = model.coef_[0][0]
mid_x = -model.intercept_[0] / slope if slope != 0 else None

# Only draw the boundary when it falls inside the plotted range; otherwise
# the vertical line would stretch the x-axis.
if mid_x is not None and x_range.min() <= mid_x <= x_range.max():
    plt.axvline(mid_x, linestyle=':', color='gray', label="Decision Boundary")

plt.axhline(0.5, linestyle='--', color='gray')

plt.xlabel("Study Hours")
plt.ylabel("Probability of Passing")
plt.title("Logistic Regression Sigmoid Curve (Using Student Dataset)")
plt.ylim(-0.1, 1.1)
plt.legend()
plt.show()

# Accuracy on the held-out 20% of rows
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))



# Practical 4: One-Sample t-Test (Right Tailed)

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Sample of 20 observed delivery times.
delivery_time = np.array([
    32, 34, 29, 31, 35,
    36, 30, 33, 34, 32,
    31, 37, 38, 33, 34,
    35, 36, 32, 31, 34
])

mu = 30       # population mean assumed under the null hypothesis
alpha = 0.05  # significance level

# Right-tailed one-sample t-test (H1: mean > mu).
# Asking scipy for the one-sided p-value directly is correct for either sign
# of the statistic; halving the two-sided p-value is only valid when the
# statistic is positive.
t_stat, p_value = stats.ttest_1samp(delivery_time, mu, alternative="greater")

print("Mean:", delivery_time.mean())
print("t-statistic:", t_stat)
print("p-value:", p_value)

# Plot: density of the t-distribution with n - 1 degrees of freedom, with
# the critical value and the observed statistic marked on it.
df = len(delivery_time) - 1
x = np.linspace(-4, 8, 1000)
y = stats.t.pdf(x, df)

# Cut-off that leaves an area of alpha in the right tail.
critical_value = stats.t.ppf(1 - alpha, df)

fig, ax = plt.subplots()
ax.plot(x, y, label="t-distribution")
ax.axvline(critical_value, color='red', label="Critical Value")
ax.axvline(t_stat, color='green', label="t-statistic")

# Shade the area to the right of the observed statistic.
ax.fill_between(x, y, where=(x >= t_stat), alpha=0.4)

ax.set_title("One-Sample t-Test (Right Tailed)")
ax.set_xlabel("t value")
ax.set_ylabel("Probability Density")
ax.legend()
plt.show()




# Practical no: 5
# Install required library (only needed in Colab). This is a notebook shell
# command, not Python; run it in a notebook cell or a terminal:
#   !pip install statsmodels

from scipy import stats

# Marks of students, one list per teaching method
method_A = [60, 62, 64]
method_B = [70, 72, 74]
method_C = [80, 82, 84]

# Significance level for the decision below
alpha = 0.05

# One-way ANOVA: are the three group means plausibly equal?
F_value, p_value = stats.f_oneway(method_A, method_B, method_C)

print("F-value:", F_value)
print("p-value:", p_value)

# A test can reject the null hypothesis or fail to reject it; it never
# proves the null, so the second branch does not say "accept".
if p_value < alpha:
    print("Reject Null Hypothesis: Significant difference exists")
else:
    print("Fail to Reject Null Hypothesis: No significant difference")


# Two way ANOVA
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt

# Eight observations: two teaching methods crossed with two genders,
# two marks per combination.
data = {
    'Marks': [70, 72, 75, 78, 80, 82, 85, 88],
    'Method': ['A'] * 4 + ['B'] * 4,
    'Gender': ['Boy', 'Boy', 'Girl', 'Girl'] * 2,
}

df = pd.DataFrame(data)
print(df)

# Two-way ANOVA model: both main effects plus their interaction.
formula = 'Marks ~ C(Method) + C(Gender) + C(Method):C(Gender)'
model = ols(formula, data=df).fit()

# Type II ANOVA table for the fitted model.
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# One box per (Method, Gender) combination. The automatic super-title that
# pandas adds is blanked so only the custom title shows.
df.boxplot(column='Marks', by=['Method', 'Gender'])
plt.suptitle('')
plt.title('Marks by Method and Gender')
plt.xlabel('Method & Gender')
plt.ylabel('Marks')
plt.show()


# Practical no 6:
# ============================================
# IMPORT LIBRARIES
# ============================================
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
import pandas as pd


# ============================================
# DATASET
# ============================================
# Eight hand-written loan applications with the approval outcome.
data = {
    "Age": [25, 45, 35, 50, 23, 40, 60, 48],
    "Income": [30000, 80000, 60000, 90000, 20000, 70000, 100000, 85000],
    "Credit_Score": [650, 720, 690, 750, 600, 710, 780, 730],
    "Loan_Approved": ["No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"]
}

df = pd.DataFrame(data)


# ============================================
# CONVERT TARGET TO NUMERIC
# ============================================
# "No" -> 0 and "Yes" -> 1, so class 0 means not approved and class 1 means
# approved; the class_names order passed to plot_tree relies on this.
df["Loan_Approved"] = df["Loan_Approved"].map({"Yes": 1, "No": 0})


# ============================================
# FEATURES AND TARGET
# ============================================
X = df[["Age", "Income", "Credit_Score"]]
y = df["Loan_Approved"]


# ============================================
# MODEL
# ============================================
# Depth is capped at 3; the fixed seed makes the fitted tree reproducible.
model = DecisionTreeClassifier(max_depth=3, random_state=42)


# ============================================
# FIT THE MODEL
# ============================================
# The tree is fitted on all eight rows; no test split is made here.
model.fit(X, y)


# ============================================
# PLOT DECISION TREE
# ============================================
plt.figure(figsize=(12, 8))

plot_tree(
    model,
    # Pass a plain list: the parameter validation in some scikit-learn
    # releases rejects a pandas Index for feature_names.
    feature_names=list(X.columns),
    class_names=["Not Approved", "Approved"],
    filled=True
)

plt.show()



# ============================================
# IMPORT LIBRARIES
# ============================================
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
import pandas as pd


# ============================================
# DATASET
# ============================================
# NOTE(review): this section repeats the loan-approval decision-tree
# practical that already appears earlier in this file; consider keeping
# only one copy.
data = {
    "Age": [25, 45, 35, 50, 23, 40, 60, 48],
    "Income": [30000, 80000, 60000, 90000, 20000, 70000, 100000, 85000],
    "Credit_Score": [650, 720, 690, 750, 600, 710, 780, 730],
    "Loan_Approved": ["No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"]
}

df = pd.DataFrame(data)


# ============================================
# CONVERT TARGET TO NUMERIC
# ============================================
# Encode the outcome as 1 for "Yes" and 0 for "No".
df["Loan_Approved"] = df["Loan_Approved"].map({"Yes": 1, "No": 0})


# ============================================
# FEATURES AND TARGET
# ============================================
X = df[["Age", "Income", "Credit_Score"]]
y = df["Loan_Approved"]


# ============================================
# MODEL
# ============================================
# At most three levels of splits, with a fixed random seed.
model = DecisionTreeClassifier(max_depth=3, random_state=42)


# ============================================
# FIT THE MODEL
# ============================================
model.fit(X, y)


# ============================================
# PLOT DECISION TREE
# ============================================
plt.figure(figsize=(12, 8))

plot_tree(
    model,
    # Converted to a list because the parameter validation in some
    # scikit-learn releases does not accept a pandas Index here.
    feature_names=list(X.columns),
    # Listed in class order: 0 ("No") first, then 1 ("Yes").
    class_names=["Not Approved", "Approved"],
    filled=True
)

plt.show()