parent
4318663fd6
commit
6d498d3562
@@ -0,0 +1,190 @@
from csv import reader

import numpy as np
import matplotlib.pyplot as plt


def load_csv(filename, skip=False):
    # Read a CSV file into a list of rows, optionally skipping the header row.
    dataset = list()
    with open(filename, 'r', newline='') as file:
        csv_reader = reader(file)
        if skip:
            next(csv_reader)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset
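
# Usage sketch (illustrative; 'some_file.csv' is a placeholder name):
# rows = load_csv('some_file.csv', skip=True)  # rows is a list of string lists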


def diagnosis_column_to_number(dataset, column):
    # Encode the diagnosis label in place: malignant 'M' -> 0, benign 'B' -> 1.
    for row in dataset:
        if row[column] == 'M':
            row[column] = 0
        elif row[column] == 'B':
            row[column] = 1
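
# For example (illustrative row): [['3.5', '1.2', 'M']] becomes
# [['3.5', '1.2', 0]] after diagnosis_column_to_number(rows, 2);
# the mapping mutates the rows in place.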


def extract_only_x_data(dataset):
    # Build the feature matrix: every column except the last, cast to float.
    if len(dataset) == 0:
        return []
    data = list()
    for i in range(len(dataset)):
        data.append(list())
        for j in range(len(dataset[i]) - 1):
            data[-1].append(float(dataset[i][j]))
    return data


def extract_only_y_data(dataset):
    # Build the label vector: the last column of every row, cast to int.
    if len(dataset) == 0:
        return []
    data = list()
    for i in range(len(dataset)):
        data.append(int(dataset[i][-1]))
    return data


def sigmoid(z):
    # Logistic function: maps any real input into the open interval (0, 1).
    return 1 / (1 + np.exp(-z))
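
# Quick sanity check (an illustrative addition, not part of the original
# script): sigmoid(0) is exactly 0.5, and large inputs saturate toward 1.
assert sigmoid(0) == 0.5
assert sigmoid(35) > 0.999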


def loss(y, y_hat):
    # Binary cross-entropy, averaged over all examples.
    epsilon = 1e-9  # small value to prevent log(0)
    loss = -np.mean(y * np.log(y_hat + epsilon) + (1 - y) * np.log(1 - y_hat + epsilon))
    return loss
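
# Illustrative check (not in the original): two confident, correct
# predictions give a small loss of about -log(0.9) = 0.105.
assert np.isclose(loss(np.array([1, 0]), np.array([0.9, 0.1])), 0.105, atol=1e-3)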


def gradients(X, y, y_hat):
    # Number of training examples.
    number_of_examples = X.shape[0]

    # Gradient of the loss w.r.t. the weights.
    dw = (1 / number_of_examples) * np.dot(X.T, (y_hat - y))

    # Gradient of the loss w.r.t. the bias.
    db = (1 / number_of_examples) * np.sum((y_hat - y))

    return dw, db
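
# Shape sanity check (illustrative addition): with 3 examples and 2 features,
# dw carries one entry per feature and db is a single scalar offset.
_dw, _db = gradients(np.zeros((3, 2)), np.zeros((3, 1)), np.full((3, 1), 0.5))
assert _dw.shape == (2, 1) and np.isclose(_db, 0.5)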


def train(X, y, batch_size, epochs, learning_rate):
    number_of_examples, number_of_features = X.shape

    # Initializing weights and bias to zeros.
    weights = np.zeros((number_of_features, 1))
    bias = 0

    # Reshaping y into a column vector.
    y = y.reshape(number_of_examples, 1)

    # Empty list to store losses.
    losses = []

    # Training loop.
    for epoch in range(epochs):
        for i in range((number_of_examples - 1) // batch_size + 1):
            # Defining mini-batches for mini-batch gradient descent.
            start_i = i * batch_size
            end_i = start_i + batch_size
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Calculating the hypothesis/prediction.
            y_hat = sigmoid(np.dot(xb, weights) + bias)

            # Getting the gradients of the loss w.r.t. the parameters.
            dw, db = gradients(xb, yb, y_hat)

            # Updating the parameters.
            weights -= learning_rate * dw
            bias -= learning_rate * db

            # Calculating the full-dataset loss and appending it to the list.
            losses.append(loss(y, sigmoid(np.dot(X, weights) + bias)))

    # Returning the weights, bias and the list of losses.
    return weights, bias, losses
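
# Minimal usage sketch on made-up data (illustrative only; the real call on
# the CSV data appears at the bottom of this file):
# toy_X = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
# toy_y = np.array([0, 1, 1])
# toy_w, toy_b, toy_losses = train(toy_X, toy_y, batch_size=2, epochs=50, learning_rate=0.1)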


# Make the predictions.


def predict(X, w, b):
    # X: input matrix, w: weights, b: bias.

    # Calculating predictions/y_hat.
    preds = sigmoid(np.dot(X, w) + b)

    # Empty list to store the predicted classes.
    pred_class = []

    # if y_hat >= 0.5, round up to 1;
    # if y_hat < 0.5, round down to 0.
    for i in preds:
        pred_class.append(1 if i >= 0.5 else 0)

    return np.array(pred_class)
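
# Illustrative check (made-up weights): strongly positive activations map to
# class 1, strongly negative ones to class 0.
assert (predict(np.array([[10.0, 10.0], [-10.0, -10.0]]), np.ones((2, 1)), 0)
        == np.array([1, 0])).all()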


# Obtain the accuracy.


def accuracy(y, y_hat):
    # Fraction of predictions that match the true labels.
    accuracy = np.sum(y == y_hat) / len(y)
    return accuracy
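
# Illustrative check: 3 of 4 labels match, so the accuracy is 0.75.
assert accuracy(np.array([1, 0, 1, 1]), np.array([1, 0, 0, 1])) == 0.75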


# Output the plot.


def plot_decision_boundary(X, y, w, b):
    # X: inputs, y: labels, w: weights, b: bias.
    plt.figure(figsize=(10, 8))
    plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], "g^")
    plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "bs")
    plt.xlim([-2, 2])
    plt.ylim([0, 2.2])
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.title('Decision Boundary')

    # The boundary is the line where w[0]*x1 + w[1]*x2 + b = 0.
    # Solving for x2 gives slope m = -w[0]/w[1] and intercept c = -b/w[1].
    x1 = np.array([X[:, 0].min(), X[:, 0].max()])

    if w[1] != 0:
        m = -w[0] / w[1]
        c = -b / w[1]
        x2 = m * x1 + c
        plt.plot(x1, x2, 'y-')

    plt.show()


### Evaluate the algorithm.

filename = 'breast_cancer_data.csv'
dataset = load_csv(filename, skip=True)

# Convert the diagnosis column ('M'/'B') at index 2 to 0/1.
diagnosis_column_to_number(dataset, 2)

X_train_data = extract_only_x_data(dataset)
y_train_data = extract_only_y_data(dataset)

X = np.array(X_train_data)
y = np.array(y_train_data)

# Training.
w, b, losses = train(X, y, batch_size=100, epochs=1000, learning_rate=0.01)

# Plotting the decision boundary.
plot_decision_boundary(X, y, w, b)

# Report the training accuracy.
print(accuracy(y, y_hat=predict(X, w, b)))