from csv import reader

import numpy as np
import matplotlib.pyplot as plt


def load_csv(filename, skip=False):
    dataset = list()
    with open(filename, 'r', newline='') as file:
        csv_reader = reader(file)
        if skip:
            next(csv_reader)  # skip the header row
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset


def diagnosis_column_to_number(dataset, column):
    # Encode the diagnosis labels numerically: malignant -> 0, benign -> 1.
    for row in dataset:
        if row[column] == 'M':
            row[column] = 0
        elif row[column] == 'B':
            row[column] = 1


def extract_only_x_data(dataset):
    # Every column except the last one is a feature.
    if len(dataset) == 0:
        return
    data = list()
    for i in range(len(dataset)):
        data.append(list())
        for j in range(len(dataset[i]) - 1):
            data[-1].append(float(dataset[i][j]))
    return data


def extract_only_y_data(dataset):
    # The last column holds the (already numeric) diagnosis label.
    if len(dataset) == 0:
        return
    data = list()
    for i in range(len(dataset)):
        data.append(int(dataset[i][-1]))
    return data


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def loss(y, y_hat):
    # Binary cross-entropy; epsilon prevents log(0).
    epsilon = 1e-9
    return -np.mean(y * np.log(y_hat + epsilon) + (1 - y) * np.log(1 - y_hat + epsilon))


def gradients(X, y, y_hat):
    # Number of training examples.
    number_of_examples = X.shape[0]

    # Gradient of the loss w.r.t. the weights.
    dw = (1 / number_of_examples) * np.dot(X.T, (y_hat - y))

    # Gradient of the loss w.r.t. the bias.
    db = (1 / number_of_examples) * np.sum(y_hat - y)

    return dw, db


def train(X, y, batch_size, epochs, learning_rate):
    number_of_examples, number_of_features = X.shape

    # Initializing weights and bias to zeros.
    weights = np.zeros((number_of_features, 1))
    bias = 0

    # Reshaping y into a column vector.
    y = y.reshape(number_of_examples, 1)

    # Empty list to store losses.
    losses = []

    # Training loop: mini-batch gradient descent.
    for epoch in range(epochs):
        for i in range((number_of_examples - 1) // batch_size + 1):
            # Defining the current batch.
            start_i = i * batch_size
            end_i = start_i + batch_size
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Calculating hypothesis/prediction.
            y_hat = sigmoid(np.dot(xb, weights) + bias)

            # Getting the gradients of loss w.r.t. parameters.
            dw, db = gradients(xb, yb, y_hat)

            # Updating the parameters.
            weights -= learning_rate * dw
            bias -= learning_rate * db

        # Calculating the full-dataset loss and appending it to the list.
        losses.append(loss(y, sigmoid(np.dot(X, weights) + bias)))

    # Returning weights, bias and losses (list).
    return weights, bias, losses


# Make the predictions.
def predict(X, w, b):
    # Calculating predictions/y_hat.
    preds = sigmoid(np.dot(X, w) + b)

    # Threshold the probabilities:
    # y_hat >= 0.5 rounds up to 1, y_hat < 0.5 rounds down to 0.
    pred_class = [1 if i >= 0.5 else 0 for i in preds]

    return np.array(pred_class)


# Obtain the accuracy.
def accuracy(y, y_hat):
    return np.sum(y == y_hat) / len(y)


# Output the plot.
def plot_decision_boundary(X, y, w, b):
    # X: inputs, y: labels, w: weights, b: bias.
    plt.figure(figsize=(10, 8))
    plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], "g^")
    plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "bs")
    plt.xlim([-2, 2])
    plt.ylim([0, 2.2])
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.title('Decision Boundary')

    # The boundary is the line y = mx + c.
    # Equate mx + c = w.X + b and solve for m and c.
    x1 = np.array([min(X[:, 0]), max(X[:, 0])])
    if w[1] != 0:
        m = -w[0] / w[1]
        c = -b / w[1]
        x2 = m * x1 + c
        plt.plot(x1, x2, 'y-')
    plt.show()
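
# Optional sketch (not part of the original pipeline): raw features in a
# WDBC-style CSV can span very different ranges, which makes gradient descent
# slow or unstable at a fixed learning rate. A hypothetical standardize()
# helper like the one below could z-score the features before train() is
# called; the helper name and its use here are assumptions, not something the
# original script does.
def standardize(X):
    # Z-score each feature column of a 2-D float array.
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std[std == 0] = 1  # avoid division by zero for constant features
    return (X - mean) / std
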
### Evaluate the algorithm.
filename = 'breast_cancer_data.csv'
dataset = load_csv(filename, skip=True)
diagnosis_column_to_number(dataset, 2)
X_train_data = extract_only_x_data(dataset)
y_train_data = extract_only_y_data(dataset)
X = np.array(X_train_data)
y = np.array(y_train_data)

# Training.
w, b, l = train(X, y, batch_size=100, epochs=1000, learning_rate=0.01)

# Plotting the decision boundary.
plot_decision_boundary(X, y, w, b)

# Reporting the training accuracy.
print(accuracy(y, y_hat=predict(X, w, b)))
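
# Optional sketch: train() returns the per-epoch losses in l, but the script
# never inspects them. Assuming the run above has completed, plotting the loss
# history is a quick way to check that gradient descent is converging.
plt.figure(figsize=(8, 5))
plt.plot(range(len(l)), l)
plt.xlabel("epoch")
plt.ylabel("binary cross-entropy loss")
plt.title("Training loss per epoch")
plt.show()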