parent
4318663fd6
commit
6d498d3562
@@ -0,0 +1,190 @@
from csv import reader

import numpy as np
import matplotlib.pyplot as plt


def load_csv(filename, skip=False):
    # Read a CSV file into a list of rows, optionally skipping the header row.
    dataset = list()
    with open(filename, 'r', newline='') as file:
        csv_reader = reader(file)
        if skip:
            next(csv_reader)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset
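
# Usage sketch (illustrative; 'some_file.csv' is a placeholder name):
# rows = load_csv('some_file.csv', skip=True)  # rows is a list of string lists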


def diagnosis_column_to_number(dataset, column):
    # Encode the diagnosis label in place: malignant 'M' -> 0, benign 'B' -> 1.
    for row in dataset:
        if row[column] == 'M':
            row[column] = 0
        elif row[column] == 'B':
            row[column] = 1
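
# For example (illustrative row): [['3.5', '1.2', 'M']] becomes
# [['3.5', '1.2', 0]] after diagnosis_column_to_number(rows, 2);
# the mapping mutates the rows in place.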


def extract_only_x_data(dataset):
    # Build the feature matrix: every column except the last, cast to float.
    if len(dataset) == 0:
        return []
    data = list()
    for i in range(len(dataset)):
        data.append(list())
        for j in range(len(dataset[i]) - 1):
            data[-1].append(float(dataset[i][j]))
    return data


def extract_only_y_data(dataset):
    # Build the label vector: the last column of every row, cast to int.
    if len(dataset) == 0:
        return []
    data = list()
    for i in range(len(dataset)):
        data.append(int(dataset[i][-1]))
    return data


def sigmoid(z):
    # Logistic function: maps any real input into the open interval (0, 1).
    return 1 / (1 + np.exp(-z))
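
# Quick sanity check (an illustrative addition, not part of the original
# script): sigmoid(0) is exactly 0.5, and large inputs saturate toward 1.
assert sigmoid(0) == 0.5
assert sigmoid(35) > 0.999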


def loss(y, y_hat):
    # Binary cross-entropy, averaged over all examples.
    epsilon = 1e-9  # small value to prevent log(0)
    loss = -np.mean(y * np.log(y_hat + epsilon) + (1 - y) * np.log(1 - y_hat + epsilon))
    return loss
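
# Illustrative check (not in the original): two confident, correct
# predictions give a small loss of about -log(0.9) = 0.105.
assert np.isclose(loss(np.array([1, 0]), np.array([0.9, 0.1])), 0.105, atol=1e-3)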


def gradients(X, y, y_hat):
    # Number of training examples.
    number_of_examples = X.shape[0]

    # Gradient of the loss w.r.t. the weights.
    dw = (1 / number_of_examples) * np.dot(X.T, (y_hat - y))

    # Gradient of the loss w.r.t. the bias.
    db = (1 / number_of_examples) * np.sum((y_hat - y))

    return dw, db
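
# Shape sanity check (illustrative addition): with 3 examples and 2 features,
# dw carries one entry per feature and db is a single scalar offset.
_dw, _db = gradients(np.zeros((3, 2)), np.zeros((3, 1)), np.full((3, 1), 0.5))
assert _dw.shape == (2, 1) and np.isclose(_db, 0.5)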


def train(X, y, batch_size, epochs, learning_rate):
    number_of_examples, number_of_features = X.shape

    # Initializing weights and bias to zeros.
    weights = np.zeros((number_of_features, 1))
    bias = 0

    # Reshaping y into a column vector.
    y = y.reshape(number_of_examples, 1)

    # Empty list to store losses.
    losses = []

    # Training loop.
    for epoch in range(epochs):
        for i in range((number_of_examples - 1) // batch_size + 1):
            # Defining mini-batches for mini-batch gradient descent.
            start_i = i * batch_size
            end_i = start_i + batch_size
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Calculating the hypothesis/prediction.
            y_hat = sigmoid(np.dot(xb, weights) + bias)

            # Getting the gradients of the loss w.r.t. the parameters.
            dw, db = gradients(xb, yb, y_hat)

            # Updating the parameters.
            weights -= learning_rate * dw
            bias -= learning_rate * db

            # Calculating the full-dataset loss and appending it to the list.
            losses.append(loss(y, sigmoid(np.dot(X, weights) + bias)))

    # Returning the weights, bias and the list of losses.
    return weights, bias, losses
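
# Minimal usage sketch on made-up data (illustrative only; the real call on
# the CSV data appears at the bottom of this file):
# toy_X = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
# toy_y = np.array([0, 1, 1])
# toy_w, toy_b, toy_losses = train(toy_X, toy_y, batch_size=2, epochs=50, learning_rate=0.1)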


# Make the predictions.


def predict(X, w, b):
    # X: input matrix, w: weights, b: bias.

    # Calculating predictions/y_hat.
    preds = sigmoid(np.dot(X, w) + b)

    # Empty list to store the predicted classes.
    pred_class = []

    # if y_hat >= 0.5, round up to 1;
    # if y_hat < 0.5, round down to 0.
    for i in preds:
        pred_class.append(1 if i >= 0.5 else 0)

    return np.array(pred_class)
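
# Illustrative check (made-up weights): strongly positive activations map to
# class 1, strongly negative ones to class 0.
assert (predict(np.array([[10.0, 10.0], [-10.0, -10.0]]), np.ones((2, 1)), 0)
        == np.array([1, 0])).all()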


# Obtain the accuracy.


def accuracy(y, y_hat):
    # Fraction of predictions that match the true labels.
    accuracy = np.sum(y == y_hat) / len(y)
    return accuracy
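
# Illustrative check: 3 of 4 labels match, so the accuracy is 0.75.
assert accuracy(np.array([1, 0, 1, 1]), np.array([1, 0, 0, 1])) == 0.75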


# Output the plot.


def plot_decision_boundary(X, y, w, b):
    # X: inputs, y: labels, w: weights, b: bias.
    plt.figure(figsize=(10, 8))
    plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], "g^")
    plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "bs")
    plt.xlim([-2, 2])
    plt.ylim([0, 2.2])
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.title('Decision Boundary')

    # The boundary is the line where w[0]*x1 + w[1]*x2 + b = 0.
    # Solving for x2 gives slope m = -w[0]/w[1] and intercept c = -b/w[1].
    x1 = np.array([X[:, 0].min(), X[:, 0].max()])

    if w[1] != 0:
        m = -w[0] / w[1]
        c = -b / w[1]
        x2 = m * x1 + c
        plt.plot(x1, x2, 'y-')

    plt.show()


### Evaluate the algorithm.

filename = 'breast_cancer_data.csv'
dataset = load_csv(filename, skip=True)

# Convert the diagnosis column ('M'/'B') at index 2 to 0/1.
diagnosis_column_to_number(dataset, 2)

X_train_data = extract_only_x_data(dataset)
y_train_data = extract_only_y_data(dataset)

X = np.array(X_train_data)
y = np.array(y_train_data)

# Training.
w, b, losses = train(X, y, batch_size=100, epochs=1000, learning_rate=0.01)

# Plotting the decision boundary.
plot_decision_boundary(X, y, w, b)

# Report the training accuracy.
print(accuracy(y, y_hat=predict(X, w, b)))