Plot Decision Boundary of a Classifier

The dataset comes from Stanford CS229 exercise 2 (ex2) and can be downloaded here.

Logistic Regression

The code is modified from the Stanford CS229 ex2 starter code.

Language: Matlab

%% ================== Initialization ==================
clear; close all; clc

% Load the data: two feature columns and a 0/1 label column
data = load('data.txt');
X = data(:, [1, 2]); y = data(:, 3);
[m, n] = size(X);

fprintf(['Plotting data with + indicating (y = 1) examples and o ' ...
         'indicating (y = 0) examples.\n']);
plotData(X, y);   % helper function from the exercise

fprintf('\nProgram paused. Press enter to continue.\n');
pause;
close all;

%% ============== Optimizing using fminunc =============
% 'GradObj' tells fminunc that costFunction returns the gradient
% as its second output
options = optimset('GradObj', 'on', 'MaxIter', 400);

% Add the intercept column, then run fminunc to obtain the optimal theta
X = [ones(m, 1) X];
initial_theta = zeros(n + 1, 1);
[theta, cost] = ...
    fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);

% Plot the decision boundary (plotDecisionBoundary is a helper
% function from the exercise)
plotDecisionBoundary(theta, X, y);
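
For logistic regression, the fitted boundary is the set of points where theta' * x = 0; with two features plus the intercept this is just the straight line x2 = -(theta0 + theta1 * x1) / theta2, which is what plotDecisionBoundary draws. Below is a minimal Python sketch of the same idea; the theta values are placeholders, so in practice substitute the theta returned by fminunc.

import numpy as np
import matplotlib.pyplot as plt

# Placeholder parameters [theta0, theta1, theta2]; substitute the
# values fitted by fminunc in the Matlab script above
theta = np.array([-25.0, 0.2, 0.2])

data = np.loadtxt("data.txt", delimiter=',')
X, y = data[:, :2], data[:, 2]

# theta0 + theta1*x1 + theta2*x2 = 0  =>  x2 = -(theta0 + theta1*x1)/theta2
x1 = np.array([X[:, 0].min() - 2, X[:, 0].max() + 2])
x2 = -(theta[0] + theta[1] * x1) / theta[2]

plt.plot(X[y == 1, 0], X[y == 1, 1], 'k+', label='y = 1')
plt.plot(X[y == 0, 0], X[y == 0, 1], 'yo', label='y = 0')
plt.plot(x1, x2, 'b-', label='decision boundary')
plt.legend()
plt.show()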

Nearest-Neighbor Methods

Based on a related question on Stack Exchange.

Language: R

library(class)

# Read in the data: two feature columns and a 0/1 label column
RawData <- read.csv("D:/Machine Learning/Classification/data.txt", header=FALSE)
x <- data.frame(x1 = RawData$V1, x2 = RawData$V2)
g <- RawData$V3

# Generate a grid of points to be classified
px1 <- seq(min(x$x1)-2, max(x$x1)+2, (max(x$x1)-min(x$x1))/50)
px2 <- seq(min(x$x2)-2, max(x$x2)+2, (max(x$x2)-min(x$x2))/50)
xnew <- expand.grid(px1, px2)

# KNN classification of every grid point
k <- 15
mod <- knn(x, xnew, g, k, prob = TRUE)

# knn() returns the vote proportion of the winning class;
# convert it to the probability of class 1 at each grid point
prob <- attr(mod, "prob")
prob <- ifelse(mod == "1", prob, 1 - prob)
prob <- matrix(prob, length(px1), length(px2))

# Set the margins of the plot
par(mar = rep(2, 4))

# Plot the decision boundary: the 0.5 probability contour
contour(px1, px2, prob, levels=0.5, labels="", xlab="", ylab="",
        main = paste0(k, "-Nearest Neighbor Classifier"), axes=FALSE)
points(x, col = ifelse(g==1, "coral", "cornflowerblue"))
points(xnew, pch=".", cex=1.2, col = ifelse(prob>0.5, "coral", "cornflowerblue"))
box()
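
The same boundary can be reproduced in Python with scikit-learn's KNeighborsClassifier. The sketch below makes the same assumptions about data.txt (two comma-separated feature columns followed by a 0/1 label) and contours the 0.5 level of the estimated probability of class 1, mirroring the R code above.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

data = np.loadtxt("data.txt", delimiter=',')
X, y = data[:, :2], data[:, 2]

# Fit a 15-nearest-neighbor classifier, as in the R code
k = 15
knn = KNeighborsClassifier(n_neighbors=k).fit(X, y)

# Estimate P(y = 1) on a grid covering the data
px1 = np.linspace(X[:, 0].min() - 2, X[:, 0].max() + 2, 100)
px2 = np.linspace(X[:, 1].min() - 2, X[:, 1].max() + 2, 100)
xx, yy = np.meshgrid(px1, px2)
prob = knn.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
prob = prob.reshape(xx.shape)

# The decision boundary is the 0.5 probability contour
plt.contour(xx, yy, prob, levels=[0.5], colors='k')
plt.scatter(X[:, 0], X[:, 1],
            c=np.where(y == 1, 'coral', 'cornflowerblue'))
plt.title('%d-Nearest Neighbor Classifier' % k)
plt.show()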

AdaBoost

Language: Python

import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Load the data: the last column is the 0/1 label
data = np.loadtxt("data.txt", delimiter=',')
dim = data.shape

X = data[:, :dim[1]-1]
y = data[:, dim[1]-1]

# Boost 200 decision stumps (depth-1 trees)
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME", n_estimators=200)

bdt.fit(X, y)

# Plotting parameters
plot_colors = "br"
plot_step = 0.2
class_names = "AB"

# Plot the decision boundary by classifying every point on a grid
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))
Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)

# Plot the training points
for i, n, c in zip(range(2), class_names, plot_colors):
    idx = np.where(y == i)
    plt.scatter(X[idx, 0], X[idx, 1], c=c, label="Class %s" % n)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend(loc='upper right')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Decision Boundary')

plt.show()
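
contourf above shades the whole plane by predicted class. If only the boundary curve itself is wanted, one alternative (a sketch continuing the script above, reusing bdt, xx, yy, X and y) is to contour the zero level of the ensemble's decision_function, whose sign determines the predicted class:

# Signed score of the boosted ensemble on the same grid;
# the decision boundary is its zero level set
score = bdt.decision_function(np.c_[xx.ravel(), yy.ravel()])
score = score.reshape(xx.shape)

plt.contour(xx, yy, score, levels=[0], colors='k')
plt.scatter(X[:, 0], X[:, 1], c=np.where(y == 0, 'b', 'r'))
plt.show()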

The complete code is available here.