Assignment 1: Polynomial Regression
Hengchao Wang
1001778272
import math

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn import model_selection
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
res = {}   # weights per polynomial order, for the table in part (c)
a. Generate 20 data pairs (X, Y) using y = sin(2*pi*X) + N
x_sin = np.linspace(0, 1, 200)                     # dense grid for the true curve
x = np.random.uniform(0, 1, 20)                    # 20 random sample locations
d = np.random.normal(loc=0, scale=0.2, size=20)    # N drawn from a Gaussian distribution
print(d)
y_sin = 2 * math.pi * x_sin
y = 2 * math.pi * x
print(y)                                           # note: printed before sin is applied
y_sin = np.sin(y_sin)                              # true curve sin(2*pi*x)
y = np.sin(y) + d                                  # noisy targets y = sin(2*pi*x) + N
[-0.13511273 0.18468915 -0.10182451 0.18664012 0.09603502 0.08406724
-0.18322119 -0.1548708 -0.26950395 0.11386059 -0.03197565 -0.31144544
-0.15728101 0.04475068 -0.17858001 -0.01282772 -0.01094735 -0.19418843
0.25439552 0.09524588]
[3.68190648 2.32612886 2.48807293 0.39913587 0.79724111 5.04777546
4.82605602 1.176247 3.74257331 2.22257323 2.96313363 4.87029127
2.83184172 3.03681068 2.56122951 5.13146468 1.54140014 5.83297157
3.19685989 4.36359448]
plt.plot(x_sin, y_sin, "r-") # original sin function curve
plt.scatter(x, y)
data_1 = {'X': x, 'Y': y}
data = pd.DataFrame(data=data_1)   # keep float dtype; int8 would truncate the values
data
| | X | Y |
|---|---|---|
| 0 | 0.000000 | -0.068898 |
| 1 | 0.052632 | 0.351448 |
| 2 | 0.105263 | 0.860871 |
| 3 | 0.157895 | 0.785289 |
| 4 | 0.210526 | 1.009282 |
| 5 | 0.263158 | 0.782601 |
| 6 | 0.315789 | 0.785705 |
| 7 | 0.368421 | 0.505319 |
| 8 | 0.421053 | 0.465961 |
| 9 | 0.473684 | 0.127958 |
| 10 | 0.526316 | -0.252053 |
| 11 | 0.578947 | -0.381237 |
| 12 | 0.631579 | -0.598058 |
| 13 | 0.684211 | -0.581304 |
| 14 | 0.736842 | -1.205981 |
| 15 | 0.789474 | -0.735465 |
| 16 | 0.842105 | -0.940200 |
| 17 | 0.894737 | -0.700487 |
| 18 | 0.947368 | -0.619352 |
| 19 | 1.000000 | -0.152530 |
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(x, y, test_size=0.5, random_state=3)
train = {'X': X_train, 'Y': Y_train}
train_data = pd.DataFrame(data=train)
train_data
| | X | Y |
|---|---|---|
| 0 | 0.473684 | 0.127958 |
| 1 | 0.578947 | -0.381237 |
| 2 | 1.000000 | -0.152530 |
| 3 | 0.947368 | -0.619352 |
| 4 | 0.684211 | -0.581304 |
| 5 | 0.263158 | 0.782601 |
| 6 | 0.000000 | -0.068898 |
| 7 | 0.421053 | 0.465961 |
| 8 | 0.157895 | 0.785289 |
| 9 | 0.526316 | -0.252053 |
test = {'X': X_test, 'Y': Y_test}
test_data = pd.DataFrame(data=test)
test_data
| | X | Y |
|---|---|---|
| 0 | 0.736842 | -1.205981 |
| 1 | 0.105263 | 0.860871 |
| 2 | 0.052632 | 0.351448 |
| 3 | 0.894737 | -0.700487 |
| 4 | 0.210526 | 1.009282 |
| 5 | 0.842105 | -0.940200 |
| 6 | 0.315789 | 0.785705 |
| 7 | 0.368421 | 0.505319 |
| 8 | 0.789474 | -0.735465 |
| 9 | 0.631579 | -0.598058 |
plt.scatter(X_train, Y_train)
plt.scatter(X_test, Y_test, c = 'r')
b. Using root mean square error, find the weights of the polynomial regression for orders 0, 1, 3, and 9
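For reference: LinearRegression below fits the weights by ordinary least squares, and the root mean square error is just the square root of the mean squared error. A minimal NumPy sketch, with illustrative names:

def rmse(y_true, y_pred):
    # RMSE = sqrt((1/n) * sum((y_i - yhat_i)^2))
    return np.sqrt(np.mean((y_true - y_pred) ** 2))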
def polynomialRegression(i: int):
    polynomial = PolynomialFeatures(degree=i)        # polynomial features of order i
    x_transformed = polynomial.fit_transform(X_train.reshape(-1, 1))
    poly_linear_model = LinearRegression()
    poly_linear_model.fit(x_transformed, Y_train)    # fit by least squares
    return polynomial, poly_linear_model
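As an aside, the Pipeline import above is never used; the same fit could be expressed as a pipeline. A minimal sketch (not the approach used in the rest of this notebook):

def polynomialRegressionPipeline(i: int):
    # chain the feature transform and the regression into one estimator
    model = Pipeline([
        ('poly', PolynomialFeatures(degree=i)),
        ('linear', LinearRegression()),
    ])
    model.fit(X_train.reshape(-1, 1), Y_train)
    return model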
Weights of the polynomial regression of order 0
polynomial_0, poly_linear_model_0 = polynomialRegression(0)
coef = poly_linear_model_0.coef_
tmp = [0] * 10
for i in range(len(coef)):
    tmp[i] = int(coef[i])   # truncate to int for the summary table in part (c)
res['0'] = tmp
coef
array([0.])
Weights of the polynomial regression of order 1
polynomial_1, poly_linear_model_1 = polynomialRegression(1)
coef = poly_linear_model_1.coef_
tmp = [0] * 10
for i in range(len(coef)):
    tmp[i] = int(coef[i])
res['1'] = tmp
coef
array([ 0. , -1.03980292])
Weights of the polynomial regression of order 3
polynomial_3, poly_linear_model_3 = polynomialRegression(3)
coef = poly_linear_model_3.coef_
tmp = [0] * 10
for i in range(len(coef)):
    tmp[i] = int(coef[i])
res['3'] = tmp
coef
array([ 0. , 9.05355623, -26.39654147, 17.21077736])
Weights of the polynomial regression of order 9
polynomial_9, poly_linear_model_9 = polynomialRegression(9)
coef = poly_linear_model_9.coef_
tmp = [0] * 10
for i in range(len(coef)):
    tmp[i] = int(coef[i])
res['9'] = tmp
coef
array([ 0.00000000e+00, -4.40001497e+02, 9.05594258e+03, -7.51956543e+04,
3.35616148e+05, -8.87788711e+05, 1.43224233e+06, -1.38140936e+06,
7.30679050e+05, -1.62759822e+05])
c. Display the weights in a table
from prettytable import PrettyTable
table = PrettyTable()   # a new name, so the data array x is not overwritten
table.add_column("label\order", ["W0","W1","W2","W3","W4","W5","W6","W7","W8","W9"])
table.add_column("0", res["0"])
table.add_column("1", res["1"])
table.add_column("3", res["3"])
table.add_column("9", res["9"])
print(table)
# column "0": W0 holds the weight of the order-0 fit
# column "1": W0 and W1 hold the weights of the order-1 fit
# column "3": W0-W3 hold the weights of the order-3 fit
# column "9": W0-W9 hold the weights of the order-9 fit
+-------------+---+----+-----+----------+
| label\order | 0 | 1 | 3 | 9 |
+-------------+---+----+-----+----------+
| W0 | 0 | 0 | 0 | 0 |
| W1 | 0 | -1 | 9 | -440 |
| W2 | 0 | 0 | -26 | 9055 |
| W3 | 0 | 0 | 17 | -75195 |
| W4 | 0 | 0 | 0 | 335616 |
| W5 | 0 | 0 | 0 | -887788 |
| W6 | 0 | 0 | 0 | 1432242 |
| W7 | 0 | 0 | 0 | -1381409 |
| W8 | 0 | 0 | 0 | 730679 |
| W9 | 0 | 0 | 0 | -162759 |
+-------------+---+----+-----+----------+
d. Draw a chart of the fitted curves
Fit of the polynomial regression of order 0
xx = np.linspace(0, 1, 100)
xx_transformed_0 = polynomial_0.fit_transform(xx.reshape(xx.shape[0], 1))
yy = poly_linear_model_0.predict(xx_transformed_0)
plt.plot(xx, yy, label="$y = w_0$")
plt.scatter(X_train, Y_train)
plt.scatter(X_test, Y_test, c = 'r')
plt.legend()
Fit of the polynomial regression of order 1
xx = np.linspace(0, 1, 100)
xx_transformed_1 = polynomial_1.fit_transform(xx.reshape(xx.shape[0], 1))
yy = poly_linear_model_1.predict(xx_transformed_1)
plt.plot(xx, yy, label="$y = w_0 + w_1x$")
plt.scatter(X_train, Y_train)
plt.scatter(X_test, Y_test, c = 'r')
plt.legend()
Fit of the polynomial regression of order 3
xx = np.linspace(0, 1, 100)
xx_transformed_3 = polynomial_3.fit_transform(xx.reshape(xx.shape[0], 1))
yy = poly_linear_model_3.predict(xx_transformed_3)
plt.plot(xx, yy, label="$y = w_0 + w_1x + w_2x^2 + w_3x^3$")
plt.scatter(X_train, Y_train)
plt.scatter(X_test, Y_test, c = 'r')
plt.legend()
Fit of the polynomial regression of order 9
xx = np.linspace(0, 1, 100)
xx_transformed_9 = polynomial_9.fit_transform(xx.reshape(xx.shape[0], 1))
yy = poly_linear_model_9.predict(xx_transformed_9)
plt.plot(xx, yy, label="$y = w_0 + \cdots + w_9x^9$")
plt.scatter(X_train, Y_train)
plt.scatter(X_test, Y_test, c = 'r')
plt.ylim(-1.5 ,1.5)
plt.legend()
e. Draw train error vs test error
train_error = [0] * 10   # train error per order
test_error = [0] * 10    # test error per order

def getMse(Y, yy):
    standard = tf.square(Y - yy)
    mse = tf.reduce_mean(standard)
    return mse.numpy()
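TensorFlow is used only for this mean; an equivalent NumPy-only version, as a sketch, would be:

def getMseNumpy(Y, yy):
    # same quantity without the TensorFlow dependency
    return np.mean(np.square(Y - yy))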
def getError(i: int, model):
    polynomial = PolynomialFeatures(degree=i)
    xx_transformed_test = polynomial.fit_transform(X_test.reshape(X_test.shape[0], 1))
    xx_transformed_train = polynomial.fit_transform(X_train.reshape(X_train.shape[0], 1))
    yy_test = model.predict(xx_transformed_test)
    yy_train = model.predict(xx_transformed_train)
    test_error[i] = getMse(Y_test, yy_test)
    train_error[i] = getMse(Y_train, yy_train)
polynomial_2, poly_linear_model_2 = polynomialRegression(2)
polynomial_4, poly_linear_model_4 = polynomialRegression(4)
polynomial_5, poly_linear_model_5 = polynomialRegression(5)
polynomial_6, poly_linear_model_6 = polynomialRegression(6)
polynomial_7, poly_linear_model_7 = polynomialRegression(7)
polynomial_8, poly_linear_model_8 = polynomialRegression(8)
# for orders 0, 1, 3, and 9, reuse the models fitted above
getError(0, poly_linear_model_0)
getError(1, poly_linear_model_1)
getError(2, poly_linear_model_2)
getError(3, poly_linear_model_3)
getError(4, poly_linear_model_4)
getError(5, poly_linear_model_5)
getError(6, poly_linear_model_6)
getError(7, poly_linear_model_7)
getError(8, poly_linear_model_8)
getError(9, poly_linear_model_9)
print(test_error)
print(train_error)
[0.6498920057761227, 0.29804783140365465, 0.30113805806870736, 0.02726799080902726, 0.0271024086195014, 0.026776756066662428, 0.04301676452676815, 0.12082159598343782, 0.401395880004449, 14.044977128617717]
[0.24198978400252188, 0.14243643318053872, 0.14127787295342403, 0.006370123295882909, 0.006346077518346089, 0.0061639660337249, 0.004435216402598326, 0.0008846725534367012, 0.00025564239293912985, 3.1460399327300388e-21]
# note from the printed errors: order 9 has near-zero train error but a huge test error (overfitting)
xx = np.linspace(0, 9, 10)   # error chart: one point per polynomial order
plt.ylim(0, 1)
plt.xlim(0, 9)
plt.plot(xx, test_error, label="test error", c='r')
plt.plot(xx, train_error, label="train error", c='b')
plt.xlabel('Order')
plt.ylabel('Error')
plt.legend()
f. Generate 100 more data points, fit a 9th-order model, and draw the fit
x_100 = np.linspace(0, 1, 100)                          # generate 100 new samples
d_100 = np.random.normal(loc=0, scale=0.2, size=100)    # N from a Gaussian distribution
y_100 = np.sin(2 * math.pi * x_100) + d_100             # noisy targets
data_1 = {'X': x_100, 'Y': y_100}
data_100 = pd.DataFrame(data=data_1)
data_100
| | X | Y |
|---|---|---|
| 0 | 0.000000 | 0.204449 |
| 1 | 0.010101 | 0.294845 |
| 2 | 0.020202 | 0.059705 |
| 3 | 0.030303 | 0.599017 |
| 4 | 0.040404 | -0.021195 |
| ... | ... | ... |
| 95 | 0.959596 | -0.388439 |
| 96 | 0.969697 | 0.484030 |
| 97 | 0.979798 | -0.015679 |
| 98 | 0.989899 | 0.012487 |
| 99 | 1.000000 | 0.029097 |

100 rows × 2 columns
plt.scatter(x_100, y_100, marker = "o",c = "r")
polynomial = PolynomialFeatures(degree=9)    # 9th-order polynomial features
x_transformed = polynomial.fit_transform(x_100.reshape(100, 1))
poly_linear_model = LinearRegression()
poly_linear_model.fit(x_transformed, y_100)  # fit on the 100 new samples
xx_transformed_9 = polynomial.fit_transform(x_100.reshape(x_100.shape[0], 1))
yy = poly_linear_model.predict(xx_transformed_9)
plt.plot(x_100, yy, label="$y = w_0 + \cdots + w_9x^9$")
plt.scatter(x_100, y_100, c="r")
plt.legend()
g. Regularize using the sum of squared weights (ridge / L2 penalty)
h. Draw a chart for each lambda
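For reference, the Ridge estimator used below finds the weights minimizing ||y − Xw||² + α·||w||², i.e. the squared-error term plus α times the sum of squared weights; scikit-learn does not penalize the intercept.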
def regularizeRidge(alpha):
    # convention: a negative argument is interpreted as ln(lambda), so alpha = exp(argument)
    if alpha < 0:
        alpha = math.exp(alpha)
    else:
        print("alpha = ", alpha)
        if alpha != 0:
            print("ln(alpha) = ", math.log(alpha))
    polynomial = PolynomialFeatures(degree=9)    # 9th-order features
    x_transformed = polynomial.fit_transform(X_train.reshape(10, 1))
    poly_linear_model = Ridge(alpha=alpha)
    poly_linear_model.fit(x_transformed, Y_train)
    return poly_linear_model
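As a cross-check, the penalized least-squares problem has a closed-form solution. A minimal sketch, assuming a design matrix Phi from polynomial.fit_transform (scikit-learn's Ridge also fits an unpenalized intercept, so its coefficients will differ slightly):

def ridgeClosedForm(Phi, y, alpha):
    # w = (Phi^T Phi + alpha * I)^(-1) Phi^T y
    n_features = Phi.shape[1]
    A = Phi.T @ Phi + alpha * np.eye(n_features)
    return np.linalg.solve(A, Phi.T @ y)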
def chartRidge(alpha):
    model = regularizeRidge(alpha)
    xx = np.linspace(0, 1, 100)
    x_transformed = polynomial.fit_transform(xx.reshape(100, 1))   # reuses the degree-9 polynomial from part (f)
    yy = model.predict(x_transformed)
    plt.plot(xx, yy, label=alpha)
    plt.scatter(X_train, Y_train)
    plt.scatter(X_test, Y_test, c='r')
    plt.legend()
chartRidge(0)   # lambda = 0, no regularization
alpha = 0
chartRidge(0.1)   # lambda = 0.1, ln(lambda) = -2.303
alpha = 0.1
ln(alpha) = -2.3025850929940455
chartRidge(0.01)   # lambda = 0.01, ln(lambda) = -4.605
alpha = 0.01
ln(alpha) = -4.605170185988091
chartRidge(0.001)   # lambda = 0.001, ln(lambda) = -6.908
alpha = 0.001
ln(alpha) = -6.907755278982137
chartRidge(0.0001)   # lambda = 0.0001, ln(lambda) = -9.210
alpha = 0.0001
ln(alpha) = -9.210340371976182
i. Draw test and train error as a function of lambda
train_error_ridge = np.zeros(30)
test_error_ridge = np.zeros(30)

def getErrorRidge(i: int, model):   # error function for the ridge models
    xx_transformed_test = polynomial.fit_transform(X_test.reshape(X_test.shape[0], 1))
    xx_transformed_train = polynomial.fit_transform(X_train.reshape(X_train.shape[0], 1))
    yy_test = model.predict(xx_transformed_test)
    yy_train = model.predict(xx_transformed_train)
    test_error_ridge[i] = getMse(Y_test, yy_test)    # i is negative (ln(lambda)), so this indexes from the end
    train_error_ridge[i] = getMse(Y_train, yy_train)
xx = list(range(-30, 0))   # ln(lambda) values
for i in xx:
    model = regularizeRidge(i)   # negative i is interpreted as ln(lambda)
    getErrorRidge(i, model)
xx = list(range(-30, 0))
plt.ylim(0, 0.5)
plt.xlim(-30, 0)
plt.plot(xx, test_error_ridge, label="test error", c='r')
plt.plot(xx, train_error_ridge, label="train error", c='b')
plt.xlabel('ln(lambda)')
plt.ylabel('Error')
plt.legend()
# pick the lambda with the smallest test error
best_lambda = int(np.argmin(test_error_ridge)) - 30   # index 0 corresponds to ln(lambda) = -30
print("best ln(lambda) = ", best_lambda)
best_lambda_0 = math.exp(best_lambda)
print("best lambda = ", best_lambda_0)
print("In summary, the model with ln(lambda) = ", best_lambda, ", lambda = ", best_lambda_0, " has the best test performance.")
best ln(lambda) = -11
best lambda = 1.670170079024566e-05
In summary, the model with ln(lambda) = -11 , lambda = 1.670170079024566e-05 has the best test performance.