Tìm hiểu AE for feature extraction

Posted by Hao Do on November 5, 2022

Tìm hiểu AE for feature extraction

Tìm hiểu autoencoder

Kiến trúc cơ bản của AutoEncoder

img

Code mẫu ví dụ

Load thư viện

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from keras.models import load_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LeakyReLU
from keras.layers import BatchNormalization
from keras.utils import plot_model
import matplotlib.pyplot as plt

Load datasets

1
2
3
4
#Dataset
X, y = make_classification(n_samples = 1000, n_features = 100, n_informative = 10, n_redundant=90, random_state = 1 , n_classes = 5)
print('Shape', X.shape)
X

Scale datasets

1
2
3
4
5
6
7
8
9
# Train test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 1)

#Scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
X_train.shape, X_test.shape

shape data

((670, 100), (330, 100))

Encoder

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
visible = Input(shape = (n_inputs,))
#Define Encoder
#level 1
e = Dense(n_inputs*2)(visible) # e = encoder
e = BatchNormalization()(e)
e = LeakyReLU()(e)

# level 2 
e = Dense(n_inputs)(e)
e = BatchNormalization()(e)
e = LeakyReLU()(e)

#Bottleneck 
n_bottleneck = round(float(n_inputs)/2.0)
bottleneck = Dense(n_bottleneck)(e)

# Define Decoder
#level 1
d = Dense(n_inputs)(bottleneck) # d = dencoder
d = BatchNormalization()(d)
d = LeakyReLU()(d)

#level 2
d = Dense(n_inputs*2)(d) # d = dencoder
d = BatchNormalization()(d)
d = LeakyReLU()(d)

#Output layer
output = Dense(n_inputs, activation = 'linear')(d)

#Define autoencoder model
model = Model(inputs = visible, outputs = output)

#compile autoencoder
model.compile(optimizer = 'adam', loss = 'mse')

Plot model

1
plot_model(model, 'autoencoder_compress.png', show_shapes = True)

img

fit model

1
2
# Fit the autoencoder model to reconstruct input
history = model.fit(X_train, X_train, epochs = 200, batch_size = 16, verbose = 2, validation_data = (X_test,X_test))

plot loss

1
2
3
4
5
6
7
8
9
10
#Plot Loss
fig, ax = plt.subplots(figsize=(10,5)) 
plt.plot(history.history['loss'], label = 'train')
plt.plot(history.history['val_loss'], label = 'test')
plt.title('Autoencoder loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('Autoencoderloss.png')

img

Define an encoder model without the decoder

1
2
3
#Define an encoder model without the decoder
encoder = Model(inputs =visible, outputs = bottleneck)
plot_model(model, 'encoder_compress.png', show_shapes = True)

img

save model

1
2
# Save model
encoder.save('encoder.h5')

Base logistic Regression

1
2
3
4
5
6
7
8
9
10
11
X, y = make_classification(n_samples=1000, n_features=100, n_informative=10, n_redundant=90, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(acc)

Load model

1
2
3
4
5
6
7
8
9
10
11
12
# load the model from file
encoder = load_model('encoder.h5') 

# encode the  data
X_train_encode = encoder.predict(X_train)
X_test_encode = encoder.predict(X_test)

model_encode = LogisticRegression()
# fit the model on the training set
model_encode.fit(X_train_encode, y_train)
# make predictions on the test set
y_pred_encode = model_encode.predict(X_test_encode)

predict model

1
2
3
4
5
acc_encode = accuracy_score(y_test, y_pred_encode)
print(acc_encode)

print('Accuracy LogisticRegression Simple: {}'.format(acc))
print('Accuracy LogisticRegression With Autoencoder: {}'.format(acc_encode))

Full ipynb

Tài liệu tham khảo

Machine learning cơ bản

Hết.