导入必要的库

In [1]:
import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon import nn
from mxnet.gluon.data import vision
from mxnet.gluon.model_zoo import vision as models

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import h5py
import os

import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# All computation below (feature extraction and classifier training)
# runs on the first GPU.
ctx = mx.gpu(0)

载入训练集

In [2]:
df = pd.read_csv('labels.csv')
synset = sorted(set(df['breed']))
n = len(df)

width = 224
X = np.zeros((n, 3, width, width), dtype=np.float32)
y = np.zeros((n,), dtype=np.float32)

# ImageNet channel statistics matching the pretrained resnet50_v2 used below.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# O(1) label lookup; calling synset.index(breed) inside the loop is an
# O(len(synset)) linear scan on every one of the n rows.
breed_index = {breed: idx for idx, breed in enumerate(synset)}

for i, (fname, breed) in tqdm(df.iterrows(), total=n):
    # cv2 reads BGR; [:, :, ::-1] flips to RGB before normalization, and
    # the final transpose converts HWC -> CHW as MXNet expects.
    img = cv2.imread('train/%s.jpg' % fname)
    X[i] = ((cv2.resize(img, (width, width))[:,:,::-1] / 255.0 - mean) / std).transpose((2, 0, 1))
    y[i] = breed_index[breed]
100%|██████████| 10222/10222 [00:29<00:00, 342.04it/s]
In [3]:
batch_size = 128

data_iter = gluon.data.DataLoader(X, batch_size=batch_size)

# Pretrained ResNet-50 v2 acts as a frozen feature extractor; only its
# convolutional trunk (net.features) is used.
net = models.resnet50_v2(pretrained=True, ctx=ctx)

# Stateless layers, created once and reused for every batch.
pool = nn.GlobalAvgPool2D()
flatten = nn.Flatten()

features = []
for batch in tqdm(data_iter):
    fmap = net.features(batch.as_in_context(ctx))
    vec = flatten(pool(fmap))
    features.append(vec.asnumpy())
    nd.waitall()
100%|██████████| 80/80 [00:22<00:00,  1.14s/it]
In [4]:
features = np.vstack(features)
# Pin random_state so the train/validation split is reproducible across
# kernel restarts (default test_size is 0.25).
X_train, X_valid, y_train, y_valid = train_test_split(features, y, random_state=42)

一些函数

In [5]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def accuracy(output, labels):
    """Fraction of rows in `output` whose argmax matches `labels`."""
    return nd.mean(nd.argmax(output, axis=1) == labels).asscalar()

def evaluate(net, data_iter):
    """Mean cross-entropy loss and accuracy of `net` over `data_iter`.

    Metrics are simple means over batches, so a smaller final batch is
    weighted the same as full batches.
    """
    # Removed the unused local `n`, which shadowed the global dataset size.
    loss, acc = 0., 0.
    steps = len(data_iter)
    for data, label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        output = net(data)
        acc += accuracy(output, label)
        loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
    return loss/steps, acc/steps

分类器

In [6]:
# Lightweight classification head on top of the extracted 2048-d features:
# dropout for regularization, then one dense layer over the 120 breeds.
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dropout(0.5), nn.Dense(120))

net.initialize(ctx=ctx)

训练分类器

In [7]:
epochs = 10
batch_size = 128

data_iter_train = gluon.data.DataLoader(gluon.data.ArrayDataset(X_train, y_train), batch_size)
data_iter_valid = gluon.data.DataLoader(gluon.data.ArrayDataset(X_valid, y_valid), batch_size)

trainer = gluon.Trainer(net.collect_params(), 'adam')

for epoch in range(epochs):
    train_loss, train_acc = 0., 0.
    steps = len(data_iter_train)
    for batch, target in data_iter_train:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)

        # Record the forward pass so backward() can compute gradients.
        with autograd.record():
            preds = net(batch)
            loss = softmax_cross_entropy(preds, target)
        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(preds, target)

    val_loss, val_acc = evaluate(net, data_iter_valid)

    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%%" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100))
Epoch 1. loss: 4.0482, acc: 26.72%, val_loss 3.2114, val_acc 64.94%
Epoch 2. loss: 2.6704, acc: 67.20%, val_loss 2.1478, val_acc 78.76%
Epoch 3. loss: 1.8386, acc: 77.91%, val_loss 1.5524, val_acc 82.21%
Epoch 4. loss: 1.3751, acc: 81.72%, val_loss 1.2235, val_acc 83.81%
Epoch 5. loss: 1.1091, acc: 83.70%, val_loss 1.0296, val_acc 84.20%
Epoch 6. loss: 0.9434, acc: 84.85%, val_loss 0.9061, val_acc 84.83%
Epoch 7. loss: 0.8285, acc: 85.75%, val_loss 0.8210, val_acc 84.94%
Epoch 8. loss: 0.7475, acc: 86.31%, val_loss 0.7605, val_acc 85.14%
Epoch 9. loss: 0.6878, acc: 86.87%, val_loss 0.7139, val_acc 85.30%
Epoch 10. loss: 0.6358, acc: 87.56%, val_loss 0.6780, val_acc 85.69%

获取分类器的权值并设置为 1x1 卷积层权重

In [8]:
# Grab the classifier's Dense weight matrix (shape (120, 2048)).  The
# Dropout layer holds no parameters, so the first key is the Dense weight.
params = net.collect_params()
# Dict views are not subscriptable in Python 3 — params.keys()[0] raises
# TypeError there; materialize the keys into a list first.
class_weights = params[list(params.keys())[0]]
In [9]:
# 1x1 convolution that applies the classifier weights at every spatial
# position of the 7x7 feature map, yielding one class-activation map per
# breed.  Declaring in_channels up front fixes the weight shape at
# initialization, so the original dummy forward pass (random data fed
# through the layer just to trigger shape inference) is no longer needed.
c = nn.Conv2D(channels=120, kernel_size=1, in_channels=2048)
c.initialize(ctx=ctx)
c.weight.set_data(class_weights.data().reshape((120, 2048, 1, 1)))

定义 forward 函数输出预测值和 CAM 图

In [10]:
resnet = models.resnet50_v2(pretrained=True, ctx=ctx)

# Stateless layers created once instead of on every forward() call.
_global_pool = nn.GlobalAvgPool2D()
_flatten = nn.Flatten()

def forward(x):
    """Return (class predictions, class-activation maps) for a batch.

    x: array-like of shape (batch, 3, 224, 224), normalized the same way
    as the training images above.
    """
    x = nd.array(x, ctx=ctx)
    fmap = resnet.features(x)   # (batch, 2048, 7, 7)
    cams = c(fmap)              # (batch, 120, 7, 7) activation maps
    pooled = _flatten(_global_pool(fmap))
    predictions = net(pooled)
    return predictions, cams

随机选择图片进行可视化

In [11]:
# Sample 12 training images (with replacement) to visualize.
random_index = np.random.randint(n, size=12)
predictions, cams = forward(X[random_index])
# Predicted class index per image; CAMs converted to numpy for plotting.
predictions = nd.argmax(predictions, axis=1).asnumpy()
cams = cams.asnumpy()
In [12]:
plt.figure(figsize=(16, 12))
for i, index in enumerate(random_index):
    plt.subplot(3, 4, i+1)
    img = cv2.imread('train/%s.jpg' % df['id'][index])

    # CAM for the predicted class; copy so normalization below does not
    # mutate the shared `cams` array in place.
    cam = cams[i][int(predictions[i])].copy()
    # Normalize to [0, 1], guarding against a constant map where
    # max() == 0 after subtracting the min would divide by zero.
    cam -= cam.min()
    peak = cam.max()
    if peak > 0:
        cam /= peak
    cam = cv2.resize((cam * 255).astype(np.uint8), (img.shape[1], img.shape[0]))

    heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
    # Suppress weak activations so the overlay highlights only hot regions.
    heatmap[cam < 64] = 0

    out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0)

    plt.imshow(out[:,:,::-1])
    plt.title('pred:%s\nreal: %s' % (synset[int(predictions[i])], df['breed'][index]))