import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon import nn
from mxnet.gluon.data import vision
from mxnet.gluon.model_zoo import vision as models
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import h5py
import os
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Device and dataset setup: read the Kaggle dog-breed label CSV, then load
# every training image into one preprocessed float32 array (NCHW layout).
ctx = mx.gpu(0)
df = pd.read_csv('labels.csv')
synset = sorted(set(df['breed']))
n = len(df)
width = 224
X = np.zeros((n, 3, width, width), dtype=np.float32)
y = np.zeros((n,), dtype=np.float32)
# ImageNet channel statistics expected by the pretrained ResNet.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
for idx, (fname, breed) in tqdm(df.iterrows(), total=n):
    raw = cv2.imread('train/%s.jpg' % fname)
    resized = cv2.resize(raw, (width, width))
    rgb = resized[:, :, ::-1] / 255.0            # BGR -> RGB, scale to [0, 1]
    X[idx] = ((rgb - mean) / std).transpose((2, 0, 1))  # HWC -> CHW
    y[idx] = synset.index(breed)
# Run the whole training set through a pretrained ResNet50-v2 once and cache
# the 2048-d bottleneck features, so the classifier head trains cheaply.
batch_size = 128
data_iter = gluon.data.DataLoader(X, batch_size=batch_size)
net = models.resnet50_v2(pretrained=True, ctx=ctx)
features = []
for batch in tqdm(data_iter):
    feat = net.features(batch.as_in_context(ctx))
    feat = nn.Flatten()(nn.GlobalAvgPool2D()(feat))
    features.append(feat.asnumpy())
nd.waitall()
features = np.vstack(features)
# Hold out a validation split for training the classifier head.
X_train, X_valid, y_train, y_valid = train_test_split(features, y)
# Loss shared by the training loop and evaluate().
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def accuracy(output, labels):
    """Fraction of rows in `output` whose argmax equals the label."""
    predicted = nd.argmax(output, axis=1)
    return nd.mean(predicted == labels).asscalar()
def evaluate(net, data_iter):
    """Return (mean loss, mean accuracy) of `net` over `data_iter`.

    Averages per-batch means, so the result is slightly biased when the
    last batch is smaller than the others.

    Uses the module-level `ctx`, `accuracy` and `softmax_cross_entropy`.
    """
    # Fix: dropped the unused local `n` from `loss, acc, n = 0., 0., 0.`;
    # it was never read and shadowed the global sample count `n`.
    loss, acc = 0.0, 0.0
    steps = len(data_iter)
    for data, label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        output = net(data)
        acc += accuracy(output, label)
        loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
    return loss / steps, acc / steps
# Classifier head: dropout followed by a 120-way dense layer (one unit per
# breed), trained with Adam on the precomputed ResNet features.
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dropout(0.5))
    net.add(nn.Dense(120))
net.initialize(ctx=ctx)

epochs = 10
batch_size = 128
train_set = gluon.data.ArrayDataset(X_train, y_train)
valid_set = gluon.data.ArrayDataset(X_valid, y_valid)
data_iter_train = gluon.data.DataLoader(train_set, batch_size)
data_iter_valid = gluon.data.DataLoader(valid_set, batch_size)
trainer = gluon.Trainer(net.collect_params(), 'adam')
# Train the classifier head on the cached bottleneck features, reporting
# per-epoch training and validation loss/accuracy.
for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    steps = len(data_iter_train)
    for data, label in data_iter_train:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        with autograd.record():  # record the forward pass for autodiff
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)  # step() divides gradients by batch_size
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)
    val_loss, val_acc = evaluate(net, data_iter_valid)
    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%%" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100))
# Turn the trained Dense weights into a 1x1 convolution so that applying it
# to the 7x7x2048 feature map yields class activation maps (CAM) for all
# 120 breeds in one pass.
params = net.collect_params()
# Bug fix: in Python 3, .keys() returns a view that cannot be indexed
# (`params.keys()[0]` raises TypeError). Wrap it in list() to grab the
# first parameter, which is the Dense layer's weight (Dropout has none).
class_weights = params[list(params.keys())[0]]
c = nn.Conv2D(channels=120, kernel_size=1)
c.initialize(ctx=ctx)
# One dummy forward pass fixes the conv's input shape so its weight array
# is allocated before we overwrite it below.
test = nd.random.normal(shape=(32, 2048, 7, 7), ctx=ctx)
c(test)
c.weight.set_data(class_weights.data().reshape((120, 2048, 1, 1)))
# Fresh pretrained backbone for end-to-end inference on raw images.
resnet = models.resnet50_v2(pretrained=True, ctx=ctx)

def forward(x):
    """Run images through the backbone; return (breed logits, raw CAMs)."""
    feats = resnet.features(nd.array(x, ctx=ctx))
    cams = c(feats)                       # per-class activation maps
    pooled = nn.Flatten()(nn.GlobalAvgPool2D()(feats))
    return net(pooled), cams
# Show predictions plus class-activation heatmaps for 12 random training
# images, overlaying each CAM on the original photo.
random_index = np.random.randint(n, size=12)
predictions, cams = forward(X[random_index])
predictions = nd.argmax(predictions, axis=1).asnumpy()
cams = cams.asnumpy()
plt.figure(figsize=(16, 12))
for i, index in enumerate(random_index):
    plt.subplot(3, 4, i+1)
    img = cv2.imread('train/%s.jpg' % df['id'][index])
    pred = int(predictions[i])
    cam = cams[i][pred]
    # Normalize the predicted class's map to [0, 255] and scale it up to
    # the original image size.
    cam -= cam.min()
    cam /= cam.max()
    cam = cv2.resize((cam * 255).astype(np.uint8), (img.shape[1], img.shape[0]))
    heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
    heatmap[cam < 64] = 0   # suppress weak activations so the dog stays visible
    out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0)
    plt.imshow(out[:, :, ::-1])
    plt.title('pred:%s\nreal: %s' % (synset[pred], df['breed'][index]))