# 基于图片相似度的图片搜索¶

## 二、环境配置¶

```python
import random
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import paddle
import paddle.nn.functional as F
from PIL import Image
```
```2.3.0
```

## 三、数据加载¶

### 3.1 数据集介绍¶

```import paddle.vision.transforms as T
transform = T.Compose([T.Transpose((2, 0, 1))])

x_train = np.zeros((50000, 3, 32, 32))
y_train = np.zeros((50000, 1), dtype='int32')

for i in range(len(cifar10_train)):
train_image, train_label = cifar10_train[i]

# normalize the data
x_train[i,:, :, :] = train_image / 255.
y_train[i, 0] = train_label

y_train = np.squeeze(y_train)

print(x_train.shape)
print(y_train.shape)
```
```(50000, 3, 32, 32)
(50000,)
```
```cifar10_test = paddle.vision.datasets.cifar.Cifar10(mode='test', transform=transform)
x_test = np.zeros((10000, 3, 32, 32), dtype='float32')
y_test = np.zeros((10000, 1), dtype='int64')

for i in range(len(cifar10_test)):
test_image, test_label = cifar10_test[i]

# normalize the data
x_test[i,:, :, :] = test_image / 255.
y_test[i, 0] = test_label

y_test = np.squeeze(y_test)

print(x_test.shape)
print(y_test.shape)
```
```(10000, 3, 32, 32)
(10000,)
```

### 3.2 数据探索¶

```height_width = 32

def show_collage(examples):
box_size = height_width + 2
num_rows, num_cols = examples.shape[:2]

collage = Image.new(
mode="RGB",
size=(num_cols * box_size, num_rows * box_size),
color=(255, 255, 255),
)
for row_idx in range(num_rows):
for col_idx in range(num_cols):
array = (np.array(examples[row_idx, col_idx]) * 255).astype(np.uint8)
array = array.transpose(1,2,0)
collage.paste(
Image.fromarray(array), (col_idx * box_size, row_idx * box_size)
)

collage = collage.resize((2 * num_cols * box_size, 2 * num_rows * box_size))
return collage

sample_idxs = np.random.randint(0, 50000, size=(5, 5))
examples = x_train[sample_idxs]
show_collage(examples)
```

### 3.3 构建训练数据¶

```class_idx_to_train_idxs = defaultdict(list)
for y_train_idx, y in enumerate(y_train):
class_idx_to_train_idxs[y].append(y_train_idx)

class_idx_to_test_idxs = defaultdict(list)
for y_test_idx, y in enumerate(y_test):
class_idx_to_test_idxs[y].append(y_test_idx)
```

```num_classes = 10

iter_step = 0
while True:
if iter_step >= num_batchs:
break
iter_step += 1
x = np.empty((2, num_classes, 3, height_width, height_width), dtype=np.float32)
for class_idx in range(num_classes):
examples_for_class = class_idx_to_train_idxs[class_idx]
anchor_idx = random.choice(examples_for_class)
positive_idx = random.choice(examples_for_class)
while positive_idx == anchor_idx:
positive_idx = random.choice(examples_for_class)
x[0, class_idx] = x_train[anchor_idx]
x[1, class_idx] = x_train[positive_idx]
yield x

# num_batchs: how many batchs to generate
def anchor_positive_pairs(num_batchs=100):
```
```python
# Each call to pairs_train_reader() yields up to 1000 anchor/positive batches.
pairs_train_reader = anchor_positive_pairs(num_batchs=1000)
```

```python
# Pull one batch to sanity-check its shape: (2, num_classes, 3, 32, 32),
# then visualise it (row 0 = anchors, row 1 = positives).
examples = next(pairs_train_reader())
print(examples.shape)
show_collage(examples)
```
```(2, 10, 3, 32, 32)
```

## 四、模型组网：把图片转换为高维的向量表示的网络¶

```class MyNet(paddle.nn.Layer):
def __init__(self):
super().__init__()

out_channels=32,
kernel_size=(3, 3),
stride=2)

out_channels=64,
kernel_size=(3,3),
stride=2)

out_channels=128,
kernel_size=(3,3),
stride=2)

def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = self.conv3(x)
x = F.relu(x)
x = self.gloabl_pool(x)
x = self.fc1(x)
x = x / paddle.norm(x, axis=1, keepdim=True)
return x
```

## 五、模型训练¶

• `inverse_temperature`参数起到的作用是让softmax在计算梯度时，能够处于梯度更显著的区域。（可以参考attention is all you need中，在点积之后的`scale`操作）。

• 整个计算过程，会先用上面的网络分别计算前10张图片（anchors)的高维表示，和后10张图片的高维表示。然后再用matmul计算前10张图片分别与后10张图片的相似度。（所以`similarities`会是一个`(10, 10)`的Tensor）。

• 在构造类别标签时，则相应的，可以构造出来0 ~ num_classes - 1的标签值，用来让学习的目标成为相似的图片的相似度尽可能的趋向于1.0，而不相似的图片的相似度尽可能的趋向于-1.0。

```def train(model):
print('start training ... ')
model.train()

epoch_num = 20

parameters=model.parameters())

for epoch in range(epoch_num):
anchors_data, positives_data = data[0], data[1]

anchor_embeddings = model(anchors)
positive_embeddings = model(positives)

loss = F.cross_entropy(similarities, sparse_labels)

if batch_id % 500 == 0:
print("epoch: {}, batch_id: {}, loss is: {}".format(epoch, batch_id, loss.numpy()))
loss.backward()
opt.step()

model = MyNet()
train(model)
```
```W0509 15:23:13.961760  1261 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 10.1
W0509 15:23:13.965698  1261 device_context.cc:465] device: 0, cuDNN Version: 7.6.

start training ...
epoch: 0, batch_id: 0, loss is: [2.2920926]
epoch: 0, batch_id: 500, loss is: [2.467431]
epoch: 1, batch_id: 0, loss is: [1.9296285]
epoch: 1, batch_id: 500, loss is: [2.195432]
epoch: 2, batch_id: 0, loss is: [2.0753798]
epoch: 2, batch_id: 500, loss is: [1.8138281]
epoch: 3, batch_id: 0, loss is: [1.5797657]
epoch: 3, batch_id: 500, loss is: [1.938111]
epoch: 4, batch_id: 0, loss is: [2.0983474]
epoch: 4, batch_id: 500, loss is: [1.8560457]
epoch: 5, batch_id: 0, loss is: [1.7827711]
epoch: 5, batch_id: 500, loss is: [2.2653818]
epoch: 6, batch_id: 0, loss is: [2.129322]
epoch: 6, batch_id: 500, loss is: [1.9394711]
epoch: 7, batch_id: 0, loss is: [1.9075954]
epoch: 7, batch_id: 500, loss is: [2.4538717]
epoch: 8, batch_id: 0, loss is: [1.7016968]
epoch: 8, batch_id: 500, loss is: [1.8498293]
epoch: 9, batch_id: 0, loss is: [2.018735]
epoch: 9, batch_id: 500, loss is: [2.1448936]
epoch: 10, batch_id: 0, loss is: [1.7357345]
epoch: 10, batch_id: 500, loss is: [1.8551898]
epoch: 11, batch_id: 0, loss is: [1.943936]
epoch: 11, batch_id: 500, loss is: [1.8482195]
epoch: 12, batch_id: 0, loss is: [1.9918009]
epoch: 12, batch_id: 500, loss is: [2.6647885]
epoch: 13, batch_id: 0, loss is: [2.0957792]
epoch: 13, batch_id: 500, loss is: [2.0709934]
epoch: 14, batch_id: 0, loss is: [1.7610024]
epoch: 14, batch_id: 500, loss is: [1.7273884]
epoch: 15, batch_id: 0, loss is: [1.6101546]
epoch: 15, batch_id: 500, loss is: [1.9188664]
epoch: 16, batch_id: 0, loss is: [1.7432076]
epoch: 16, batch_id: 500, loss is: [1.8815663]
epoch: 17, batch_id: 0, loss is: [1.7994094]
epoch: 17, batch_id: 500, loss is: [1.6375476]
epoch: 18, batch_id: 0, loss is: [1.8776023]
epoch: 18, batch_id: 500, loss is: [2.1464596]
epoch: 19, batch_id: 0, loss is: [2.0169969]
epoch: 19, batch_id: 500, loss is: [1.8177545]
```

## 六、模型预测¶

```near_neighbours_per_example = 10

test_images_embeddings = model(x_test_t)

indicies = indicies.numpy()
```
```examples = np.empty(
(
num_classes,
near_neighbours_per_example + 1,
3,
height_width,
height_width,
),
dtype=np.float32,
)

for row_idx in range(num_classes):
examples_for_class = class_idx_to_test_idxs[row_idx]
anchor_idx = random.choice(examples_for_class)

examples[row_idx, 0] = x_test[anchor_idx]
anchor_near_neighbours = indicies[anchor_idx][1:near_neighbours_per_example+1]
for col_idx, nn_idx in enumerate(anchor_near_neighbours):
examples[row_idx, col_idx + 1] = x_test[nn_idx]

show_collage(examples)
```