# coding=utf-8
# Copyright (c) Alibaba, Inc. and its affiliates.
import tensorflow as tf

if tf.__version__ >= '2.0':
  tf = tf.compat.v1

def support_vector_guided_softmax_loss(pos_score,
                                       neg_scores,
                                       margin=0,
                                       t=1,
                                       smooth=1.0,
                                       threshold=0,
                                       weights=1.0):
  """Refer to the paper: Support Vector Guided Softmax Loss for Face Recognition (https://128.84.21.199/abs/1812.11317)."""
  new_pos_score = pos_score - margin
  cond = tf.greater_equal(new_pos_score - neg_scores, threshold)
  # I_k: indicates the negatives that violate the margin (the support vectors).
  mask = tf.where(cond, tf.zeros_like(cond, tf.float32),
                  tf.ones_like(cond, tf.float32))
  # Rescale only the violating negatives: neg * t + t - 1 (t >= 1).
  new_neg_scores = mask * (neg_scores * t + t - 1) + (1 - mask) * neg_scores
  logits = tf.concat([new_pos_score, new_neg_scores], axis=1)
  if smooth != 1.0:
    logits *= smooth

  # The positive score sits in column 0, so every label is zero.
  loss = tf.losses.sparse_softmax_cross_entropy(
      tf.zeros_like(pos_score, dtype=tf.int32), logits, weights=weights)
  # Set the loss to zero if the batch has no positive sample.
  loss = tf.where(tf.is_nan(loss), tf.zeros_like(loss), loss)
  return loss
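

# A minimal usage sketch (illustrative only, not part of the original module):
# the scores below are made-up cosine similarities for a single query. With
# margin=0.1, the first negative violates `pos - margin - neg >= threshold`,
# so its logit is rescaled to `neg * t + t - 1`; the second stays unchanged.
def _demo_support_vector_guided_softmax_loss():
  pos_score = tf.constant([[0.8]])         # shape: (num_queries, 1)
  neg_scores = tf.constant([[0.75, 0.1]])  # shape: (num_queries, num_negatives)
  return support_vector_guided_softmax_loss(
      pos_score, neg_scores, margin=0.1, t=1.2)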
def softmax_loss_with_negative_mining(user_emb,
                                      item_emb,
                                      labels,
                                      num_negative_samples=4,
                                      embed_normed=False,
                                      weights=1.0,
                                      gamma=1.0,
                                      margin=0,
                                      t=1,
                                      seed=None):
  """Compute the softmax loss based on the cosine distances explained below.

  Given mini batches of `user_emb` and `item_emb`, this function computes, for
  each element of `user_emb`, the cosine distance between it and the
  corresponding `item_emb`, and additionally the cosine distances between
  `user_emb` and several other elements of `item_emb` (referred to as negative
  samples). The negative samples are formed on the fly by shifting the right
  side (`item_emb`). The softmax loss is then computed over these cosine
  distances.

  Args:
    user_emb: A `Tensor` with shape [batch_size, embedding_size]. The embedding of user.
    item_emb: A `Tensor` with shape [batch_size, embedding_size]. The embedding of item.
    labels: A `Tensor` with shape [batch_size], e.g. click or not click in the session. Its values must be 0 or 1.
    num_negative_samples: the number of negative samples; must be in range [1, batch_size).
    embed_normed: bool, whether the input embeddings are already l2 normalized.
    weights: `weights` acts as a coefficient for the loss. If a scalar is provided,
      then the loss is simply scaled by the given value. If `weights` is a
      tensor of shape `[batch_size]`, then the loss weights apply to each corresponding sample.
    gamma: smooth coefficient of the softmax.
    margin: the margin between a positive pair and a negative pair.
    t: coefficient of the support vector guided softmax loss.
    seed: A Python integer. Used to create a random seed for the distribution.
      See `tf.set_random_seed` for behavior.

  Returns:
    support vector guided softmax loss of the positive labels
  """
  assert 0 < num_negative_samples, '`num_negative_samples` should be greater than 0'

  batch_size = tf.shape(item_emb)[0]
  is_valid = tf.assert_less(
      num_negative_samples,
      batch_size,
      message='`num_negative_samples` should be less than batch_size')
  with tf.control_dependencies([is_valid]):
    if not embed_normed:
      user_emb = tf.nn.l2_normalize(user_emb, axis=-1)
      item_emb = tf.nn.l2_normalize(item_emb, axis=-1)
    # Build negatives by rolling `item_emb` along the batch axis by a random
    # non-zero shift, so an item is never paired with its own user.
    vectors = [item_emb]
    for i in range(num_negative_samples):
      shift = tf.random_uniform([], 1, batch_size, dtype=tf.int32, seed=seed)
      neg_item_emb = tf.roll(item_emb, shift, axis=0)
      vectors.append(neg_item_emb)
    # all_embeddings's shape: (batch_size, num_negative_samples + 1, vec_dim)
    all_embeddings = tf.stack(vectors, axis=1)

    # Keep only the rows with a positive label.
    mask = tf.greater(labels, 0)
    mask_user_emb = tf.boolean_mask(user_emb, mask)
    mask_item_emb = tf.boolean_mask(all_embeddings, mask)
    if isinstance(weights, tf.Tensor):
      weights = tf.boolean_mask(weights, mask)

    # sim_scores's shape: (num_pos_labels_in_batch, num_negative_samples + 1)
    sim_scores = tf.keras.backend.batch_dot(
        mask_user_emb, mask_item_emb, axes=(1, 2))
    pos_score = tf.slice(sim_scores, [0, 0], [-1, 1])
    neg_scores = tf.slice(sim_scores, [0, 1], [-1, -1])

    loss = support_vector_guided_softmax_loss(
        pos_score,
        neg_scores,
        margin=margin,
        t=t,
        smooth=gamma,
        weights=weights)
  return loss
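

# A minimal end-to-end sketch (illustrative only, not part of the original
# module): wires the loss into a toy graph. The batch size, embedding size,
# and label values are made-up assumptions for demonstration.
if __name__ == '__main__':
  user = tf.random_normal([8, 16], seed=1)        # 8 users, 16-dim embeddings
  item = tf.random_normal([8, 16], seed=2)        # the paired items
  clicks = tf.constant([1, 0, 1, 1, 0, 1, 0, 1])  # 1 marks a positive pair
  loss_op = softmax_loss_with_negative_mining(
      user, item, clicks, num_negative_samples=3, gamma=2.0, seed=42)
  with tf.Session() as sess:
    print(sess.run(loss_op))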