Source code for gluonnlp.loss.loss

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Loss functions."""

__all__ = ['MaskedSoftmaxCrossEntropyLoss', 'MaskedSoftmaxCELoss']

import numpy as np
from mxnet.gluon.loss import SoftmaxCELoss

class MaskedSoftmaxCrossEntropyLoss(SoftmaxCELoss):
    r"""Wrapper of the SoftmaxCELoss that supports valid_length as the input
    (alias: MaskedSoftmaxCELoss).

    If `sparse_label` is `True` (default), `label` should contain integer
    category indicators:

    .. math::

        \DeclareMathOperator{softmax}{softmax}

        p = \softmax({pred})

        L = -\sum_i \log p_{i,{label}_i}

    `label`'s shape should be `pred`'s shape with the channel dimension C
    removed, i.e. for `pred` with shape (1, 2, 3), `label`'s shape should be
    (1, 2).

    If `sparse_label` is `False`, `label` should contain a probability
    distribution, and `label`'s shape should be the same as `pred`'s:

    .. math::

        p = \softmax({pred})

        L = -\sum_i \sum_j {label}_{ij} \log p_{ij}

    Parameters
    ----------
    sparse_label : bool, default True
        Whether `label` is an integer array instead of a probability
        distribution.
    from_logits : bool, default False
        Whether the input is a log probability (usually from log_softmax)
        instead of unnormalized numbers.
    weight : float or None
        Global scalar weight for the loss.

    Inputs:
        - **pred**: the prediction tensor; its shape should be (N, T, C).
        - **label**: the truth tensor. When `sparse_label` is True, `label`'s
          shape should be `pred`'s shape with the channel dimension C removed,
          i.e. for `pred` with shape (1, 2, 3), `label`'s shape should be
          (1, 2) and values should be integers between 0 and 2. If
          `sparse_label` is False, `label`'s shape must be the same as `pred`
          and values should be floats in the range `[0, 1]`.
        - **valid_length**: valid length of each sequence, of shape
          (batch_size,). Prediction elements beyond their valid_length are
          masked out.

    Outputs:
        - **loss**: loss tensor with shape (batch_size,). Dimensions other
          than batch_axis are averaged out.
    """

    def __init__(self, sparse_label=True, from_logits=False, weight=None,
                 **kwargs):
        # The masking technique below only works with NTC data: axis -1 is
        # the class dimension and axis 0 is the batch dimension.
        axis = -1
        batch_axis = 0
        super(MaskedSoftmaxCrossEntropyLoss, self).__init__(
            axis, sparse_label, from_logits, weight, batch_axis, **kwargs)
    def hybrid_forward(self, F, pred, label, valid_length):
        # pylint: disable=arguments-differ
        if self._sparse_label:
            # Integer labels have shape (N, T); expand to (N, T, 1) so the
            # per-step weight broadcasts against the loss.
            sample_weight = F.cast(F.expand_dims(F.ones_like(label), axis=-1),
                                   dtype=np.float32)
        else:
            # Dense labels already share pred's (N, T, C) shape.
            sample_weight = F.ones_like(label)
        # Zero the weights of time steps at or beyond each sequence's
        # valid_length (axis=1 is the time axis in NTC layout).
        sample_weight = F.SequenceMask(sample_weight,
                                       sequence_length=valid_length,
                                       use_sequence_length=True, axis=1)
        return super(MaskedSoftmaxCrossEntropyLoss, self).hybrid_forward(
            F, pred, label, sample_weight)
MaskedSoftmaxCELoss = MaskedSoftmaxCrossEntropyLoss
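

# A minimal usage sketch (an editor's illustration, not part of the original
# module). The shapes and values below are assumptions chosen to show the
# expected call signature: `pred` in NTC layout, integer class labels, and
# per-sequence valid lengths.
if __name__ == '__main__':
    import mxnet as mx

    loss_fn = MaskedSoftmaxCELoss()
    pred = mx.nd.random.uniform(shape=(2, 3, 4))  # (N=2, T=3, C=4)
    label = mx.nd.array([[0, 1, 2], [3, 0, 1]])   # class ids, shape (N, T)
    valid_length = mx.nd.array([2, 3])            # steps past these are masked
    loss = loss_fn(pred, label, valid_length)     # shape (N,) = (2,)
    print(loss)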