OOD dataset

OODDataset

Bases: object

Class for managing the loading and processing of datasets used for OOD detection. It wraps a dataset-like object augmented with OOD-related information, and its .prepare method returns a dataset-like object suited for scoring or training.

Parameters:

    dataset_id (Union[DatasetType, tuple, dict, str]): The dataset to load. Can be
        loaded from the tensorflow or torch datasets catalog when the str matches
        one of the catalog datasets. Required.
    backend (str, optional): Whether the dataset is to be used for tensorflow or
        torch models. Alternative: "torch". Defaults to "tensorflow".
    keys (list, optional): Keys to use for the dataset elements. Defaults to None.
    load_kwargs (dict, optional): Additional loading kwargs when loading from the
        tensorflow_datasets catalog. Defaults to {}.
    load_from_tensorflow_datasets (bool, optional): For the case where the backend
        is torch but the user still wants to load from the tensorflow_datasets
        catalog. tf.Tensors are then not loaded into VRAM and are converted to
        torch.Tensors on the fly. Defaults to False.
    input_key (str, optional): The key of the element/item to consider as the model
        input tensor. If None, the first key is used. Defaults to None.
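
A minimal usage sketch (the import path and the "mnist" catalog name are
assumptions for illustration, not taken from this page):

from oodeel.datasets import OODDataset

# Load a dataset by name from the tensorflow_datasets catalog (hypothetical)
ds_in = OODDataset("mnist", load_kwargs={"split": "test"})

# The same catalog can also feed a torch model; tf.Tensors are then kept out
# of VRAM and converted to torch.Tensors on the fly
ds_in_torch = OODDataset(
    "mnist",
    backend="torch",
    load_from_tensorflow_datasets=True,
    load_kwargs={"split": "test"},
)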
Source code in oodeel/datasets/ooddataset.py
class OODDataset(object):
    """Class for managing loading and processing of datasets that are to be used for
    OOD detection. The class encapsulates a dataset like object augmented with OOD
    related information, and then returns a dataset like object that is suited for
    scoring or training with the .prepare method.

    Args:
        dataset_id (Union[DatasetType, tuple, dict, str]): The dataset to load.
            Can be loaded from the tensorflow or torch datasets catalog when the
            str matches one of the catalog datasets.
        backend (str, optional): Whether the dataset is to be used for tensorflow
            or torch models. Defaults to "tensorflow". Alternative: "torch".
        keys (list, optional): Keys to use for the dataset elements.
            Defaults to None.
        load_kwargs (dict, optional): Additional loading kwargs when loading from
            the tensorflow_datasets catalog. Defaults to {}.
        load_from_tensorflow_datasets (bool, optional): For the case where the
            backend is torch but the user still wants to load from the
            tensorflow_datasets catalog. tf.Tensors are then not loaded into VRAM
            and are converted to torch.Tensors on the fly. Defaults to False.
        input_key (str, optional): The key of the element/item to consider as the
            model input tensor. If None, the first key is used. Defaults to None.
    """

    def __init__(
        self,
        dataset_id: Union[DatasetType, tuple, dict, str],
        backend: str = "tensorflow",
        keys: Optional[list] = None,
        load_kwargs: dict = {},
        load_from_tensorflow_datasets: bool = False,
        input_key: Optional[str] = None,
    ):
        self.backend = backend
        self.load_from_tensorflow_datasets = load_from_tensorflow_datasets

        # The length of the dataset is kept as attribute to avoid redundant
        # iterations over self.data
        self.length = None

        # Set the load parameters for tfds / torchvision
        if backend == "tensorflow":
            load_kwargs["as_supervised"] = False
        # Set the channel order depending on the backend
        if self.backend == "torch":
            if load_from_tensorflow_datasets:
                from .tf_data_handler import TFDataHandler
                import tensorflow as tf

                tf.config.set_visible_devices([], "GPU")
                self._data_handler = TFDataHandler()
                load_kwargs["as_supervised"] = False
            else:
                from .torch_data_handler import TorchDataHandler

                self._data_handler = TorchDataHandler()
            self.channel_order = "channels_first"
        else:
            from .tf_data_handler import TFDataHandler

            self._data_handler = TFDataHandler()
            self.channel_order = "channels_last"

        self.load_params = load_kwargs
        # Load the dataset depending on the type of dataset_id
        self.data = self._data_handler.load_dataset(dataset_id, keys, load_kwargs)

        # Get the length of the elements/items in the dataset
        self.len_item = self._data_handler.get_item_length(self.data)
        if self.has_ood_label:
            self.len_item -= 1

        # Get the key of the tensor to feed the model with
        if input_key is None:
            self.input_key = self._data_handler.get_ds_feature_keys(self.data)[0]
        else:
            self.input_key = input_key

    def __len__(self) -> int:
        """get the length of the dataset.

        Returns:
            int: length of the dataset
        """
        if self.length is None:
            self.length = self._data_handler.get_dataset_length(self.data)
        return self.length

    @property
    def has_ood_label(self) -> bool:
        """Check if the dataset has an out-of-distribution label.

        Returns:
            bool: True if the data handler has an "ood_label" feature key.
        """
        return self._data_handler.has_feature_key(self.data, "ood_label")

    def get_ood_labels(
        self,
    ) -> np.ndarray:
        """Get ood_labels from self.data if any

        Returns:
            np.ndarray: array of labels
        """
        assert self._data_handler.has_feature_key(
            self.data, "ood_label"
        ), "The data has no ood_labels"
        labels = self._data_handler.get_feature_from_ds(self.data, "ood_label")
        return labels

    def add_out_data(
        self,
        out_dataset: Union["OODDataset", DatasetType],
        in_value: int = 0,
        out_value: int = 1,
        resize: Optional[bool] = False,
        shape: Optional[Tuple[int]] = None,
    ) -> "OODDataset":
        """Concatenate two OODDatasets. Useful for scoring on multiple datasets, or
        training with added out-of-distribution data.

        Args:
            out_dataset (Union[OODDataset, DatasetType]): dataset of
                out-of-distribution data
            in_value (int): ood label value for in-distribution data. Defaults to 0
            out_value (int): ood label value for out-of-distribution data. Defaults to 1
            resize (Optional[bool], optional): toggles whether the input tensors
                of the datasets have to be resized to a common shape.
                Defaults to False.
            shape (Optional[Tuple[int]], optional): shape to use for resizing the
                input tensors. If None, the tensors are resized to the shape of
                the in-distribution input tensors. Defaults to None.

        Returns:
            OODDataset: a Dataset object with the concatenated data
        """

        # Create an OODDataset from out_dataset if necessary, and make sure the
        # two OODDatasets have compatible parameters
        if isinstance(out_dataset, type(self)):
            out_dataset = out_dataset.data
        else:
            out_dataset = OODDataset(out_dataset, backend=self.backend).data

        # Assign the ood_label values: in_value to self.data and out_value to
        # out_dataset
        self.data = self._data_handler.assign_feature_value(
            self.data, "ood_label", in_value
        )
        out_dataset = self._data_handler.assign_feature_value(
            out_dataset, "ood_label", out_value
        )

        # Merge the two underlying Datasets
        merge_kwargs = (
            {"channel_order": self.channel_order}
            if self.backend == "tensorflow"
            else {}
        )
        data = self._data_handler.merge(
            self.data,
            out_dataset,
            resize=resize,
            shape=shape,
            **merge_kwargs,
        )

        # Create a new OODDataset from the merged Dataset
        output_ds = OODDataset(
            dataset_id=data,
            backend=self.backend,
        )

        return output_ds

    def split_by_class(
        self,
        in_labels: Optional[Union[np.ndarray, list]] = None,
        out_labels: Optional[Union[np.ndarray, list]] = None,
    ) -> Optional[Tuple["OODDataset"]]:
        """Filter the dataset by assigning ood labels depending on labels
        value (typically, class id).

        Args:
            in_labels (Optional[Union[np.ndarray, list]], optional): set of labels
                to be considered as in-distribution. Defaults to None.
            out_labels (Optional[Union[np.ndarray, list]], optional): set of labels
                to be considered as out-of-distribution. Defaults to None.

        Returns:
            Optional[Tuple[OODDataset]]: Tuple of in-distribution and
                out-of-distribution OODDatasets
        """
        # Make sure the dataset has labels
        assert (in_labels is not None) or (
            out_labels is not None
        ), "specify labels to filter with"
        assert self.len_item >= 2, "the dataset has no labels"

        # Filter the dataset depending on in_labels and out_labels given
        if (out_labels is not None) and (in_labels is not None):
            in_data = self._data_handler.filter_by_feature_value(
                self.data, "label", in_labels
            )
            out_data = self._data_handler.filter_by_feature_value(
                self.data, "label", out_labels
            )

        elif out_labels is None:
            in_data = self._data_handler.filter_by_feature_value(
                self.data, "label", in_labels
            )
            out_data = self._data_handler.filter_by_feature_value(
                self.data, "label", in_labels, excluded=True
            )

        elif in_labels is None:
            in_data = self._data_handler.filter_by_feature_value(
                self.data, "label", out_labels, excluded=True
            )
            out_data = self._data_handler.filter_by_feature_value(
                self.data, "label", out_labels
            )

        # Return the filtered OODDatasets
        return (
            OODDataset(in_data, backend=self.backend),
            OODDataset(out_data, backend=self.backend),
        )

    def prepare(
        self,
        batch_size: int = 128,
        preprocess_fn: Optional[Callable] = None,
        augment_fn: Optional[Callable] = None,
        with_ood_labels: bool = False,
        with_labels: bool = True,
        shuffle: bool = False,
        shuffle_buffer_size: Optional[int] = None,
    ) -> DatasetType:
        """Prepare self.data for scoring or training

        Args:
            batch_size (int, optional): Batch size of the returned dataset-like
                object. Defaults to 128.
            preprocess_fn (Callable, optional): Preprocessing function to apply to
                the dataset. Defaults to None.
            augment_fn (Callable, optional): Augmentation function to apply (when
                the returned dataset is to be used for training). Defaults to None.
            with_ood_labels (bool, optional): Whether to return the dataset with
                ood_labels. Defaults to False.
            with_labels (bool, optional): Whether to return the dataset with
                labels. Defaults to True.
            shuffle (bool, optional): Whether to shuffle the returned dataset.
                Defaults to False.
            shuffle_buffer_size (int, optional): (TF only) Size of the shuffle buffer.
                If None, taken as the number of samples in the dataset.
                Defaults to None.

        Returns:
            DatasetType: prepared dataset
        """
        # Check if the dataset has at least one of label and ood_label
        assert (
            with_ood_labels or with_labels
        ), "The dataset must have at least one of label and ood_label"

        # Check if the dataset has ood_labels when asked to return with_ood_labels
        if with_ood_labels:
            assert (
                self.has_ood_label
            ), "Please assign ood labels before preparing with ood_labels"

        dataset_to_prepare = self.data

        # Make the dataset channel-first when torch consumes tfds-loaded data
        if self.backend == "torch" and self.load_from_tensorflow_datasets:
            dataset_to_prepare = self._data_handler.make_channel_first(
                self.input_key, dataset_to_prepare
            )

        # Select the keys to be returned
        keys = [self.input_key, "label", "ood_label"]
        if not with_labels:
            keys.remove("label")
        if not with_ood_labels:
            keys.remove("ood_label")

        # Prepare the dataset for training or scoring
        dataset = self._data_handler.prepare_for_training(
            dataset=dataset_to_prepare,
            batch_size=batch_size,
            shuffle=shuffle,
            preprocess_fn=preprocess_fn,
            augment_fn=augment_fn,
            output_keys=keys,
            shuffle_buffer_size=shuffle_buffer_size,
        )

        return dataset

has_ood_label: bool property

Check if the dataset has an out-of-distribution label.

Returns:

    bool: True if the data handler has an "ood_label" feature key.

__init__(dataset_id, backend='tensorflow', keys=None, load_kwargs={}, load_from_tensorflow_datasets=False, input_key=None)

Source code in oodeel/datasets/ooddataset.py
def __init__(
    self,
    dataset_id: Union[DatasetType, tuple, dict, str],
    backend: str = "tensorflow",
    keys: Optional[list] = None,
    load_kwargs: dict = {},
    load_from_tensorflow_datasets: bool = False,
    input_key: Optional[str] = None,
):
    self.backend = backend
    self.load_from_tensorflow_datasets = load_from_tensorflow_datasets

    # The length of the dataset is kept as attribute to avoid redundant
    # iterations over self.data
    self.length = None

    # Set the load parameters for tfds / torchvision
    if backend == "tensorflow":
        load_kwargs["as_supervised"] = False
    # Set the channel order depending on the backend
    if self.backend == "torch":
        if load_from_tensorflow_datasets:
            from .tf_data_handler import TFDataHandler
            import tensorflow as tf

            tf.config.set_visible_devices([], "GPU")
            self._data_handler = TFDataHandler()
            load_kwargs["as_supervised"] = False
        else:
            from .torch_data_handler import TorchDataHandler

            self._data_handler = TorchDataHandler()
        self.channel_order = "channels_first"
    else:
        from .tf_data_handler import TFDataHandler

        self._data_handler = TFDataHandler()
        self.channel_order = "channels_last"

    self.load_params = load_kwargs
    # Load the dataset depending on the type of dataset_id
    self.data = self._data_handler.load_dataset(dataset_id, keys, load_kwargs)

    # Get the length of the elements/items in the dataset
    self.len_item = self._data_handler.get_item_length(self.data)
    if self.has_ood_label:
        self.len_item -= 1

    # Get the key of the tensor to feed the model with
    if input_key is None:
        self.input_key = self._data_handler.get_ds_feature_keys(self.data)[0]
    else:
        self.input_key = input_key

__len__()

Get the length of the dataset.

Returns:

    int: length of the dataset.
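
Since the length is cached in self.length after the first call, repeated len()
calls are cheap; e.g. (continuing the hypothetical sketch above):

n = len(ds_in)  # computed once via the data handler, then cached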

Source code in oodeel/datasets/ooddataset.py
109
110
111
112
113
114
115
116
117
def __len__(self) -> int:
    """get the length of the dataset.

    Returns:
        int: length of the dataset
    """
    if self.length is None:
        self.length = self._data_handler.get_dataset_length(self.data)
    return self.length

add_out_data(out_dataset, in_value=0, out_value=1, resize=False, shape=None)

Concatenate two OODDatasets. Useful for scoring on multiple datasets, or training with added out-of-distribution data.

Parameters:

    out_dataset (Union[OODDataset, DatasetType]): dataset of out-of-distribution
        data. Required.
    in_value (int): ood label value for in-distribution data. Defaults to 0.
    out_value (int): ood label value for out-of-distribution data. Defaults to 1.
    resize (Optional[bool], optional): toggles whether the input tensors of the
        datasets have to be resized to a common shape. Defaults to False.
    shape (Optional[Tuple[int]], optional): shape to use for resizing the input
        tensors. If None, the tensors are resized to the shape of the
        in-distribution input tensors. Defaults to None.

Returns:

    OODDataset: a Dataset object with the concatenated data.
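
A hedged example continuing the hypothetical ds_in above (the "fashion_mnist"
catalog name is likewise an assumption):

ds_out = OODDataset("fashion_mnist", load_kwargs={"split": "test"})

# Each item of the result carries an "ood_label" feature: 0 for in-distribution,
# 1 for out-of-distribution; resize/shape matter only when input shapes differ
ds_scoring = ds_in.add_out_data(ds_out, in_value=0, out_value=1)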

Source code in oodeel/datasets/ooddataset.py
def add_out_data(
    self,
    out_dataset: Union["OODDataset", DatasetType],
    in_value: int = 0,
    out_value: int = 1,
    resize: Optional[bool] = False,
    shape: Optional[Tuple[int]] = None,
) -> "OODDataset":
    """Concatenate two OODDatasets. Useful for scoring on multiple datasets, or
    training with added out-of-distribution data.

    Args:
        out_dataset (Union[OODDataset, DatasetType]): dataset of
            out-of-distribution data
        in_value (int): ood label value for in-distribution data. Defaults to 0
        out_value (int): ood label value for out-of-distribution data. Defaults to 1
        resize (Optional[bool], optional): toggles whether the input tensors
            of the datasets have to be resized to a common shape.
            Defaults to False.
        shape (Optional[Tuple[int]], optional): shape to use for resizing the
            input tensors. If None, the tensors are resized to the shape of
            the in-distribution input tensors. Defaults to None.

    Returns:
        OODDataset: a Dataset object with the concatenated data
    """

    # Create an OODDataset from out_dataset if necessary, and make sure the
    # two OODDatasets have compatible parameters
    if isinstance(out_dataset, type(self)):
        out_dataset = out_dataset.data
    else:
        out_dataset = OODDataset(out_dataset, backend=self.backend).data

    # Assign the ood_label values: in_value to self.data and out_value to
    # out_dataset
    self.data = self._data_handler.assign_feature_value(
        self.data, "ood_label", in_value
    )
    out_dataset = self._data_handler.assign_feature_value(
        out_dataset, "ood_label", out_value
    )

    # Merge the two underlying Datasets
    merge_kwargs = (
        {"channel_order": self.channel_order}
        if self.backend == "tensorflow"
        else {}
    )
    data = self._data_handler.merge(
        self.data,
        out_dataset,
        resize=resize,
        shape=shape,
        **merge_kwargs,
    )

    # Create a new OODDataset from the merged Dataset
    output_ds = OODDataset(
        dataset_id=data,
        backend=self.backend,
    )

    return output_ds

get_ood_labels()

Get the ood_labels from self.data, if any.

Returns:

    np.ndarray: array of ood labels.
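
Continuing the hypothetical ds_scoring from add_out_data, one might guard the
call with the has_ood_label property:

if ds_scoring.has_ood_label:
    ood_labels = ds_scoring.get_ood_labels()  # np.ndarray of in_value/out_value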

Source code in oodeel/datasets/ooddataset.py
def get_ood_labels(
    self,
) -> np.ndarray:
    """Get ood_labels from self.data if any

    Returns:
        np.ndarray: array of labels
    """
    assert self._data_handler.has_feature_key(
        self.data, "ood_label"
    ), "The data has no ood_labels"
    labels = self._data_handler.get_feature_from_ds(self.data, "ood_label")
    return labels

prepare(batch_size=128, preprocess_fn=None, augment_fn=None, with_ood_labels=False, with_labels=True, shuffle=False, shuffle_buffer_size=None)

Prepare self.data for scoring or training

Parameters:

    batch_size (int, optional): Batch size of the returned dataset-like object.
        Defaults to 128.
    preprocess_fn (Callable, optional): Preprocessing function to apply to the
        dataset. Defaults to None.
    augment_fn (Callable, optional): Augmentation function to apply (when the
        returned dataset is to be used for training). Defaults to None.
    with_ood_labels (bool, optional): Whether to return the dataset with
        ood_labels. Defaults to False.
    with_labels (bool, optional): Whether to return the dataset with labels.
        Defaults to True.
    shuffle (bool, optional): Whether to shuffle the returned dataset.
        Defaults to False.
    shuffle_buffer_size (int, optional): (TF only) Size of the shuffle buffer.
        If None, taken as the number of samples in the dataset. Defaults to None.

Returns:

    DatasetType: prepared dataset.
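
A hedged sketch of preparing the hypothetical ds_scoring for scoring; the exact
item structure seen by preprocess_fn (a dict keyed by feature names, with
"image" as input key) and the scaling constant are assumptions:

def preprocess_fn(item):
    # Scale images from [0, 255] to [0, 1] (assumes float-compatible tensors)
    item["image"] = item["image"] / 255.0
    return item

ds_batched = ds_scoring.prepare(
    batch_size=128,
    preprocess_fn=preprocess_fn,
    with_ood_labels=True,
    with_labels=False,
)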

Source code in oodeel/datasets/ooddataset.py
def prepare(
    self,
    batch_size: int = 128,
    preprocess_fn: Optional[Callable] = None,
    augment_fn: Optional[Callable] = None,
    with_ood_labels: bool = False,
    with_labels: bool = True,
    shuffle: bool = False,
    shuffle_buffer_size: Optional[int] = None,
) -> DatasetType:
    """Prepare self.data for scoring or training

    Args:
        batch_size (int, optional): Batch size of the returned dataset-like
            object. Defaults to 128.
        preprocess_fn (Callable, optional): Preprocessing function to apply to
            the dataset. Defaults to None.
        augment_fn (Callable, optional): Augmentation function to apply (when
            the returned dataset is to be used for training). Defaults to None.
        with_ood_labels (bool, optional): Whether to return the dataset with
            ood_labels. Defaults to False.
        with_labels (bool, optional): Whether to return the dataset with
            labels. Defaults to True.
        shuffle (bool, optional): Whether to shuffle the returned dataset.
            Defaults to False.
        shuffle_buffer_size (int, optional): (TF only) Size of the shuffle buffer.
            If None, taken as the number of samples in the dataset.
            Defaults to None.

    Returns:
        DatasetType: prepared dataset
    """
    # Check if the dataset has at least one of label and ood_label
    assert (
        with_ood_labels or with_labels
    ), "The dataset must have at least one of label and ood_label"

    # Check if the dataset has ood_labels when asked to return with_ood_labels
    if with_ood_labels:
        assert (
            self.has_ood_label
        ), "Please assign ood labels before preparing with ood_labels"

    dataset_to_prepare = self.data

    # Make the dataset channel-first when torch consumes tfds-loaded data
    if self.backend == "torch" and self.load_from_tensorflow_datasets:
        dataset_to_prepare = self._data_handler.make_channel_first(
            self.input_key, dataset_to_prepare
        )

    # Select the keys to be returned
    keys = [self.input_key, "label", "ood_label"]
    if not with_labels:
        keys.remove("label")
    if not with_ood_labels:
        keys.remove("ood_label")

    # Prepare the dataset for training or scoring
    dataset = self._data_handler.prepare_for_training(
        dataset=dataset_to_prepare,
        batch_size=batch_size,
        shuffle=shuffle,
        preprocess_fn=preprocess_fn,
        augment_fn=augment_fn,
        output_keys=keys,
        shuffle_buffer_size=shuffle_buffer_size,
    )

    return dataset

split_by_class(in_labels=None, out_labels=None)

Split the dataset into in-distribution and out-of-distribution parts depending on the label values (typically, class ids).

Parameters:

    in_labels (Optional[Union[np.ndarray, list]], optional): set of labels to be
        considered as in-distribution. Defaults to None.
    out_labels (Optional[Union[np.ndarray, list]], optional): set of labels to be
        considered as out-of-distribution. Defaults to None.

Returns:

    Optional[Tuple[OODDataset]]: Tuple of in-distribution and out-of-distribution
        OODDatasets.
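
For example, a hedged one-vs-rest split where half of the classes serve as
in-distribution data (the class ids are arbitrary, for illustration):

# Classes 0-4 are kept as in-distribution; all remaining classes become
# the out-of-distribution part
ds_fit, ds_ood = ds_in.split_by_class(in_labels=[0, 1, 2, 3, 4])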

Source code in oodeel/datasets/ooddataset.py
def split_by_class(
    self,
    in_labels: Optional[Union[np.ndarray, list]] = None,
    out_labels: Optional[Union[np.ndarray, list]] = None,
) -> Optional[Tuple["OODDataset"]]:
    """Filter the dataset by assigning ood labels depending on labels
    value (typically, class id).

    Args:
        in_labels (Optional[Union[np.ndarray, list]], optional): set of labels
            to be considered as in-distribution. Defaults to None.
        out_labels (Optional[Union[np.ndarray, list]], optional): set of labels
            to be considered as out-of-distribution. Defaults to None.

    Returns:
        Optional[Tuple[OODDataset]]: Tuple of in-distribution and
            out-of-distribution OODDatasets
    """
    # Make sure the dataset has labels
    assert (in_labels is not None) or (
        out_labels is not None
    ), "specify labels to filter with"
    assert self.len_item >= 2, "the dataset has no labels"

    # Filter the dataset depending on in_labels and out_labels given
    if (out_labels is not None) and (in_labels is not None):
        in_data = self._data_handler.filter_by_feature_value(
            self.data, "label", in_labels
        )
        out_data = self._data_handler.filter_by_feature_value(
            self.data, "label", out_labels
        )

    elif out_labels is None:
        in_data = self._data_handler.filter_by_feature_value(
            self.data, "label", in_labels
        )
        out_data = self._data_handler.filter_by_feature_value(
            self.data, "label", in_labels, excluded=True
        )

    elif in_labels is None:
        in_data = self._data_handler.filter_by_feature_value(
            self.data, "label", out_labels, excluded=True
        )
        out_data = self._data_handler.filter_by_feature_value(
            self.data, "label", out_labels
        )

    # Return the filtered OODDatasets
    return (
        OODDataset(in_data, backend=self.backend),
        OODDataset(out_data, backend=self.backend),
    )