from typing import Optional

try:
    from transformers.models.roberta import (
        RobertaConfig,
        RobertaTokenizer,
        RobertaForSequenceClassification,
    )
except ImportError:
    # transformers is an optional dependency; fall back to None so the
    # assert below can raise a helpful error message.
    RobertaConfig = None
    RobertaTokenizer = None
    RobertaForSequenceClassification = None


def Roberta(
    pretrained_model_name_or_path: Optional[str] = None,
    config=None,
    vocab_file=None,
    cache_dir=None,
    **kwargs,
):
r"""
Get Roberta model for sentence classification, either as a pre-trained
model or from scratch.
Args:
pretrained_model_name_or_path: Path of the pre-trained model.
If ``None``, return an untrained Roberta model.
Default to ``None``
config: Config of the Roberta. Required when not loading a
pre-trained model, otherwise unused. Default to ``None``
vocab_file: Path to a vocab file for the tokenizer.
Default to ``None``
cache_dir: Where to save pretrained model. Default to ``None``
kwargs: Additional arguments for the tokenizer if not pretrained.
Returns:
2-element tuple of **Roberta Tokenizer**, **Roberta Model**:
- **Roberta Tokenizer** (*RobertaTokenizer*):
Roberta Tokenizer.
- **Roberta Model** (*RobertaForSequenceClassification*):
Roberta model for sentence classification.
References:
https://huggingface.co/docs/transformers/main/en/model_doc/roberta
Examples:
>>> from tint.models import Roberta
<BLANKLINE>
>>> tokenizer, model = Roberta("roberta-base")
"""
    assert RobertaConfig is not None, "transformers is not installed."

    # Return an untrained Roberta model if no path is provided
    if pretrained_model_name_or_path is None:
        assert config is not None, "Roberta config must be provided."
        assert vocab_file is not None, "vocab file must be provided."
        return (
            RobertaTokenizer(vocab_file, **kwargs),
            RobertaForSequenceClassification(config=config),
        )
    # Otherwise, load the pre-trained tokenizer and model.
    # return_dict=False makes the model return plain tuples instead of
    # ModelOutput objects.
    return (
        RobertaTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            cache_dir=cache_dir,
        ),
        RobertaForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path,
            cache_dir=cache_dir,
            return_dict=False,
        ),
    )
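

# A minimal sketch of the from-scratch path, assuming hypothetical local
# ``vocab.json`` and ``merges.txt`` files. RobertaTokenizer is BPE-based,
# so a ``merges_file`` has to be forwarded through ``kwargs``:
#
#     from transformers import RobertaConfig
#
#     config = RobertaConfig(num_labels=2)
#     tokenizer, model = Roberta(
#         config=config,
#         vocab_file="vocab.json",
#         merges_file="merges.txt",
#     )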