# Source code for kiwi.cli.pipelines.evaluate

#  OpenKiwi: Open-Source Machine Translation Quality Estimation
#  Copyright (C) 2019 Unbabel <openkiwi@unbabel.com>
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Affero General Public License as published
#  by the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Affero General Public License for more details.
#
#  You should have received a copy of the GNU Affero General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
#

import logging

from kiwi.cli.better_argparse import PipelineParser
from kiwi.cli.opts import PathType
from kiwi.lib import evaluate

logger = logging.getLogger(__name__)


def evaluate_opts(parser):
    """Register the command-line options of the evaluate pipeline.

    All options are added to a single argument group titled
    "Evaluation of WMT Quality Estimation". They fall into three families:
    format selectors (``--type``, ``--format``, ``--pred-format``,
    ``--sents-avg``), gold-standard files (``--gold-*``) and prediction
    files (``--input-dir``, ``--pred-*``).

    Args:
        parser: argparse-style parser exposing ``add_argument_group``; the
            options are registered on it in place.
    """
    group = parser.add_argument_group("Evaluation of WMT Quality Estimation")

    # Format selectors.
    group.add_argument(
        "--type",
        help="Input type for prediction file",
        choices=["probs", "tags"],
        type=str,
        default="probs",
    )
    group.add_argument(
        "--format",
        help="Input format for gold files",
        choices=["wmt17", "wmt18"],
        type=str,
        default="wmt17",
    )
    group.add_argument(
        "--pred-format",
        # The default is fixed to "wmt18" and does NOT follow --format
        # (which defaults to "wmt17"); the help text states the value that
        # is actually used.
        help="Input format for predicted files (default: wmt18).",
        choices=["wmt17", "wmt18"],
        type=str,
        default="wmt18",
    )
    group.add_argument(
        "--sents-avg",
        help="Obtain scores for sentences by averaging over tags or "
        "probabilities.",
        choices=["probs", "tags"],
        type=str,
        # No explicit default is given for this option.
    )

    # Gold files.
    group.add_argument(
        "--gold-sents",
        help="Sentences gold standard. ",
        type=PathType(exists=True),
        required=False,
    )
    group.add_argument(
        "--gold-target",
        help="Target tags gold standard, or target and gaps "
        'if format == "wmt18".',
        type=PathType(exists=True),
        required=False,
    )
    group.add_argument(
        "--gold-source",
        help="Source tags gold standard.",
        type=PathType(exists=True),
        required=False,
    )
    group.add_argument(
        "--gold-cal",
        help="Target Tags to calibrate.",
        type=PathType(exists=True),
        required=False,
    )

    # Prediction files. Except for --pred-cal, these accept one or more
    # paths (nargs="+").
    group.add_argument(
        "--input-dir",
        help="Directory with prediction files generated by predict pipeline. "
        "Setting this argument will evaluate all predictions for "
        "which a gold file is set.",
        nargs="+",
        type=PathType(exists=True),
        # Not marked as required: predictions can also be supplied through
        # the individual --pred-* options below.
    )
    group.add_argument(
        "--pred-sents",
        help="Sentences HTER predictions.",
        type=PathType(exists=True),
        nargs="+",
        required=False,
    )
    group.add_argument(
        "--pred-target",
        help="Target predictions; can be tags or probabilities (of BAD). "
        "See --type.",
        type=PathType(exists=True),
        nargs="+",
        required=False,
    )
    group.add_argument(
        "--pred-gaps",
        help="Gap predictions; can be tags or probabilities (of BAD). "
        "(see --type). Use this option for files that only contain gap "
        "tags.",
        type=PathType(exists=True),
        nargs="+",
        required=False,
    )
    group.add_argument(
        "--pred-source",
        help="Source predictions. can be tags or probabilities (of BAD). "
        " See --type.",
        type=PathType(exists=True),
        nargs="+",
        required=False,
    )
    group.add_argument(
        "--pred-cal",
        help="Target Predictions to calibrate.",
        type=PathType(exists=True),
        required=False,
    )


def build_parser():
    """Create the argument parser for the ``evaluate`` pipeline.

    Returns:
        A ``PipelineParser`` named "evaluate" that registers its options via
        ``evaluate_opts``, with I/O options enabled and the general, logging
        and save/load option sets disabled.
    """
    parser_settings = {
        "name": "evaluate",
        "model_parsers": None,
        "options_fn": evaluate_opts,
        "add_io_options": True,
        "add_general_options": False,
        "add_logging_options": False,
        "add_save_load_options": False,
    }
    return PipelineParser(**parser_settings)


def main(argv=None):
    """Parse the command line and run the evaluation pipeline.

    Args:
        argv: argument list forwarded to the parser as ``args``. Defaults to
            ``None`` (presumably the parser then falls back to the process
            arguments -- confirm in ``PipelineParser.parse``).
    """
    parsed_options = build_parser().parse(args=argv)
    evaluate.evaluate_from_options(parsed_options)


# Run the evaluate pipeline only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()