From 662f467821a92052d2216cd212e487fa5fa76be9 Mon Sep 17 00:00:00 2001
From: SirStendec <sir@stendec.me>
Date: Wed, 22 Sep 2021 15:13:59 -0400
Subject: [PATCH 1/5] Add initial support for ICU MessageFormat. * Implements a
 fairly comprehensive checking class. * Adds documentation about the check to
 admin/checks.rst and user/checks.rst * Includes a new dependency on
 `pyicumessageformat`.

---
 docs/admin/checks.rst                   |   8 +
 docs/user/checks.rst                    |  67 +++-
 requirements.txt                        |   1 +
 weblate/checks/flags.py                 |   4 +
 weblate/checks/icu.py                   | 510 ++++++++++++++++++++++++
 weblate/checks/models.py                |   1 +
 weblate/checks/tests/test_checks.py     |   5 +-
 weblate/checks/tests/test_icu_checks.py | 396 ++++++++++++++++++
 weblate/settings_docker.py              |   1 +
 weblate/settings_example.py             |   1 +
 10 files changed, 992 insertions(+), 2 deletions(-)
 create mode 100644 weblate/checks/icu.py
 create mode 100644 weblate/checks/tests/test_icu_checks.py

diff --git a/docs/admin/checks.rst b/docs/admin/checks.rst
index a89de11225a5..d8162bf889a6 100644
--- a/docs/admin/checks.rst
+++ b/docs/admin/checks.rst
@@ -62,6 +62,10 @@ Here is a list of flags currently accepted:
     Define font-size for rendering checks, see :ref:`fonts`.
 ``font-spacing:SPACING``
     Define letter spacing for rendering checks, see :ref:`fonts`.
+``icu-flags:FLAGS``
+    Define flags for customizing the behavior of the :ref:`check-icu-message-format` quality check.
+``icu-tag-prefix:PREFIX``
+    Set a required prefix for XML tags for the :ref:`check-icu-message-format` quality check.
 ``placeholders:NAME:NAME2:...``
     Placeholder strings expected in translation, see :ref:`check-placeholders`.
 ``replacements:FROM:TO:FROM2:TO2...``
@@ -89,6 +93,8 @@ Here is a list of flags currently accepted:
     Enable the :ref:`check-es-format` quality check.
 ``i18next-interpolation``
     Enable the :ref:`check-i18next-interpolation` quality check.
+``icu-message-format``
+    Enable the :ref:`check-icu-message-format` quality check.
 ``java-format``
     Enable the :ref:`check-java-format` quality check.
 ``java-messageformat``
@@ -145,6 +151,8 @@ Here is a list of flags currently accepted:
     Skip the :ref:`check-es-format` quality check.
 ``ignore-i18next-interpolation``
     Skip the :ref:`check-i18next-interpolation` quality check.
+``ignore-icu-message-format``
+    Skip the :ref:`check-icu-message-format` quality check.
 ``ignore-java-format``
     Skip the :ref:`check-java-format` quality check.
 ``ignore-java-messageformat``
diff --git a/docs/user/checks.rst b/docs/user/checks.rst
index eab69d7b0f88..d7b8fc10db27 100644
--- a/docs/user/checks.rst
+++ b/docs/user/checks.rst
@@ -242,8 +242,73 @@ i18next interpolation
    :ref:`check-formats`,
    `i18next interpolation <https://www.i18next.com/translation-function/interpolation>`_
 
-.. _check-java-format:
 
+.. _check-icu-message-format:
+
+ICU MessageFormat
+*****************
+
+.. versionadded:: 4.9
+
+:Summary: There are inconsistencies or syntax errors within ICU MessageFormat placeholders.
+:Scope: translated strings
+:Check class: ``weblate.checks.icu.ICUMessageFormatCheck``
+:Flag to enable: ``icu-message-format``
+:Flag to ignore: ``ignore-icu-message-format``
+:Interpolation example: ``There {number, plural, one {is one apple} other {are # apples}}.``
+
+This check has support for both pure ICU MessageFormat messages as well as ICU with simple
+XML tags. You can configure the behavior of this check by using ``icu-flags:*``, either by
+opting into XML support or by disabling certain sub-checks. For example, the following flag
+enables XML support while disabling validation of plural sub-messages:
+
+.. code-block::text
+
+  icu-message-format, icu-flags:xml:-plural_selectors
+
++---------------------------+------------------------------------------------------------+
+| ``xml``                   | Enable support for simple XML tags. By default, XML tags   |
+|                           | are parsed loosely. Stray ``<`` characters are ignored     |
+|                           | if they are not reasonably part of a tag.                  |
++---------------------------+------------------------------------------------------------+
+| ``strict-xml``            | Enable support for strict XML tags. All ``<`` characters   |
+|                           | must be escaped if they are not part of a tag.             |
++---------------------------+------------------------------------------------------------+
+| ``-highlight``            | Disable highlighting placeholders in the editor.           |
++---------------------------+------------------------------------------------------------+
+| ``-require_other``        | Disable requiring sub-messages to have an ``other``        |
+|                           | selector.                                                  |
++---------------------------+------------------------------------------------------------+
+| ``-submessage_selectors`` | Skip checking that sub-message selectors match the source. |
++---------------------------+------------------------------------------------------------+
+| ``-types``                | Skip checking that placeholder types match the source.     |
++---------------------------+------------------------------------------------------------+
+| ``-extra``                | Skip checking that no placeholders are present that were   |
+|                           | not present in the source string.                          |
++---------------------------+------------------------------------------------------------+
+| ``-missing``              | Skip checking that no placeholders are missing that were   |
+|                           | present in the source string.                              |
++---------------------------+------------------------------------------------------------+
+
+Additionally, when ``strict-xml`` is not enabled but ``xml`` is enabled, you can use the
+``icu-tag-prefix:PREFIX`` flag to require that all XML tags start with a specific string.
+For example, the following flag will only allow XML tags to be matched if they start with
+``<x:``:
+
+.. code-block::text
+
+  icu-message-format, icu-flags:xml, icu-tag-prefix:"x:"
+
+This would match ``<x:link>click here</x:link>`` but not ``<strong>this</strong>``.
+
+.. seealso::
+
+  :ref:`check-formats`,
+  `ICU: Formatting Messages <https://unicode-org.github.io/icu/userguide/format_parse/messages/>`_,
+  `Format.JS: Message Syntax <https://formatjs.io/docs/core-concepts/icu-syntax/>`_
+
+
+.. _check-java-format:
 
 Java format
 ***********
diff --git a/requirements.txt b/requirements.txt
index 20e5745c2c73..d547f95a6fc2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,6 +24,7 @@ Pillow>=6.0.0,<8.4.0
 pyahocorasick>=1.4,<1.5
 pycairo>=1.15.3
 pygobject>=3.27.0
+pyicumessageformat>=1.0.0
 pyparsing>=2.4.0,<2.5.0
 python-dateutil>=2.8.1
 python-redis-lock>=3.6.0,<3.8.0
diff --git a/weblate/checks/flags.py b/weblate/checks/flags.py
index b993c3e6b445..56e3ba0ad156 100644
--- a/weblate/checks/flags.py
+++ b/weblate/checks/flags.py
@@ -65,6 +65,10 @@
 TYPED_FLAGS_ARGS["font-weight"] = single_value_flag(get_font_weight)
 TYPED_FLAGS["font-spacing"] = gettext_lazy("Font spacing")
 TYPED_FLAGS_ARGS["font-spacing"] = single_value_flag(int)
+TYPED_FLAGS["icu-flags"] = gettext_lazy("ICU MessageFormat Flags")
+TYPED_FLAGS_ARGS["icu-flags"] = multi_value_flag(str)
+TYPED_FLAGS["icu-tag-prefix"] = gettext_lazy("ICU MessageFormat Tag Prefix")
+TYPED_FLAGS_ARGS["icu-tag-prefix"] = single_value_flag(str)
 TYPED_FLAGS["priority"] = gettext_lazy("Priority")
 TYPED_FLAGS_ARGS["priority"] = single_value_flag(int)
 TYPED_FLAGS["max-length"] = gettext_lazy("Maximum length of translation")
diff --git a/weblate/checks/icu.py b/weblate/checks/icu.py
new file mode 100644
index 000000000000..2312985597ab
--- /dev/null
+++ b/weblate/checks/icu.py
@@ -0,0 +1,510 @@
+#
+# Copyright © 2012 - 2021 Michal Čihař <michal@cihar.com>
+#
+# This file is part of Weblate <https://weblate.org/>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+from collections import defaultdict
+
+from django.utils.translation import gettext_lazy as _
+from pyicumessageformat import Parser
+
+from weblate.checks.format import BaseFormatCheck
+
+# Unique value for checking tags. Since types are
+# always strings, this will never be encountered.
+TAG_TYPE = -100
+
+# These types are to be considered numeric. Numeric placeholders
+# can be of any numeric type without triggering a warning from
+# the checker.
+NUMERIC_TYPES = ["number", "plural", "selectordinal"]
+
+# These types have their sub-messages checked to ensure that
+# sub-message selectors are valid.
+PLURAL_TYPES = ["plural", "selectordinal"]
+
+# ... and these are the valid selectors, along with selectors
+# for specific values, formatted such as: =0, =1, etc.
+PLURAL_SELECTORS = ["zero", "one", "two", "few", "many", "other"]
+
+
+# We construct two Parser instances, one for tags and one without.
+# Both parsers are configured to allow spaces inside formats, to not
+# require other (which we can do better ourselves), and to be
+# permissive about what types can have sub-messages.
+standard_parser = Parser(
+    {
+        "include_indices": True,
+        "loose_submessages": True,
+        "allow_format_spaces": True,
+        "require_other": False,
+        "allow_tags": False,
+    }
+)
+
+tag_parser = Parser(
+    {
+        "include_indices": True,
+        "loose_submessages": True,
+        "allow_format_spaces": True,
+        "require_other": False,
+        "allow_tags": True,
+        "strict_tags": False,
+        "tag_type": TAG_TYPE,
+    }
+)
+
+
+strict_tag_parser = Parser(
+    {
+        "include_indices": True,
+        "loose_submessages": True,
+        "allow_format_spaces": True,
+        "require_other": False,
+        "allow_tags": True,
+        "strict_tags": True,
+        "tag_type": TAG_TYPE,
+    }
+)
+
+
+def parse_icu(
+    source: str,
+    allow_tags: bool,
+    strict_tags: bool,
+    tag_prefix: str = None,
+    want_tokens=False,
+):
+    """Parse an ICU MessageFormat message."""
+    ast = None
+    err = None
+    tokens = [] if want_tokens else None
+    parser = standard_parser
+    if allow_tags:
+        parser = strict_tag_parser if strict_tags else tag_parser
+
+    parser.options["tag_prefix"] = tag_prefix
+
+    try:
+        ast = parser.parse(source, tokens)
+    except SyntaxError as e:
+        err = e
+
+    parser.options["tag_prefix"] = None
+
+    return ast, err, tokens
+
+
+def check_bad_plural_selector(selector):
+    if selector in PLURAL_SELECTORS:
+        return False
+    return selector[0] != "="
+
+
+def update_maybe_value(value, old):
+    """
+    Certain values on placeholders can have four values.
+
+    They will be one of: `None`, `True`, `False`, or `0`.
+
+    `None` represents a value that was never set.
+    `True` or `False` represents a value that was set.
+    `0` represents a value that was set with conflicting values.
+
+    This is useful in case there are multiple placeholders with
+    conflicting type information.
+    """
+    if old is None or old == value:
+        return value
+    return 0
+
+
+def extract_highlights(token, source, out=None):
+    """Extract all placeholders from an AST that we would want to highlight."""
+    if out is None:
+        out = []
+
+    if isinstance(token, str):
+        return out
+
+    if isinstance(token, list):
+        for tok in token:
+            extract_highlights(tok, source, out)
+
+        return out
+
+    # Sanity check the token. They should always have
+    # start and end.
+    if "start" not in token or "end" not in token:
+        return out
+
+    start = token["start"]
+    end = token["end"]
+    usable = start < len(source)
+
+    if "hash" in token and token["hash"]:
+        usable = False
+
+    if "options" in token:
+        usable = False
+        for subast in token["options"].values():
+            extract_highlights(subast, source, out)
+
+    if "contents" in token:
+        usable = False
+        extract_highlights(token["contents"], source, out)
+
+    if usable:
+        out.append((start, end, source[start:end]))
+
+    return out
+
+
+def extract_placeholders(token, variables=None):
+    """Extract all placeholders from an AST and summarize their types."""
+    if variables is None:
+        variables = {}
+
+    if isinstance(token, str):
+        # Skip strings. Those aren't interesting.
+        return variables
+
+    if isinstance(token, list):
+        # If we have a list, then we have a list of tokens so iterate
+        # over the entire list.
+        for tok in token:
+            extract_placeholders(tok, variables)
+
+        return variables
+
+    if "name" not in token:
+        # There should always be a name. This is highly suspicious.
+        # Should this raise an exception?
+        return variables
+
+    name = token["name"]
+    ttype = token.get("type")
+    data = variables.setdefault(
+        name,
+        {
+            "name": name,
+            "types": set(),
+            "formats": set(),
+            "is_number": None,
+            "is_tag": None,
+            "is_empty": None,
+        },
+    )
+
+    if ttype:
+        is_tag = ttype is TAG_TYPE
+        data["is_tag"] = update_maybe_value(is_tag, data["is_tag"])
+
+        if is_tag:
+            data["is_empty"] = update_maybe_value(
+                "contents" not in token or not token["contents"], data["is_empty"]
+            )
+        else:
+            data["types"].add(ttype)
+            data["is_number"] = update_maybe_value(
+                ttype in NUMERIC_TYPES, data["is_number"]
+            )
+            if "format" in token:
+                data["formats"].add(token["format"])
+
+    if "options" in token:
+        choices = data.setdefault("choices", set())
+
+        # We need to do three things with options:
+        for selector, subast in token["options"].items():
+            # First, we log the selector for later comparison.
+            choices.add(selector)
+
+            # Second, we make sure the selector is valid if we're working
+            # with a plural/selectordinal type.
+            if ttype in PLURAL_TYPES:
+                if check_bad_plural_selector(selector):
+                    data.setdefault("bad_plural", set()).add(selector)
+
+            # Finally, we process the sub-ast for this option.
+            extract_placeholders(subast, variables)
+
+    # Make sure we process the contents sub-ast if one exists.
+    if "contents" in token:
+        extract_placeholders(token["contents"], variables)
+
+    return variables
+
+
+class ICUMessageFormatCheck(BaseFormatCheck):
+    """Check for ICU MessageFormat string."""
+
+    check_id = "icu_message_format"
+    name = _("ICU MessageFormat")
+    description = _(
+        "Syntax errors and/or placeholder mismatches in ICU MessageFormat strings."
+    )
+    source = True
+
+    def get_flags(self, unit):
+        if unit and unit.all_flags.has_value("icu-flags"):
+            return unit.all_flags.get_value("icu-flags")
+        return []
+
+    def get_tag_prefix(self, unit):
+        if unit and unit.all_flags.has_value("icu-tag-prefix"):
+            return unit.all_flags.get_value("icu-tag-prefix")
+        return None
+
+    def check_source_unit(self, source, unit):
+        """Checker for source strings. Only check for syntax issues."""
+        if not source or not source[0]:
+            return False
+
+        flags = self.get_flags(unit)
+        strict_tags = "strict-xml" in flags
+        allow_tags = strict_tags or "xml" in flags
+        tag_prefix = self.get_tag_prefix(unit)
+
+        _, src_err, _ = parse_icu(source[0], allow_tags, strict_tags, tag_prefix)
+        if src_err:
+            return True
+        return False
+
+    def check_format(self, source, target, ignore_missing, unit):
+        """Checker for ICU MessageFormat strings."""
+        if not target or not source:
+            return False
+
+        flags = self.get_flags(unit)
+        strict_tags = "strict-xml" in flags
+        allow_tags = strict_tags or "xml" in flags
+        tag_prefix = self.get_tag_prefix(unit)
+
+        result = defaultdict(list)
+        src_ast, src_err, _ = parse_icu(source, allow_tags, strict_tags, tag_prefix)
+
+        # Check to see if we're running on a source string only.
+        # If we are, then we can only run a syntax check on the
+        # source and be done.
+        if unit and unit.is_source:
+            if src_err:
+                result["syntax"].append(src_err)
+                return result
+            return False
+
+        tgt_ast, tgt_err, _ = parse_icu(target, allow_tags, strict_tags, tag_prefix)
+        if tgt_err:
+            result["syntax"].append(tgt_err)
+
+        if tgt_err:
+            return result
+        if src_err:
+            # We cannot run any further checks if the source
+            # string isn't valid, so just accept that the target
+            # string is valid for now.
+            return False
+
+        # Both strings are valid! Congratulations. Let's extract
+        # information on all the placeholders in both strings, and
+        # compare them to see if anything is wrong.
+        src_vars = extract_placeholders(src_ast)
+        tgt_vars = extract_placeholders(tgt_ast)
+
+        # First, we check all the variables in the target.
+        for name, data in tgt_vars.items():
+            self.check_for_other(result, name, data, flags)
+
+            if name in src_vars:
+                src_data = src_vars[name]
+
+                self.check_bad_plural(result, name, data, src_data, flags)
+                self.check_bad_submessage(result, name, data, src_data, flags)
+                self.check_wrong_type(result, name, data, src_data, flags)
+
+                if allow_tags:
+                    self.check_tags(result, name, data, src_data, flags)
+
+            else:
+                self.check_bad_submessage(result, name, data, None, flags)
+
+                # The variable does not exist in the source,
+                # which suggests a mistake.
+                if "-extra" not in flags:
+                    result["extra"].append(name)
+
+        # We also want to check for variables used in the
+        # source but not in the target.
+        self.check_missing(result, src_vars, tgt_vars, flags)
+
+        if result:
+            return result
+        return False
+
+    def check_missing(self, result, src_vars, tgt_vars, flags):
+        """Detect any variables in the target not in the source."""
+        if "-missing" in flags:
+            return
+
+        for name in src_vars:
+            if name not in tgt_vars:
+                result["missing"].append(name)
+
+    def check_for_other(self, result, name, data, flags):
+        """Ensure that types with sub-messages have other."""
+        if "-require_other" in flags:
+            return
+
+        choices = data.get("choices")
+        if choices and "other" not in choices:
+            result["no_other"].append(name)
+
+    def check_bad_plural(self, result, name, data, src_data, flags):
+        """Forward bad plural selectors detected during extraction."""
+        if "-plural_selectors" in flags:
+            return
+
+        if "bad_plural" in data:
+            result["bad_plural"].append([name, data["bad_plural"]])
+
+    def check_bad_submessage(self, result, name, data, src_data, flags):
+        """Detect any bad sub-message selectors."""
+        if "-submessage_selectors" in flags:
+            return
+
+        bad = set()
+
+        # We also want to check individual select choices.
+        if src_data and "select" in data["types"] and "select" in src_data["types"]:
+            if "choices" in data and "choices" in src_data:
+                choices = data["choices"]
+                src_choices = src_data["choices"]
+
+                for selector in choices:
+                    if selector not in src_choices:
+                        bad.add(selector)
+
+        if bad:
+            result["bad_submessage"].append([name, bad])
+
+    def check_wrong_type(self, result, name, data, src_data, flags):
+        """Ensure that types match, when possible."""
+        if "-types" in flags:
+            return
+
+        # If we're dealing with a number, we want to use
+        # special number logic, since numbers work with
+        # multiple types.
+        if isinstance(src_data["is_number"], bool) and src_data["is_number"]:
+            if src_data["is_number"] != data["is_number"]:
+                result["wrong_type"].append(name)
+
+        else:
+            for ttype in data["types"]:
+                if ttype not in src_data["types"]:
+                    result["wrong_type"].append(name)
+                    break
+
+    def check_tags(self, result, name, data, src_data, flags):
+        """Check for errors with XML tags."""
+        if "-tags" in flags:
+            return
+
+        if isinstance(src_data["is_tag"], bool) or data["is_tag"] is not None:
+            if src_data["is_tag"]:
+                if not data["is_tag"]:
+                    result["should_be_tag"].append(name)
+
+                elif (
+                    isinstance(src_data["is_empty"], bool)
+                    and src_data["is_empty"] != data["is_empty"]
+                ):
+                    if src_data["is_empty"]:
+                        result["tag_not_empty"].append(name)
+                    else:
+                        result["tag_empty"].append(name)
+
+            elif data["is_tag"]:
+                result["not_tag"].append(name)
+
+    def format_result(self, result):
+        if result.get("syntax"):
+            yield _("Syntax error: %s") % ", ".join(err.msg for err in result["syntax"])
+
+        if result.get("extra"):
+            yield _("Unknown placeholder in translation: %s") % ", ".join(
+                result["extra"]
+            )
+
+        if result.get("missing"):
+            yield _("Placeholder missing in translation: %s") % ", ".join(
+                result["missing"]
+            )
+
+        if result.get("wrong_type"):
+            yield _("Placeholder has wrong type: %s") % ", ".join(result["wrong_type"])
+
+        if result.get("no_other"):
+            yield _("Missing other sub-message for: %s") % ", ".join(result["no_other"])
+
+        if result.get("bad_plural"):
+            yield _("Bad plural selectors for: %s") % ", ".join(
+                f"{x[0]} ({', '.join(x[1])})" for x in result["bad_plural"]
+            )
+
+        if result.get("bad_submessage"):
+            yield _("Bad sub-message selectors for: %s") % ", ".join(
+                f"{x[0]} ({', '.join(x[1])})" for x in result["bad_submessage"]
+            )
+
+        if result.get("should_be_tag"):
+            yield _("Placeholder should be XML tag in translation: %s") % ", ".join(
+                result["should_be_tag"]
+            )
+
+        if result.get("not_tag"):
+            yield _("Placeholder should not be XML tag in translation: %s") % ", ".join(
+                result["not_tag"]
+            )
+
+        if result.get("tag_not_empty"):
+            yield _("XML Tag has unexpected contents in translation: %s") % ", ".join(
+                result["tag_not_empty"]
+            )
+
+        if result.get("tag_empty"):
+            yield _("XML Tag missing contents in translation: %s") % ", ".join(
+                result["tag_empty"]
+            )
+
+    def check_highlight(self, source, unit):
+        if self.should_skip(unit):
+            return []
+
+        flags = self.get_flags(unit)
+        if "-highlight" in flags:
+            return []
+
+        strict_tags = "strict-xml" in flags
+        allow_tags = strict_tags or "xml" in flags
+        tag_prefix = self.get_tag_prefix(unit)
+
+        ast, _, _ = parse_icu(source, allow_tags, strict_tags, tag_prefix, True)
+        if not ast:
+            return []
+
+        return extract_highlights(ast, source)
diff --git a/weblate/checks/models.py b/weblate/checks/models.py
index 496936e1711a..89d99dafd8aa 100644
--- a/weblate/checks/models.py
+++ b/weblate/checks/models.py
@@ -76,6 +76,7 @@ class WeblateChecksConf(AppConf):
         "weblate.checks.format.I18NextInterpolationCheck",
         "weblate.checks.format.ESTemplateLiteralsCheck",
         "weblate.checks.angularjs.AngularJSInterpolationCheck",
+        "weblate.checks.icu.ICUMessageFormatCheck",
         "weblate.checks.qt.QtFormatCheck",
         "weblate.checks.qt.QtPluralCheck",
         "weblate.checks.ruby.RubyFormatCheck",
diff --git a/weblate/checks/tests/test_checks.py b/weblate/checks/tests/test_checks.py
index ec7c2bf888fd..ccd204e9a1c6 100644
--- a/weblate/checks/tests/test_checks.py
+++ b/weblate/checks/tests/test_checks.py
@@ -75,7 +75,9 @@ def log_debug(text, *args):
 class MockUnit:
     """Mock unit object."""
 
-    def __init__(self, id_hash=None, flags="", code="cs", source="", note=""):
+    def __init__(
+        self, id_hash=None, flags="", code="cs", source="", note="", is_source=None
+    ):
         if id_hash is None:
             id_hash = random.randint(0, 65536)
         self.id_hash = id_hash
@@ -89,6 +91,7 @@ def __init__(self, id_hash=None, flags="", code="cs", source="", note=""):
         self.note = note
         self.check_cache = {}
         self.machinery = {"best": -1}
+        self.is_source = is_source
 
     @property
     def all_flags(self):
diff --git a/weblate/checks/tests/test_icu_checks.py b/weblate/checks/tests/test_icu_checks.py
new file mode 100644
index 000000000000..7c84cc26b5ce
--- /dev/null
+++ b/weblate/checks/tests/test_icu_checks.py
@@ -0,0 +1,396 @@
+#
+# Copyright © 2012 - 2021 Michal Čihař <michal@cihar.com>
+#
+# This file is part of Weblate <https://weblate.org/>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""Tests for ICU MessageFormat checks."""
+
+from weblate.checks.icu import ICUMessageFormatCheck
+from weblate.checks.tests.test_checks import CheckTestCase, MockUnit
+
+
+class ICUMessageFormatCheckTest(CheckTestCase):
+    check = ICUMessageFormatCheck()
+
+    id_hash = "icu_message_format"
+    flag = "icu-message-format"
+    flags = None
+
+    def get_mock(self, source=None, flags=None):
+        if not flags and self.flags:
+            flags = self.flags
+        elif flags and self.flags:
+            flags = f"{self.flags}:{flags}"
+
+        if flags:
+            flags = f"{self.flag}, icu-flags:{flags}"
+        else:
+            flags = self.flag
+
+        return MockUnit(
+            self.id_hash, flags=flags, source=source, is_source=source is not None
+        )
+
+    def test_plain(self):
+        self.assertFalse(
+            self.check.check_format("string", "string", False, self.get_mock())
+        )
+
+    def test_plain_source(self):
+        self.assertFalse(
+            self.check.check_format("string", "string", False, self.get_mock("string"))
+        )
+
+    def test_malformed(self):
+        result = self.check.check_format(
+            "Hello, {name}!", "Hello, {name!", False, self.get_mock()
+        )
+
+        self.assertTrue(result)
+        self.assertTrue("syntax" in result)
+        syntax = result["syntax"]
+        self.assertTrue(isinstance(syntax, list) and len(syntax) == 1)
+        self.assertTrue("Expected , or }" in syntax[0].msg)
+
+    def test_malformed_source(self):
+        # When dealing with a translation and not the source,
+        # any source issue is silently discarded.
+        self.assertFalse(
+            self.check.check_format(
+                "Hello, {name!", "Hello, {name}!", False, self.get_mock()
+            )
+        )
+
+        # However, if the unit is_source, we return syntax errors.
+        result = self.check.check_format(
+            "Hello, {name!", "Hello, {name}!", False, self.get_mock("Hello, {name!")
+        )
+
+        self.assertTrue(result)
+        self.assertTrue("syntax" in result)
+        syntax = result["syntax"]
+        self.assertTrue(isinstance(syntax, list) and len(syntax) == 1)
+        self.assertTrue("Expected , or }" in syntax[0].msg)
+
+    def test_source(self):
+        self.assertFalse(self.check.check_source_unit([""], self.get_mock()))
+        self.assertFalse(
+            self.check.check_source_unit(["Hello, {name}!"], self.get_mock())
+        )
+
+    def test_bad_source(self):
+        self.assertTrue(
+            self.check.check_source_unit(["Hello, {name!"], self.get_mock())
+        )
+
+    def test_no_formats(self):
+        self.assertFalse(
+            self.check.check_format(
+                "Hello, {name}!", "Hallo, {name}!", False, self.get_mock()
+            )
+        )
+
+    def test_whitespace(self):
+        self.assertFalse(
+            self.check.check_format(
+                "Hello, {  \t name\n  \n}!", "Hallo, {name}!", False, self.get_mock()
+            )
+        )
+
+    def test_missing_placeholder(self):
+        result = self.check.check_format(
+            "Hello, {name}!", "Hallo, Fred!", False, self.get_mock()
+        )
+
+        self.assertDictEqual(result, {"missing": ["name"]})
+
+    def test_extra_placeholder(self):
+        result = self.check.check_format(
+            "Hello, {firstName}!",
+            "Hallo, {firstName} {lastName}!",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(result, {"extra": ["lastName"]})
+
+    def test_types(self):
+        self.assertFalse(
+            self.check.check_format(
+                "Cost: {value, number, ::currency/USD}",
+                "Kosten: {value, number, ::currency/USD}",
+                False,
+                self.get_mock(),
+            )
+        )
+
+    def test_wrong_types(self):
+        result = self.check.check_format(
+            "Cost: {value, number, ::currency/USD}",
+            "Kosten: {value}",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(result, {"wrong_type": ["value"]})
+
+    def test_flag_wrong_types(self):
+        self.assertFalse(
+            self.check.check_format(
+                "{value, number}", "{value}", False, self.get_mock(None, "-types")
+            )
+        )
+
+    def test_more_wrong_types(self):
+        result = self.check.check_format(
+            "Cost: {value, foo}", "Kosten: {value, bar}", False, self.get_mock()
+        )
+
+        self.assertDictEqual(result, {"wrong_type": ["value"]})
+
+    def test_plural_types(self):
+        self.assertFalse(
+            self.check.check_format(
+                "You have {count, plural, one {# message} other {# messages}}. "
+                "Yes. {count, number}.",
+                "Sie haben {count, plural, one {# Nachricht} other "
+                "{# Nachrichten}}. Ja. {count, number}.",
+                False,
+                self.get_mock(),
+            )
+        )
+
+    def test_no_other(self):
+        result = self.check.check_format(
+            "{count, number}", "{count, plural, one {typo}}", False, self.get_mock()
+        )
+
+        self.assertDictEqual(result, {"no_other": ["count"]})
+
+    def test_flag_no_other(self):
+        inp = "{count, plural, one {#}}"
+        self.assertFalse(
+            self.check.check_format(
+                inp, inp, False, self.get_mock(inp, "-require_other")
+            )
+        )
+
+    def test_random_submessage(self):
+        inp = "{count, test, one {yes} other {no}}"
+        self.assertFalse(self.check.check_format(inp, inp, False, self.get_mock()))
+
+    def test_bad_select(self):
+        result = self.check.check_format(
+            "{pronoun,select,hehim{}sheher{}other{}}",
+            "{pronoun,select,he{}sheeher{}other{}}",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(
+            result, {"bad_submessage": [["pronoun", {"he", "sheeher"}]]}
+        )
+
+    def test_flag_bad_select(self):
+        # This also checks multiple flags.
+        self.assertFalse(
+            self.check.check_format(
+                "{n,select,one{}two{}}",
+                "{n,select,three{}four{}}",
+                False,
+                self.get_mock(None, "-require_other:-submessage_selectors"),
+            )
+        )
+
+    def test_bad_plural(self):
+        result = self.check.check_format(
+            "{count, number}",
+            "{count, plural, bad {typo} other {okay}}",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(result, {"bad_plural": [["count", {"bad"}]]})
+
+    def test_flag_bad_plural(self):
+        self.assertFalse(
+            self.check.check_format(
+                "{n,number}",
+                "{n,plural,bad{}other{}}",
+                False,
+                self.get_mock(None, "-plural_selectors"),
+            )
+        )
+
+    def test_good_plural(self):
+        self.assertFalse(
+            self.check.check_format(
+                "{count, number}",
+                "{count, plural, zero{#} one{#} two{#} few{#} many{#} "
+                "other{#} =0{#} =-12{#} =391.5{#}}",
+                False,
+                self.get_mock(),
+            )
+        )
+
+    def test_check_highlight(self):
+        highlights = self.check.check_highlight(
+            "Hello, <link> {na<>me} </link>. You have {count, plural, one "
+            "{# message} other {# messages}}.",
+            self.get_mock(),
+        )
+
+        self.assertListEqual(
+            highlights,
+            [
+                (14, 22, "{na<>me}"),
+            ],
+        )
+
+    def test_check_error_highlight(self):
+        highlights = self.check.check_highlight(
+            "Hello, {name}! You have {count,number", self.get_mock()
+        )
+
+        self.assertListEqual(highlights, [])
+
+    def test_check_flag_highlight(self):
+        highlights = self.check.check_highlight(
+            "Hello, {name}! You have {count,number", self.get_mock(None, "-highlight")
+        )
+
+        self.assertListEqual(highlights, [])
+
+    def test_check_no_highlight(self):
+        highlights = self.check.check_highlight(
+            "Hello, {name}!", MockUnit("java_format", flags="java-format")
+        )
+
+        self.assertListEqual(highlights, [])
+
+
+# This is a sub-class of our existing test set because this format is an extension
+# of the other format and it should handle all existing syntax properly.
+class ICUXMLFormatCheckTest(ICUMessageFormatCheckTest):
+
+    flags = "xml"
+
+    def test_tags(self):
+        self.assertFalse(
+            self.check.check_format(
+                "Hello <user/>! <link>Click here!</link>",
+                "Hallo <user />! <link>Klicke hier!</link>",
+                False,
+                None,
+            )
+        )
+
+    def test_empty_tags(self):
+        self.assertFalse(
+            self.check.check_format(
+                "<empty />", "<empty/><empty></empty>", False, self.get_mock()
+            )
+        )
+
+    def test_incorrectly_full_tags(self):
+        result = self.check.check_format(
+            "<empty /><full>tag</full>",
+            "<full /><empty>tag</empty>",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(
+            result, {"tag_not_empty": ["empty"], "tag_empty": ["full"]}
+        )
+
+    def test_tag_vs_placeholder(self):
+        result = self.check.check_format(
+            "Hello, <bold>{firstName}</bold>.",
+            "Hello {bold} <firstName />.",
+            False,
+            self.get_mock(),
+        )
+
+        self.assertDictEqual(
+            result,
+            {
+                "should_be_tag": ["bold"],
+                "not_tag": ["firstName"],
+            },
+        )
+
+    def test_flag_tags(self):
+        self.assertFalse(
+            self.check.check_format(
+                "Hello, <bold>{firstName}</bold>.",
+                "Hello {bold} <firstName />.",
+                False,
+                self.get_mock(None, "-tags"),
+            )
+        )
+
+    def test_check_highlight(self):
+        highlights = self.check.check_highlight(
+            "Hello, <link> {na<>me} </link>. You have {count, plural, "
+            "one {# message} other {# messages}}.",
+            self.get_mock(),
+        )
+
+        self.assertListEqual(
+            highlights,
+            [
+                (14, 22, "{na<>me}"),
+            ],
+        )
+
+    def test_not_a_tag(self):
+        self.assertFalse(
+            self.check.check_format(
+                "I <3 Software", "I <3 Open Source", False, self.get_mock()
+            )
+        )
+
+    def test_tag_prefix(self):
+        self.assertFalse(
+            self.check.check_format(
+                "<bold>test",
+                "<italic>test",
+                False,
+                self.get_mock(None, 'xml, icu-tag-prefix:"x:"'),
+            )
+        )
+
+
+class ICUXMLStrictFormatCheckTest(ICUXMLFormatCheckTest):
+
+    flags = "strict-xml"
+
+    def test_tag_prefix(self):
+        # Tag Prefix is ignored with strict tags.
+        pass
+
+    def test_not_a_tag(self):
+        result = self.check.check_format(
+            "I <3 Software", "I <3 Open Source", False, self.get_mock()
+        )
+
+        self.assertTrue(result)
+        self.assertTrue("syntax" in result)
+        syntax = result["syntax"]
+        self.assertTrue(isinstance(syntax, list) and len(syntax) == 1)
+        self.assertTrue("Expected > or />" in syntax[0].msg)
diff --git a/weblate/settings_docker.py b/weblate/settings_docker.py
index 4942f93bea91..cfbfd200057b 100644
--- a/weblate/settings_docker.py
+++ b/weblate/settings_docker.py
@@ -979,6 +979,7 @@
     "weblate.checks.format.I18NextInterpolationCheck",
     "weblate.checks.format.ESTemplateLiteralsCheck",
     "weblate.checks.angularjs.AngularJSInterpolationCheck",
+    "weblate.checks.icu.ICUMessageFormatCheck",
     "weblate.checks.qt.QtFormatCheck",
     "weblate.checks.qt.QtPluralCheck",
     "weblate.checks.ruby.RubyFormatCheck",
diff --git a/weblate/settings_example.py b/weblate/settings_example.py
index 0b06e418441d..54dcfc250d62 100644
--- a/weblate/settings_example.py
+++ b/weblate/settings_example.py
@@ -722,6 +722,7 @@
 #     "weblate.checks.format.I18NextInterpolationCheck",
 #     "weblate.checks.format.ESTemplateLiteralsCheck",
 #     "weblate.checks.angularjs.AngularJSInterpolationCheck",
+#     "weblate.checks.icu.ICUMessageFormatCheck",
 #     "weblate.checks.qt.QtFormatCheck",
 #     "weblate.checks.qt.QtPluralCheck",
 #     "weblate.checks.ruby.RubyFormatCheck",

From 477c7deddadba033a69b0bda19b024daae4bb503 Mon Sep 17 00:00:00 2001
From: SirStendec <sir@stendec.me>
Date: Fri, 24 Sep 2021 11:58:07 -0400
Subject: [PATCH 2/5] Add PrismJS syntax highlighting for ICU MessageFormat
 messages.

---
 scripts/yarn-update                           |   1 +
 .../vendor/prism/prism-icu-message-format.js  | 148 ++++++++++++++++++
 weblate/templates/base.html                   |   1 +
 weblate/trans/models/unit.py                  |   2 +
 4 files changed, 152 insertions(+)
 create mode 100644 weblate/static/vendor/prism/prism-icu-message-format.js

diff --git a/scripts/yarn-update b/scripts/yarn-update
index 1e9569b0d2be..cebbf1ae8bde 100755
--- a/scripts/yarn-update
+++ b/scripts/yarn-update
@@ -35,6 +35,7 @@ cp node_modules/prismjs/components/prism-core.js ../../weblate/static/vendor/pri
 cp node_modules/prismjs/components/prism-markup.js ../../weblate/static/vendor/prism/
 cp node_modules/prismjs/components/prism-rest.js ../../weblate/static/vendor/prism/
 cp node_modules/prismjs/components/prism-markdown.js ../../weblate/static/vendor/prism/
+cp node_modules/prismjs/components/prism-icu-message-format.js ../../weblate/static/vendor/prism/
 
 # Clipboard
 cp node_modules/clipboard/dist/clipboard.js ../../weblate/static/vendor/
diff --git a/weblate/static/vendor/prism/prism-icu-message-format.js b/weblate/static/vendor/prism/prism-icu-message-format.js
new file mode 100644
index 000000000000..7b9de169eb95
--- /dev/null
+++ b/weblate/static/vendor/prism/prism-icu-message-format.js
@@ -0,0 +1,148 @@
+// https://unicode-org.github.io/icu/userguide/format_parse/messages/
+// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/MessageFormat.html
+
+(function (Prism) {
+
+	/**
+	 * @param {string} source
+	 * @param {number} level
+	 * @returns {string}
+	 */
+	function nested(source, level) {
+		if (level <= 0) {
+			return /[]/.source;
+		} else {
+			return source.replace(/<SELF>/g, function () { return nested(source, level - 1); });
+		}
+	}
+
+	var stringPattern = /'[{}:=,](?:[^']|'')*'(?!')/;
+
+	var escape = {
+		pattern: /''/,
+		greedy: true,
+		alias: 'operator'
+	};
+	var string = {
+		pattern: stringPattern,
+		greedy: true,
+		inside: {
+			'escape': escape
+		}
+	};
+
+	var argumentSource = nested(
+		/\{(?:[^{}']|'(?![{},'])|''|<STR>|<SELF>)*\}/.source
+			.replace(/<STR>/g, function () { return stringPattern.source; }),
+		8
+	);
+
+	var nestedMessage = {
+		pattern: RegExp(argumentSource),
+		inside: {
+			'message': {
+				pattern: /^(\{)[\s\S]+(?=\}$)/,
+				lookbehind: true,
+				inside: null // see below
+			},
+			'message-delimiter': {
+				pattern: /./,
+				alias: 'punctuation'
+			}
+		}
+	};
+
+	Prism.languages['icu-message-format'] = {
+		'argument': {
+			pattern: RegExp(argumentSource),
+			greedy: true,
+			inside: {
+				'content': {
+					pattern: /^(\{)[\s\S]+(?=\}$)/,
+					lookbehind: true,
+					inside: {
+						'argument-name': {
+							pattern: /^(\s*)[^{}:=,\s]+/,
+							lookbehind: true
+						},
+						'choice-style': {
+							// https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1ChoiceFormat.html#details
+							pattern: /^(\s*,\s*choice\s*,\s*)\S(?:[\s\S]*\S)?/,
+							lookbehind: true,
+							inside: {
+								'punctuation': /\|/,
+								'range': {
+									pattern: /^(\s*)[+-]?(?:\d+(?:\.\d*)?|\u221e)\s*[<#\u2264]/,
+									lookbehind: true,
+									inside: {
+										'operator': /[<#\u2264]/,
+										'number': /\S+/
+									}
+								},
+								rest: null // see below
+							}
+						},
+						'plural-style': {
+							// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/PluralFormat.html#:~:text=Patterns%20and%20Their%20Interpretation
+							pattern: /^(\s*,\s*(?:plural|selectordinal)\s*,\s*)\S(?:[\s\S]*\S)?/,
+							lookbehind: true,
+							inside: {
+								'offset': /^offset:\s*\d+/,
+								'nested-message': nestedMessage,
+								'selector': {
+									pattern: /=\d+|[^{}:=,\s]+/,
+									inside: {
+										'keyword': /^(?:zero|one|two|few|many|other)$/
+									}
+								}
+							}
+						},
+						'select-style': {
+							// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/SelectFormat.html#:~:text=Patterns%20and%20Their%20Interpretation
+							pattern: /^(\s*,\s*select\s*,\s*)\S(?:[\s\S]*\S)?/,
+							lookbehind: true,
+							inside: {
+								'nested-message': nestedMessage,
+								'selector': {
+									pattern: /[^{}:=,\s]+/,
+									inside: {
+										'keyword': /^other$/
+									}
+								}
+							}
+						},
+						'keyword': /\b(?:choice|plural|select|selectordinal)\b/,
+						'arg-type': {
+							pattern: /\b(?:number|date|time|spellout|ordinal|duration)\b/,
+							alias: 'keyword'
+						},
+						'arg-skeleton': {
+							pattern: /(,\s*)::[^{}:=,\s]+/,
+							lookbehind: true
+						},
+						'arg-style': {
+							pattern: /(,\s*)(?:short|medium|long|full|integer|currency|percent)(?=\s*$)/,
+							lookbehind: true
+						},
+						'arg-style-text': {
+							pattern: RegExp(/(^\s*,\s*(?=\S))/.source + nested(/(?:[^{}']|'[^']*'|\{(?:<SELF>)?\})+/.source, 8) + '$'),
+							lookbehind: true,
+							alias: 'string'
+						},
+						'punctuation': /,/
+					}
+				},
+				'argument-delimiter': {
+					pattern: /./,
+					alias: 'operator'
+				}
+			}
+		},
+		'escape': escape,
+		'string': string
+	};
+
+	nestedMessage.inside.message.inside = Prism.languages['icu-message-format'];
+	Prism.languages['icu-message-format'].argument.inside.content.inside['choice-style'].inside.rest = Prism.languages['icu-message-format'];
+
+}(Prism));
diff --git a/weblate/templates/base.html b/weblate/templates/base.html
index 5bd1b0f761a6..e3547147d0b6 100644
--- a/weblate/templates/base.html
+++ b/weblate/templates/base.html
@@ -70,6 +70,7 @@
   <script defer data-cfasync="false" src="{% static 'vendor/prism/prism-markup.js' %}{{ cache_param }}"></script>
   <script defer data-cfasync="false" src="{% static 'vendor/prism/prism-rest.js' %}{{ cache_param }}"></script>
   <script defer data-cfasync="false" src="{% static 'vendor/prism/prism-markdown.js' %}{{ cache_param }}"></script>
+  <script defer data-cfasync="false" src="{% static 'vendor/prism/prism-icu-message-format.js' %}{{ cache_param }}"></script>
   <script defer data-cfasync="false" src="{% static 'loader-bootstrap.js' %}{{ cache_param }}"></script>
 {% endcompress %}
 
diff --git a/weblate/trans/models/unit.py b/weblate/trans/models/unit.py
index 074171c760a8..89dfd093cf26 100644
--- a/weblate/trans/models/unit.py
+++ b/weblate/trans/models/unit.py
@@ -1314,6 +1314,8 @@ def all_flags(self):
     def edit_mode(self):
         """Returns syntax higlighting mode for Prismjs."""
         flags = self.all_flags
+        if "icu-message-format" in flags:
+            return "icu-message-format"
         if "rst-text" in flags:
             return "rest"
         if "md-text" in flags:

From be7b552828c451b71a7eef3d8104b562ad201dda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Wed, 27 Oct 2021 09:47:33 +0200
Subject: [PATCH 3/5] Update requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index d547f95a6fc2..a0d842ecc2a3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,7 +24,7 @@ Pillow>=6.0.0,<8.4.0
 pyahocorasick>=1.4,<1.5
 pycairo>=1.15.3
 pygobject>=3.27.0
-pyicumessageformat>=1.0.0
+pyicumessageformat>=1.0.0,<1.1.0
 pyparsing>=2.4.0,<2.5.0
 python-dateutil>=2.8.1
 python-redis-lock>=3.6.0,<3.8.0

From 1a56def17faf5594359b90a251da156c23b90cf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Wed, 27 Oct 2021 10:01:11 +0200
Subject: [PATCH 4/5] Apply suggestions from code review

---
 weblate/checks/icu.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/weblate/checks/icu.py b/weblate/checks/icu.py
index 2312985597ab..97685fad2994 100644
--- a/weblate/checks/icu.py
+++ b/weblate/checks/icu.py
@@ -236,9 +236,8 @@ def extract_placeholders(token, variables=None):
 
             # Second, we make sure the selector is valid if we're working
             # with a plural/selectordinal type.
-            if ttype in PLURAL_TYPES:
-                if check_bad_plural_selector(selector):
-                    data.setdefault("bad_plural", set()).add(selector)
+            if ttype in PLURAL_TYPES and check_bad_plural_selector(selector):
+                data.setdefault("bad_plural", set()).add(selector)
 
             # Finally, we process the sub-ast for this option.
             extract_placeholders(subast, variables)
@@ -389,14 +388,13 @@ def check_bad_submessage(self, result, name, data, src_data, flags):
         bad = set()
 
         # We also want to check individual select choices.
-        if src_data and "select" in data["types"] and "select" in src_data["types"]:
-            if "choices" in data and "choices" in src_data:
-                choices = data["choices"]
-                src_choices = src_data["choices"]
-
-                for selector in choices:
-                    if selector not in src_choices:
-                        bad.add(selector)
+        if src_data and "select" in data["types"] and "select" in src_data["types"] and "choices" in data and "choices" in src_data:
+            choices = data["choices"]
+            src_choices = src_data["choices"]
+
+            for selector in choices:
+                if selector not in src_choices:
+                    bad.add(selector)
 
         if bad:
             result["bad_submessage"].append([name, bad])

From 0a002749cba9ee0f905dec5794cc9fe024038b91 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 27 Oct 2021 08:02:33 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 weblate/checks/icu.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/weblate/checks/icu.py b/weblate/checks/icu.py
index 97685fad2994..a9e87186c836 100644
--- a/weblate/checks/icu.py
+++ b/weblate/checks/icu.py
@@ -388,7 +388,13 @@ def check_bad_submessage(self, result, name, data, src_data, flags):
         bad = set()
 
         # We also want to check individual select choices.
-        if src_data and "select" in data["types"] and "select" in src_data["types"] and "choices" in data and "choices" in src_data:
+        if (
+            src_data
+            and "select" in data["types"]
+            and "select" in src_data["types"]
+            and "choices" in data
+            and "choices" in src_data
+        ):
             choices = data["choices"]
             src_choices = src_data["choices"]