From e1f7205e4bdf83330f715a8edd47d0bc9d829538 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Thu, 6 Jun 2024 15:13:48 +0100
Subject: [PATCH] Handle long-format ISO 8601 cycle points in IDs on the CLI

---
 changes.d/6123.fix.md     |  1 +
 cylc/flow/id.py           |  2 +-
 cylc/flow/id_cli.py       | 38 ++++++++++++++++++++++++++++++++++++--
 tests/unit/test_id.py     |  2 +-
 tests/unit/test_id_cli.py | 35 +++++++++++++++++++++++++++++++++++
 5 files changed, 74 insertions(+), 4 deletions(-)
 create mode 100644 changes.d/6123.fix.md

diff --git a/changes.d/6123.fix.md b/changes.d/6123.fix.md
new file mode 100644
index 00000000000..cef4f24a709
--- /dev/null
+++ b/changes.d/6123.fix.md
@@ -0,0 +1 @@
+Allow long-format datetime cycle points in IDs used on the command line.
\ No newline at end of file
diff --git a/cylc/flow/id.py b/cylc/flow/id.py
index cba3c483366..58fff7fa7bc 100644
--- a/cylc/flow/id.py
+++ b/cylc/flow/id.py
@@ -403,7 +403,7 @@ def duplicate(
 # //cycle[:sel][/task[:sel][/job[:sel]]]
 RELATIVE_PATTERN = rf'''
     //
-    (?P<{IDTokens.Cycle.value}>[^~\/:\n]+)
+    (?P<{IDTokens.Cycle.value}>[^~\/:\n][^~\/\n]*?)
     (?:
       :
       (?P<{IDTokens.Cycle.value}_sel>[^\/:\n]+)
diff --git a/cylc/flow/id_cli.py b/cylc/flow/id_cli.py
index ecce6517d3b..ef767cecc7f 100644
--- a/cylc/flow/id_cli.py
+++ b/cylc/flow/id_cli.py
@@ -20,6 +20,9 @@
 import re
 from typing import Optional, Dict, List, Tuple, Any
 
+from metomi.isodatetime.parsers import TimePointParser
+from metomi.isodatetime.exceptions import ISO8601SyntaxError
+
 from cylc.flow import LOG
 from cylc.flow.exceptions import (
     InputError,
@@ -28,6 +31,7 @@
 from cylc.flow.id import (
     Tokens,
     contains_multiple_workflows,
+    tokenise,
     upgrade_legacy_ids,
 )
 from cylc.flow.pathutil import EXPLICIT_RELATIVE_PATH_REGEX
@@ -43,6 +47,36 @@
 
 
 FN_CHARS = re.compile(r'[\*\?\[\]\!]')
+TP_PARSER = TimePointParser()
+
+
+def cli_tokenise(id_: str) -> Tokens:
+    """Tokenise with support for long-format datetimes.
+
+    If a cycle selector is present and the cycle contains "-", the selector
+    may be the tail of a long-format ISO 8601 datetime that was split at a
+    colon. Re-attach it if the result parses as a valid datetime.
+
+    Examples:
+        >>> f = lambda t: {k: v for k, v in t.items() if v is not None}
+        >>> f(cli_tokenise('foo//2021-01-01T00:00Z'))
+        {'workflow': 'foo', 'cycle': '2021-01-01T00:00Z'}
+        >>> f(cli_tokenise('foo//2021-01-01T00:horse'))
+        {'workflow': 'foo', 'cycle': '2021-01-01T00', 'cycle_sel': 'horse'}
+    """
+    tokens = tokenise(id_)
+    cycle = tokens['cycle']
+    cycle_sel = tokens['cycle_sel']
+    if not (cycle and cycle_sel) or '-' not in cycle:
+        return tokens
+    cycle = f'{cycle}:{cycle_sel}'
+    try:
+        TP_PARSER.parse(cycle)
+    except ISO8601SyntaxError:
+        return tokens
+    dict.__setitem__(tokens, 'cycle', cycle)
+    del tokens['cycle_sel']
+    return tokens
 
 
 def _parse_cli(*ids: str) -> List[Tokens]:
@@ -124,14 +158,14 @@ def _parse_cli(*ids: str) -> List[Tokens]:
     tokens_list: List[Tokens] = []
     for id_ in ids:
         try:
-            tokens = Tokens(id_)
+            tokens = cli_tokenise(id_)
         except ValueError:
             if id_.endswith('/') and not id_.endswith('//'):  # noqa: SIM106
                 # tolerate IDs that end in a single slash on the CLI
                 # (e.g. CLI auto completion)
                 try:
                     # this ID is invalid with or without the trailing slash
-                    tokens = Tokens(id_[:-1])
+                    tokens = cli_tokenise(id_[:-1])
                 except ValueError:
                     raise InputError(f'Invalid ID: {id_}')
             else:
diff --git a/tests/unit/test_id.py b/tests/unit/test_id.py
index 4d46bebf725..2d50c9a2706 100644
--- a/tests/unit/test_id.py
+++ b/tests/unit/test_id.py
@@ -186,7 +186,7 @@ def test_universal_id_matches_hierarchical(identifier):
         '//~',
         '//:',
         '//workflow//cycle',
-        '//task:task_sel:task_sel'
+        '//cycle/task:task_sel:task_sel'
     ]
 )
 def test_relative_id_illegal(identifier):
diff --git a/tests/unit/test_id_cli.py b/tests/unit/test_id_cli.py
index fa8489f2465..8905bf3c4c4 100644
--- a/tests/unit/test_id_cli.py
+++ b/tests/unit/test_id_cli.py
@@ -30,6 +30,7 @@
     _validate_constraint,
     _validate_workflow_ids,
     _validate_number,
+    cli_tokenise,
     parse_ids_async,
 )
 from cylc.flow.pathutil import get_cylc_run_dir
@@ -607,3 +608,37 @@ async def test_expand_workflow_tokens_impl_selector(no_scan):
     tokens = tokens.duplicate(workflow_sel='stopped')
     with pytest.raises(InputError):
         await _expand_workflow_tokens([tokens])
+
+
+@pytest.mark.parametrize('identifier, expected', [
+    (
+        '//2024-01-01T00:fail/a',
+        {'cycle': '2024-01-01T00', 'cycle_sel': 'fail', 'task': 'a'}
+    ),
+    (
+        '//2024-01-01T00:00Z/a',
+        {'cycle': '2024-01-01T00:00Z', 'task': 'a'}
+    ),
+    (
+        '//2024-01-01T00:00Z:fail/a',
+        {'cycle': '2024-01-01T00:00Z', 'cycle_sel': 'fail', 'task': 'a'}
+    ),
+    (
+        '//2024-01-01T00:00:00+05:30/a',
+        {'cycle': '2024-01-01T00:00:00+05:30', 'task': 'a'}
+    ),
+    (
+        '//2024-01-01T00:00:00+05:30:f/a',
+        {'cycle': '2024-01-01T00:00:00+05:30', 'cycle_sel': 'f', 'task': 'a'}
+    ),
+    (
+        # Odd selector: re-attaching ":00Z" gives no valid datetime
+        '//2024-01-01T00:00Z:00Z/a',
+        {'cycle': '2024-01-01T00:00Z', 'cycle_sel': '00Z', 'task': 'a'}
+    )
+])
+def test_iso_long_fmt(identifier, expected):
+    assert {
+        k: v for k, v in cli_tokenise(identifier).items()
+        if v is not None
+    } == expected