diff --git a/CMakeLists.txt b/CMakeLists.txt
index beaeb8576..c4283104f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,25 +82,41 @@ ac_check_headers("unistd.h")
 # windows POSIX-like API
 ac_check_headers("io.h")
 
+# supported languages
+set(re2c_langs "c" "d" "go" "haskell" "java" "js" "ocaml" "python" "rust" "v" "zig")
+
 # docs (manpages and help)
-set(re2c_manpage_source         "${CMAKE_CURRENT_BINARY_DIR}/doc/manpage.rst")
-set(re2c_help_source            "${CMAKE_CURRENT_BINARY_DIR}/doc/help.rst")
-set(re2c_manpage_bootstrap_c    "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/doc/re2c.1")
-set(re2c_manpage_bootstrap_go   "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/doc/re2go.1")
-set(re2c_manpage_bootstrap_rust "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/doc/re2rust.1")
-set(re2c_help_bootstrap         "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/src/msg/help.cc")
-set(re2c_manpage_c              "${CMAKE_CURRENT_BINARY_DIR}/doc/re2c.1")
-set(re2c_manpage_go             "${CMAKE_CURRENT_BINARY_DIR}/doc/re2go.1")
-set(re2c_manpage_rust           "${CMAKE_CURRENT_BINARY_DIR}/doc/re2rust.1")
-set(re2c_help                   "${CMAKE_CURRENT_BINARY_DIR}/src/msg/help.cc")
-set(re2c_rst2man                "${CMAKE_CURRENT_SOURCE_DIR}/build/rst2man.py")
-set(re2c_rst2txt                "${CMAKE_CURRENT_SOURCE_DIR}/build/rst2txt.py")
-set(re2c_splitman               "${CMAKE_CURRENT_SOURCE_DIR}/build/split_man.py")
+set(re2c_manpage_source  "${CMAKE_CURRENT_BINARY_DIR}/doc/manpage.rst")
+set(re2c_help_source     "${CMAKE_CURRENT_BINARY_DIR}/doc/help.rst")
+set(re2c_help_bootstrap  "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/src/msg/help.cc")
+set(re2c_manpage_c       "${CMAKE_CURRENT_BINARY_DIR}/doc/re2c.1")
+set(re2c_manpage_d       "${CMAKE_CURRENT_BINARY_DIR}/doc/re2d.1")
+set(re2c_manpage_go      "${CMAKE_CURRENT_BINARY_DIR}/doc/re2go.1")
+set(re2c_manpage_haskell "${CMAKE_CURRENT_BINARY_DIR}/doc/re2hs.1")
+set(re2c_manpage_java    "${CMAKE_CURRENT_BINARY_DIR}/doc/re2java.1")
+set(re2c_manpage_js      "${CMAKE_CURRENT_BINARY_DIR}/doc/re2js.1")
+set(re2c_manpage_ocaml   "${CMAKE_CURRENT_BINARY_DIR}/doc/re2ocaml.1")
+set(re2c_manpage_python  "${CMAKE_CURRENT_BINARY_DIR}/doc/re2py.1")
+set(re2c_manpage_rust    "${CMAKE_CURRENT_BINARY_DIR}/doc/re2rust.1")
+set(re2c_manpage_v       "${CMAKE_CURRENT_BINARY_DIR}/doc/re2v.1")
+set(re2c_manpage_zig     "${CMAKE_CURRENT_BINARY_DIR}/doc/re2zig.1")
+set(re2c_help            "${CMAKE_CURRENT_BINARY_DIR}/src/msg/help.cc")
+set(re2c_rst2man         "${CMAKE_CURRENT_SOURCE_DIR}/build/rst2man.py")
+set(re2c_rst2txt         "${CMAKE_CURRENT_SOURCE_DIR}/build/rst2txt.py")
+set(re2c_splitman        "${CMAKE_CURRENT_SOURCE_DIR}/build/split_man.py")
 set(re2c_docs
     "${re2c_help}"
     "${re2c_manpage_c}"
+    "$<$<BOOL:${RE2C_BUILD_RE2D}>:${re2c_manpage_d}>"
     "$<$<BOOL:${RE2C_BUILD_RE2GO}>:${re2c_manpage_go}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2HS}>:${re2c_manpage_haskell}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2JAVA}>:${re2c_manpage_java}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2JS}>:${re2c_manpage_js}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2OCAML}>:${re2c_manpage_ocaml}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2PY}>:${re2c_manpage_python}>"
     "$<$<BOOL:${RE2C_BUILD_RE2RUST}>:${re2c_manpage_rust}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2V}>:${re2c_manpage_v}>"
+    "$<$<BOOL:${RE2C_BUILD_RE2ZIG}>:${re2c_manpage_zig}>"
 )
 
 # syntax files
@@ -233,6 +249,28 @@ re2c_bootstrap_lexer("src/parse/conf_lexer.re" "src/parse/conf_lexer.cc")
 re2c_bootstrap_parser("src/parse/conf_parser.ypp" "src/parse/conf_parser.cc"
     "src/parse/conf_parser.h")
 
+# docs
+# Inputs of the generated docs: every file under examples/ and doc/manual/ in
+# the source tree, plus the manpage source that lives in the build tree. The
+# latter is appended explicitly rather than globbed, so it stays in the list
+# even if the file does not exist yet when the glob is evaluated.
+file(GLOB_RECURSE re2c_docs_sources CONFIGURE_DEPENDS
+    "${CMAKE_CURRENT_SOURCE_DIR}/examples/*"
+    "${CMAKE_CURRENT_SOURCE_DIR}/doc/manual/*"
+)
+list(APPEND re2c_docs_sources "${re2c_manpage_source}")
+
+# One manpage per supported language: every entry of re2c_langs is the suffix
+# of a re2c_manpage_<lang> variable holding the output path of that manpage.
+foreach(re2c_lang IN LISTS re2c_langs)
+    re2c_gen_manpage("${re2c_manpage_source}" "${re2c_manpage_${re2c_lang}}")
+endforeach()
+
+re2c_gen_help("${re2c_help_source}" "${re2c_help}" "${re2c_help_bootstrap}")
+# The docs target builds everything in re2c_docs: the help file plus the
+# manpages of re2c and of each enabled language-specific binary.
+add_custom_target(docs DEPENDS "${re2c_docs}")
+
 re2c_bootstrap_syntax("include/syntax/c" "src/default_syntax_c.cc")
 re2c_bootstrap_syntax("include/syntax/d" "src/default_syntax_d.cc")
 re2c_bootstrap_syntax("include/syntax/go" "src/default_syntax_go.cc")
@@ -338,123 +376,6 @@ if (RE2C_BUILD_RE2ZIG)
     )
 endif()
 
-# docs
-set(re2c_docs_sources
-    "${re2c_manpage_source}"
-    "doc/manual/api/api1.rst_"
-    "doc/manual/api/api2_c.rst_"
-    "doc/manual/api/api2_go.rst_"
-    "doc/manual/api/api2_rust.rst_"
-    "doc/manual/api/api3.rst_"
-    "doc/manual/conditions/blocks.rst_"
-    "doc/manual/conditions/conditions.rst_"
-    "doc/manual/configurations/configurations.rst_"
-    "doc/manual/directives/directives.rst_"
-    "doc/manual/dot/dot.rst_"
-    "doc/manual/encodings/encodings.rst_"
-    "doc/manual/eof/01_sentinel.rst_"
-    "doc/manual/eof/02_bounds_checking.rst_"
-    "doc/manual/eof/03_eof_rule.rst_"
-    "doc/manual/eof/04_fake_sentinel.rst_"
-    "doc/manual/eof/eof.rst_"
-    "doc/manual/fill/01_fill.rst_"
-    "doc/manual/fill/02_fill.rst_"
-    "doc/manual/fill/fill.rst_"
-    "doc/manual/headers/headers.rst_"
-    "doc/manual/includes/includes.rst_"
-    "doc/manual/options/debug.rst_"
-    "doc/manual/options/internal.rst_"
-    "doc/manual/options/options.rst_"
-    "doc/manual/regexps/regular_expressions.rst_"
-    "doc/manual/reuse/reuse.rst_"
-    "doc/manual/skeleton/skeleton.rst_"
-    "doc/manual/state/state.rst_"
-    "doc/manual/submatch/submatch_example_mtags.rst_"
-    "doc/manual/submatch/submatch_example_captures.rst_"
-    "doc/manual/submatch/submatch_example_stags_fill.rst_"
-    "doc/manual/submatch/submatch_example_stags.rst_"
-    "doc/manual/submatch/submatch.rst_"
-    "doc/manual/synopsis.rst_"
-    "doc/manual/syntax/intro.rst_"
-    "doc/manual/syntax/syntax.rst_"
-    "doc/manual/warnings/warnings_general.rst_"
-    "doc/manual/warnings/warnings_list.rst_"
-    "examples/c/01_basic.re"
-    "examples/c/01_basic.c"
-    "examples/c/conditions/parse_u32_blocks.re"
-    "examples/c/conditions/parse_u32_conditions.re"
-    "examples/c/encodings/unicode_identifier.re"
-    "examples/c/eof/01_sentinel.re"
-    "examples/c/eof/02_bounds_checking.re"
-    "examples/c/eof/03_eof_rule.re"
-    "examples/c/eof/04_fake_sentinel.re"
-    "examples/c/fill/01_fill.re"
-    "examples/c/fill/02_fill.re"
-    "examples/c/headers/header.re"
-    "examples/c/headers/lexer/state.h"
-    "examples/c/includes/include.re"
-    "examples/c/includes/definitions.h"
-    "examples/c/reuse/reuse.re"
-    "examples/c/reuse/usedir.re"
-    "examples/c/state/push.re"
-    "examples/c/submatch/01_stags_fill.re"
-    "examples/c/submatch/01_stags.re"
-    "examples/c/submatch/02_mtags.re"
-    "examples/c/submatch/03_captures.re"
-    "examples/c/submatch/04_posix_captures.re"
-    "examples/go/01_basic.re"
-    "examples/go/01_basic.go"
-    "examples/go/conditions/parse_u32_blocks.re"
-    "examples/go/conditions/parse_u32_conditions.re"
-    "examples/go/encodings/unicode_identifier.re"
-    "examples/go/eof/01_sentinel.re"
-    "examples/go/eof/02_bounds_checking.re"
-    "examples/go/eof/03_eof_rule.re"
-    "examples/go/eof/04_fake_sentinel.re"
-    "examples/go/fill/01_fill.re"
-    "examples/go/fill/02_fill.re"
-    "examples/go/headers/header.re"
-    "examples/go/headers/lexer/state.go"
-    "examples/go/includes/include.re"
-    "examples/go/includes/definitions.go"
-    "examples/go/reuse/reuse.re"
-    "examples/go/reuse/usedir.re"
-    "examples/go/state/push.re"
-    "examples/go/submatch/01_stags_fill.re"
-    "examples/go/submatch/01_stags.re"
-    "examples/go/submatch/02_mtags.re"
-    "examples/go/submatch/03_captures.re"
-    "examples/go/submatch/04_posix_captures.re"
-    "examples/rust/01_basic.re"
-    "examples/rust/01_basic.rs"
-    "examples/rust/conditions/parse_u32_blocks.re"
-    "examples/rust/conditions/parse_u32_conditions.re"
-    "examples/rust/encodings/unicode_identifier.re"
-    "examples/rust/eof/01_sentinel.re"
-    "examples/rust/eof/02_bounds_checking.re"
-    "examples/rust/eof/03_eof_rule.re"
-    "examples/rust/eof/04_fake_sentinel.re"
-    "examples/rust/fill/01_fill.re"
-    "examples/rust/fill/02_fill.re"
-    "examples/rust/headers/header.re"
-    "examples/rust/headers/lexer/state.rs"
-    "examples/rust/includes/include.re"
-    "examples/rust/includes/definitions.rs"
-    "examples/rust/reuse/reuse.re"
-    "examples/rust/reuse/usedir.re"
-    "examples/rust/state/push.re"
-    "examples/rust/submatch/01_stags_fill.re"
-    "examples/rust/submatch/01_stags.re"
-    "examples/rust/submatch/02_mtags.re"
-    "examples/rust/submatch/03_captures.re"
-    "examples/rust/submatch/04_posix_captures.re"
-)
-re2c_gen_manpage("${re2c_manpage_source}" "${re2c_manpage_c}"    "${re2c_manpage_bootstrap_c}"  "c")
-re2c_gen_manpage("${re2c_manpage_source}" "${re2c_manpage_go}"   "${re2c_manpage_bootstrap_go}" "go")
-re2c_gen_manpage("${re2c_manpage_source}" "${re2c_manpage_rust}" "${re2c_manpage_bootstrap_rust}" "rust")
-re2c_gen_help("${re2c_help_source}" "${re2c_help}" "${re2c_help_bootstrap}")
-add_custom_target(docs DEPENDS "${re2c_docs}")
-
 # install targets are enabled only if re2c is the root project
 if(RE2C_IS_ROOT_PROJECT)
     # install
@@ -462,6 +383,7 @@ if(RE2C_IS_ROOT_PROJECT)
     install(FILES "${re2c_manpage_c}" DESTINATION "share/man/man1")
     if(RE2C_BUILD_RE2D)
         install(TARGETS re2d RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_d}" DESTINATION "share/man/man1")
     endif()
     if(RE2C_BUILD_RE2GO)
         install(TARGETS re2go RUNTIME DESTINATION bin)
@@ -469,12 +391,23 @@ if(RE2C_IS_ROOT_PROJECT)
     endif()
     if(RE2C_BUILD_RE2HS)
         install(TARGETS re2hs RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_haskell}" DESTINATION "share/man/man1")
+    endif()
+    if(RE2C_BUILD_RE2JAVA)
+        install(TARGETS re2java RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_java}" DESTINATION "share/man/man1")
+    endif()
+    if(RE2C_BUILD_RE2JS)
+        install(TARGETS re2js RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_js}" DESTINATION "share/man/man1")
     endif()
     if(RE2C_BUILD_RE2OCAML)
         install(TARGETS re2ocaml RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_ocaml}" DESTINATION "share/man/man1")
     endif()
     if(RE2C_BUILD_RE2PY)
         install(TARGETS re2py RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_python}" DESTINATION "share/man/man1")
     endif()
     if(RE2C_BUILD_RE2RUST)
         install(TARGETS re2rust RUNTIME DESTINATION bin)
@@ -482,9 +415,11 @@ if(RE2C_IS_ROOT_PROJECT)
     endif()
     if(RE2C_BUILD_RE2V)
         install(TARGETS re2v RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_v}" DESTINATION "share/man/man1")
     endif()
     if(RE2C_BUILD_RE2ZIG)
         install(TARGETS re2zig RUNTIME DESTINATION bin)
+        install(FILES "${re2c_manpage_zig}" DESTINATION "share/man/man1")
     endif()
     install(FILES
         include/syntax/c
diff --git a/Makefile.am b/Makefile.am
index d7d5215ff..b76179714 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -164,14 +164,19 @@ nodist_re2c_SOURCES = $(re2c_GEN)
 BUILT_SOURCES = $(re2c_GEN_SRC)
 
 # bootstrap sources
-re2c_BOOT_DOC_C = bootstrap/doc/re2c.1
-re2c_BOOT_DOC_GO = bootstrap/doc/re2go.1
-re2c_BOOT_DOC_RUST = bootstrap/doc/re2rust.1
 re2c_BOOT_HELP = bootstrap/src/msg/help.cc
 re2c_BOOT = \
-	$(re2c_BOOT_DOC_C) \
-	$(re2c_BOOT_DOC_GO) \
-	$(re2c_BOOT_DOC_RUST) \
+	bootstrap/doc/re2c.1 \
+	bootstrap/doc/re2d.1 \
+	bootstrap/doc/re2go.1 \
+	bootstrap/doc/re2hs.1 \
+	bootstrap/doc/re2java.1 \
+	bootstrap/doc/re2js.1 \
+	bootstrap/doc/re2ocaml.1 \
+	bootstrap/doc/re2py.1 \
+	bootstrap/doc/re2rust.1 \
+	bootstrap/doc/re2v.1 \
+	bootstrap/doc/re2zig.1 \
 	$(re2c_BOOT_HELP) \
 	bootstrap/src/msg/ver_to_vernum.cc \
 	bootstrap/src/options/parse_opts.cc \
@@ -207,6 +212,8 @@ re2c_CUSTOM = \
 
 # docs
 re2c_SRC_DOC = doc/manpage.rst
+# To build the list of examples for all backends, run:
+# find examples -regex '.*\.re\|.*\(state\|01_basic\|definitions\)\..*' -printf '\t%p \\\n' | sort
 re2c_SRC_DOC_EXT = \
 	doc/manual/api/api1.rst_ \
 	doc/manual/api/api2_c.rst_ \
@@ -246,8 +253,8 @@ re2c_SRC_DOC_EXT = \
 	doc/manual/syntax/syntax.rst_ \
 	doc/manual/warnings/warnings_general.rst_ \
 	doc/manual/warnings/warnings_list.rst_ \
-	examples/c/01_basic.re \
 	examples/c/01_basic.c \
+	examples/c/01_basic.re \
 	examples/c/conditions/parse_u32_blocks.re \
 	examples/c/conditions/parse_u32_conditions.re \
 	examples/c/encodings/unicode_identifier.re \
@@ -255,12 +262,16 @@ re2c_SRC_DOC_EXT = \
 	examples/c/eof/02_bounds_checking.re \
 	examples/c/eof/03_eof_rule.re \
 	examples/c/eof/04_fake_sentinel.re \
+	examples/c/eof/05_fake_sentinel_eof_rule.re \
 	examples/c/fill/01_fill.re \
 	examples/c/fill/02_fill.re \
+	examples/c/generic_api/ifstream.re \
 	examples/c/headers/header.re \
 	examples/c/headers/lexer/state.h \
-	examples/c/includes/include.re \
 	examples/c/includes/definitions.h \
+	examples/c/includes/include.re \
+	examples/c/real_world/cxx98.re \
+	examples/c/reuse/braille.re \
 	examples/c/reuse/reuse.re \
 	examples/c/reuse/usedir.re \
 	examples/c/state/push.re \
@@ -269,8 +280,36 @@ re2c_SRC_DOC_EXT = \
 	examples/c/submatch/02_mtags.re \
 	examples/c/submatch/03_captures.re \
 	examples/c/submatch/04_posix_captures.re \
-	examples/go/01_basic.re \
+	examples/c/submatch/http_rfc7230.re \
+	examples/c/submatch/parse_etc_passwd.re \
+	examples/c/submatch/parse_options.re \
+	examples/c/submatch/parse_records.re \
+	examples/c/submatch/uri_rfc3986.re \
+	examples/d/01_basic.d \
+	examples/d/01_basic.re \
+	examples/d/conditions/parse_u32_blocks.re \
+	examples/d/conditions/parse_u32_conditions.re \
+	examples/d/encodings/unicode_identifier.re \
+	examples/d/eof/01_sentinel.re \
+	examples/d/eof/02_bounds_checking.re \
+	examples/d/eof/03_eof_rule.re \
+	examples/d/eof/04_fake_sentinel.re \
+	examples/d/fill/01_fill.re \
+	examples/d/fill/02_fill.re \
+	examples/d/headers/header.re \
+	examples/d/headers/lexer/state.d \
+	examples/d/includes/definitions.d \
+	examples/d/includes/include.re \
+	examples/d/reuse/reuse.re \
+	examples/d/reuse/usedir.re \
+	examples/d/state/push.re \
+	examples/d/submatch/01_stags_fill.re \
+	examples/d/submatch/01_stags.re \
+	examples/d/submatch/02_mtags.re \
+	examples/d/submatch/03_captures.re \
+	examples/d/submatch/04_posix_captures.re \
 	examples/go/01_basic.go \
+	examples/go/01_basic.re \
 	examples/go/conditions/parse_u32_blocks.re \
 	examples/go/conditions/parse_u32_conditions.re \
 	examples/go/encodings/unicode_identifier.re \
@@ -282,8 +321,8 @@ re2c_SRC_DOC_EXT = \
 	examples/go/fill/02_fill.re \
 	examples/go/headers/header.re \
 	examples/go/headers/lexer/state.go \
-	examples/go/includes/include.re \
 	examples/go/includes/definitions.go \
+	examples/go/includes/include.re \
 	examples/go/reuse/reuse.re \
 	examples/go/reuse/usedir.re \
 	examples/go/state/push.re \
@@ -292,6 +331,120 @@ re2c_SRC_DOC_EXT = \
 	examples/go/submatch/02_mtags.re \
 	examples/go/submatch/03_captures.re \
 	examples/go/submatch/04_posix_captures.re \
+	examples/haskell/01_basic.hs \
+	examples/haskell/01_basic.re \
+	examples/haskell/conditions/parse_u32_blocks.re \
+	examples/haskell/conditions/parse_u32_conditions.re \
+	examples/haskell/encodings/unicode_identifier.re \
+	examples/haskell/eof/01_sentinel.re \
+	examples/haskell/eof/02_bounds_checking.re \
+	examples/haskell/eof/03_eof_rule.re \
+	examples/haskell/eof/04_fake_sentinel.re \
+	examples/haskell/fill/01_fill.re \
+	examples/haskell/fill/02_fill.re \
+	examples/haskell/headers/header.re \
+	examples/haskell/headers/lexer/state.hs \
+	examples/haskell/includes/definitions.hs \
+	examples/haskell/includes/include.re \
+	examples/haskell/reuse/reuse.re \
+	examples/haskell/reuse/usedir.re \
+	examples/haskell/state/push.re \
+	examples/haskell/submatch/01_stags_fill.re \
+	examples/haskell/submatch/01_stags.re \
+	examples/haskell/submatch/02_mtags.re \
+	examples/haskell/submatch/03_captures.re \
+	examples/java/01_basic.java \
+	examples/java/01_basic.re \
+	examples/java/conditions/parse_u32_blocks.re \
+	examples/java/conditions/parse_u32_conditions.re \
+	examples/java/encodings/unicode_identifier.re \
+	examples/java/eof/01_sentinel.re \
+	examples/java/eof/02_bounds_checking.re \
+	examples/java/eof/03_eof_rule.re \
+	examples/java/eof/04_fake_sentinel.re \
+	examples/java/fill/01_fill.re \
+	examples/java/fill/02_fill.re \
+	examples/java/headers/header.re \
+	examples/java/headers/lexer/state.java \
+	examples/java/includes/definitions.java \
+	examples/java/includes/include.re \
+	examples/java/reuse/reuse.re \
+	examples/java/reuse/usedir.re \
+	examples/java/state/push.re \
+	examples/java/submatch/01_stags_fill.re \
+	examples/java/submatch/01_stags.re \
+	examples/java/submatch/02_mtags.re \
+	examples/java/submatch/03_captures.re \
+	examples/java/submatch/04_posix_captures.re \
+	examples/js/01_basic.js \
+	examples/js/01_basic.re \
+	examples/js/conditions/parse_u32_blocks.re \
+	examples/js/conditions/parse_u32_conditions.re \
+	examples/js/encodings/unicode_identifier.re \
+	examples/js/eof/01_sentinel.re \
+	examples/js/eof/02_bounds_checking.re \
+	examples/js/eof/03_eof_rule.re \
+	examples/js/eof/04_fake_sentinel.re \
+	examples/js/fill/01_fill.re \
+	examples/js/fill/02_fill.re \
+	examples/js/headers/header.re \
+	examples/js/headers/lexer/state.js \
+	examples/js/includes/definitions.js \
+	examples/js/includes/include.re \
+	examples/js/reuse/reuse.re \
+	examples/js/reuse/usedir.re \
+	examples/js/state/push.re \
+	examples/js/submatch/01_stags_fill.re \
+	examples/js/submatch/01_stags.re \
+	examples/js/submatch/02_mtags.re \
+	examples/js/submatch/03_captures.re \
+	examples/js/submatch/04_posix_captures.re \
+	examples/ocaml/01_basic.ml \
+	examples/ocaml/01_basic.re \
+	examples/ocaml/conditions/parse_u32_blocks.re \
+	examples/ocaml/conditions/parse_u32_conditions.re \
+	examples/ocaml/encodings/unicode_identifier.re \
+	examples/ocaml/eof/01_sentinel.re \
+	examples/ocaml/eof/02_bounds_checking.re \
+	examples/ocaml/eof/03_eof_rule.re \
+	examples/ocaml/eof/04_fake_sentinel.re \
+	examples/ocaml/fill/01_fill.re \
+	examples/ocaml/fill/02_fill.re \
+	examples/ocaml/headers/header.re \
+	examples/ocaml/headers/lexer/state.ml \
+	examples/ocaml/includes/definitions.ml \
+	examples/ocaml/includes/include.re \
+	examples/ocaml/reuse/reuse.re \
+	examples/ocaml/reuse/usedir.re \
+	examples/ocaml/state/push.re \
+	examples/ocaml/submatch/01_stags_fill.re \
+	examples/ocaml/submatch/01_stags.re \
+	examples/ocaml/submatch/02_mtags.re \
+	examples/ocaml/submatch/03_captures.re \
+	examples/ocaml/submatch/04_posix_captures.re \
+	examples/python/01_basic.py \
+	examples/python/01_basic.re \
+	examples/python/conditions/parse_u32_blocks.re \
+	examples/python/conditions/parse_u32_conditions.re \
+	examples/python/encodings/unicode_identifier.re \
+	examples/python/eof/01_sentinel.re \
+	examples/python/eof/02_bounds_checking.re \
+	examples/python/eof/03_eof_rule.re \
+	examples/python/eof/04_fake_sentinel.re \
+	examples/python/fill/01_fill.re \
+	examples/python/fill/02_fill.re \
+	examples/python/headers/header.re \
+	examples/python/headers/lexer/state.py \
+	examples/python/includes/definitions.py \
+	examples/python/includes/include.re \
+	examples/python/reuse/reuse.re \
+	examples/python/reuse/usedir.re \
+	examples/python/state/push.re \
+	examples/python/submatch/01_stags_fill.re \
+	examples/python/submatch/01_stags.re \
+	examples/python/submatch/02_mtags.re \
+	examples/python/submatch/03_captures.re \
+	examples/python/submatch/04_posix_captures.re \
 	examples/rust/01_basic.re \
 	examples/rust/01_basic.rs \
 	examples/rust/conditions/parse_u32_blocks.re \
@@ -305,8 +458,9 @@ re2c_SRC_DOC_EXT = \
 	examples/rust/fill/02_fill.re \
 	examples/rust/headers/header.re \
 	examples/rust/headers/lexer/state.rs \
-	examples/rust/includes/include.re \
 	examples/rust/includes/definitions.rs \
+	examples/rust/includes/include.re \
+	examples/rust/real_world/c.re \
 	examples/rust/reuse/reuse.re \
 	examples/rust/reuse/usedir.re \
 	examples/rust/state/push.re \
@@ -314,19 +468,84 @@ re2c_SRC_DOC_EXT = \
 	examples/rust/submatch/01_stags.re \
 	examples/rust/submatch/02_mtags.re \
 	examples/rust/submatch/03_captures.re \
-	examples/rust/submatch/04_posix_captures.re
-
-DOC_C = doc/re2c.1
-DOCS = $(DOC_C)
-
+	examples/rust/submatch/04_posix_captures.re \
+	examples/v/01_basic.re \
+	examples/v/01_basic.v \
+	examples/v/conditions/parse_u32_blocks.re \
+	examples/v/conditions/parse_u32_conditions.re \
+	examples/v/encodings/unicode_identifier.re \
+	examples/v/eof/01_sentinel.re \
+	examples/v/eof/02_bounds_checking.re \
+	examples/v/eof/03_eof_rule.re \
+	examples/v/eof/04_fake_sentinel.re \
+	examples/v/fill/01_fill.re \
+	examples/v/fill/02_fill.re \
+	examples/v/headers/header.re \
+	examples/v/headers/lexer/state.v \
+	examples/v/includes/definitions.v \
+	examples/v/includes/include.re \
+	examples/v/reuse/reuse.re \
+	examples/v/reuse/usedir.re \
+	examples/v/state/push.re \
+	examples/v/submatch/01_stags_fill.re \
+	examples/v/submatch/01_stags.re \
+	examples/v/submatch/02_mtags.re \
+	examples/v/submatch/03_captures.re \
+	examples/v/submatch/04_posix_captures.re \
+	examples/zig/01_basic.re \
+	examples/zig/01_basic.zig \
+	examples/zig/conditions/parse_u32_blocks.re \
+	examples/zig/conditions/parse_u32_conditions.re \
+	examples/zig/encodings/unicode_identifier.re \
+	examples/zig/eof/01_sentinel.re \
+	examples/zig/eof/02_bounds_checking.re \
+	examples/zig/eof/03_eof_rule.re \
+	examples/zig/eof/04_fake_sentinel.re \
+	examples/zig/fill/01_fill.re \
+	examples/zig/fill/02_fill.re \
+	examples/zig/headers/header.re \
+	examples/zig/headers/lexer/state.zig \
+	examples/zig/includes/definitions.zig \
+	examples/zig/includes/include.re \
+	examples/zig/reuse/reuse.re \
+	examples/zig/reuse/usedir.re \
+	examples/zig/state/push.re \
+	examples/zig/submatch/01_stags_fill.re \
+	examples/zig/submatch/01_stags.re \
+	examples/zig/submatch/02_mtags.re \
+	examples/zig/submatch/03_captures.re \
+	examples/zig/submatch/04_posix_captures.re
+
+DOCS = doc/re2c.1
+if WITH_DLANG
+DOCS += doc/re2d.1
+endif
 if WITH_GOLANG
-DOC_GO = doc/re2go.1
-DOCS += $(DOC_GO)
+DOCS += doc/re2go.1
+endif
+if WITH_HASKELL
+DOCS += doc/re2hs.1
+endif
+if WITH_JAVA
+DOCS += doc/re2java.1
+endif
+if WITH_JS
+DOCS += doc/re2js.1
+endif
+if WITH_OCAML
+DOCS += doc/re2ocaml.1
+endif
+if WITH_PYTHON
+DOCS += doc/re2py.1
 endif
-
 if WITH_RUST
-DOC_RUST = doc/re2rust.1
-DOCS += $(DOC_RUST)
+DOCS += doc/re2rust.1
+endif
+if WITH_VLANG
+DOCS += doc/re2v.1
+endif
+if WITH_ZIG
+DOCS += doc/re2zig.1
 endif
 
 man_MANS = $(DOCS)
@@ -438,27 +657,13 @@ docs: $(DOCS) $(re2c_GEN_HELP)
 RST2TXT = $(top_srcdir)/build/rst2txt.py
 RST2MAN = $(top_srcdir)/build/rst2man.py
 SPLITMAN = $(top_srcdir)/build/split_man.py
-# generate manpage for C
-$(DOC_C): $(re2c_SRC_DOC) $(re2c_SRC_DOC_EXT) $(SPLITMAN) $(RST2MAN)
+# generate manpage
+doc/re2%.1: $(re2c_SRC_DOC) $(re2c_SRC_DOC_EXT) $(SPLITMAN) $(RST2MAN)
 	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)$(PYTHON) $(SPLITMAN) $(top_builddir)/$(re2c_SRC_DOC) $(top_builddir)/$(re2c_SRC_DOC).c c \
-		&& $(PYTHON) $(RST2MAN) --tab-width=4 $(top_builddir)/$(re2c_SRC_DOC).c > $@ \
-		&& cp $@ $(top_srcdir)/$(re2c_BOOT_DOC_C) \
-		&& rm $(top_builddir)/$(re2c_SRC_DOC).c
-# generate manpage for Go
-$(DOC_GO): $(re2c_SRC_DOC) $(re2c_SRC_DOC_EXT) $(SPLITMAN) $(RST2MAN)
-	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)$(PYTHON) $(SPLITMAN) $(top_builddir)/$(re2c_SRC_DOC) $(top_builddir)/$(re2c_SRC_DOC).go go \
-		&& $(PYTHON) $(RST2MAN) --tab-width=4 $(top_builddir)/$(re2c_SRC_DOC).go > $@ \
-		&& cp $@ $(top_srcdir)/$(re2c_BOOT_DOC_GO) \
-		&& rm $(top_builddir)/$(re2c_SRC_DOC).go
-# generate manpage for Rust
-$(DOC_RUST): $(re2c_SRC_DOC) $(re2c_SRC_DOC_EXT) $(SPLITMAN) $(RST2MAN)
-	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)$(PYTHON) $(SPLITMAN) $(top_builddir)/$(re2c_SRC_DOC) $(top_builddir)/$(re2c_SRC_DOC).rust rust \
-		&& $(PYTHON) $(RST2MAN) --tab-width=4 $(top_builddir)/$(re2c_SRC_DOC).rust > $@ \
-		&& cp $@ $(top_srcdir)/$(re2c_BOOT_DOC_RUST) \
-		&& rm $(top_builddir)/$(re2c_SRC_DOC).rust
+	$(AM_V_GEN)$(PYTHON) $(SPLITMAN) $(top_builddir)/$(re2c_SRC_DOC) $(top_builddir)/$@.rst \
+		&& $(PYTHON) $(RST2MAN) --tab-width=4 $(top_builddir)/$@.rst > $@ \
+		&& cp $@ $(top_srcdir)/bootstrap/$@ \
+		&& rm $(top_builddir)/$@.rst
 # generate help
 $(re2c_GEN_HELP): $(re2c_CUSTOM_HELP) $(re2c_SRC_DOC_EXT) $(RST2TXT)
 	$(AM_V_at)$(MKDIR_P) $(@D)
@@ -467,18 +672,10 @@ $(re2c_GEN_HELP): $(re2c_CUSTOM_HELP) $(re2c_SRC_DOC_EXT) $(RST2TXT)
 else
 docs: $(DOCS) $(re2c_GEN_HELP)
 	$(AM_V_at)echo "Reconfigure with --enable-docs to rebuild docs"
-# copy bootstrap manpage for C
-$(DOC_C): $(re2c_BOOT_DOC_C)
-	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)cp $(top_srcdir)/$(re2c_BOOT_DOC_C) $@
-# copy bootstrap manpage for Go
-$(DOC_GO): $(re2c_BOOT_DOC_GO)
-	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)cp $(top_srcdir)/$(re2c_BOOT_DOC_GO) $@
-# copy bootstrap manpage for Rust
-$(DOC_RUST): $(re2c_BOOT_DOC_RUST)
+# copy bootstrap manpage
+doc/re2%.1: bootstrap/doc/re2%.1
 	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN)cp $(top_srcdir)/$(re2c_BOOT_DOC_RUST) $@
+	$(AM_V_GEN)cp $(top_srcdir)/bootstrap/$@ $@
 # copy bootstrap help
 $(re2c_GEN_HELP): $(re2c_BOOT_HELP)
 	$(AM_V_at)$(MKDIR_P) $(@D)
diff --git a/bootstrap/doc/re2c.1 b/bootstrap/doc/re2c.1
index b765b8ac8..bc8abfe92 100644
--- a/bootstrap/doc/re2c.1
+++ b/bootstrap/doc/re2c.1
@@ -242,8 +242,8 @@ should be defined as pointers of type \fBYYCTYPE*\fP\&.
 .B \fBRecord API\fP
 (\fIadded in version 4.0\fP)
 Record API is useful in cases when lexer state must be stored in a struct.
-It is enabled with option \fB\-\-api record\fP or configuration
-\fBre2c:api = record\fP\&. This API consists of a variable \fByyrecord\fP (the
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
 name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
 defined as a struct with fields \fByycursor\fP, \fByymarker\fP, \fByyctxmarker\fP,
 \fByylimit\fP (only the fields used by the generated code need to be defined,
@@ -255,8 +255,8 @@ and their names can be configured).
 .TP
 .B \fBGeneric API\fP
 (\fIadded in version 0.14\fP)
-This is the most flexible API provided by re2c. It is enabled with
-\fB\-\-api generic\fP option or \fBre2c:api = generic\fP configuration.
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
 This API contains primitives for generic operations:
 \fBYYPEEK\fP,
 \fBYYSKIP\fP,
@@ -2795,53 +2795,64 @@ int main() {
 .SH SUBMATCH EXTRACTION
 .sp
 re2c has two options for submatch extraction.
-.sp
-The first option is \fB\-T \-\-tags\fP\&. With this option one can use standalone tags
-of the form \fB@stag\fP and \fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary
-used\-defined names. Tags can be used anywhere inside of a regular expression;
-semantically they are just position markers. Tags of the form \fB@stag\fP are
-called s\-tags: they denote a single submatch value (the last input position
-where this tag matched). Tags of the form \fB#mtag\fP are called m\-tags: they
-denote multiple submatch values (the whole history of repetitions of this tag).
-All tags should be defined by the user as variables with the corresponding
-names. With standalone tags re2c uses leftmost greedy disambiguation: submatch
-positions correspond to the leftmost matching path through the regular
-expression.
-.sp
-The second option is \fB\-P \-\-posix\-captures\fP: it enables POSIX\-compliant
-capturing groups. In this mode parentheses in regular expressions denote the
-beginning and the end of capturing groups; the whole regular expression is group
-number zero. The number of groups for the matching rule is stored in a variable
-\fByynmatch\fP, and submatch results are stored in \fByypmatch\fP array. Both
-\fByynmatch\fP and \fByypmatch\fP should be defined by the user, and \fByypmatch\fP
-size must be at least \fB[yynmatch * 2]\fP\&. re2c provides a directive
-\fB/*!maxnmatch:re2c*/\fP that defines \fBYYMAXNMATCH\fP: a constant  equal to the
-maximal value of \fByynmatch\fP among all rules. Note that re2c implements
-POSIX\-compliant disambiguation: each subexpression matches as long as possible,
-and subexpressions that start earlier in regular expression have priority over
-those starting later. Capturing groups are translated into s\-tags under the
-hood, therefore we use the word \(dqtag\(dq to describe them as well.
-.sp
-With both \fB\-P \-\-posix\-captures\fP and \fBT \-\-tags\fP options re2c uses efficient
-submatch extraction algorithm described in the
-\fI\%Tagged Deterministic Finite Automata with Lookahead\fP
-paper. The overhead on submatch extraction in the generated lexer grows with the
-number of tags \-\-\- if this number is moderate, the overhead is barely
-noticeable. In the lexer tags are implemented using a number of tag variables
-generated by re2c. There is no one\-to\-one correspondence between tag variables
-and tags: a single variable may be reused for different tags, and one tag may
-require multiple variables to hold all its ambiguous values. Eventually
-ambiguity is resolved, and only one final variable per tag survives. When a rule
-matches, all its tags are set to the values of the corresponding tag variables.
-The exact number of tag variables is unknown to the user; this number is
-determined by re2c. However, tag variables should be defined by the user as a
-part of the lexer state and updated by \fBYYFILL\fP, therefore re2c provides
-directives \fB/*!stags:re2c*/\fP and \fB/*!mtags:re2c*/\fP that can be used to
-declare, initialize and manipulate tag variables. These directives have two
-optional configurations: \fBformat = \(dq@@\(dq;\fP (specifies the template where \fB@@\fP
-is substituted with the name of each tag variable), and \fBseparator = \(dq\(dq;\fP
-(specifies the piece of code used to join the generated pieces for different
-tag variables).
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies, \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars = 1\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
 .sp
 S\-tags support the following operations:
 .INDENT 0.0
@@ -3069,7 +3080,7 @@ int main() {
 .UNINDENT
 .UNINDENT
 .sp
-Here is an example of using POSIX capturing groups to parse semantic versions.
+Here is an example of using capturing groups to parse semantic versions.
 .INDENT 0.0
 .INDENT 3.5
 .sp
@@ -3079,9 +3090,6 @@ Here is an example of using POSIX capturing groups to parse semantic versions.
 #include <assert.h>
 #include <stddef.h>
 
-// Maximum number of capturing groups among all rules.
-/*!maxnmatch:re2c*/
-
 struct SemVer { int major, minor, patch; };
 
 static int s2n(const char *s, const char *e) { // pre\-parsed string to number
@@ -3093,9 +3101,8 @@ static int s2n(const char *s, const char *e) { // pre\-parsed string to number
 static bool lex(const char *str, SemVer &ver) {
     const char *YYCURSOR = str, *YYMARKER;
 
-    // Allocate memory for capturing parentheses (twice the number of groups).
-    const char *yypmatch[YYMAXNMATCH * 2];
-    size_t yynmatch;
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqconst char *@@;\en\(aq; */
 
     // Intermediate tag variables used by the lexer (must be autogenerated).
     /*!stags:re2c format = \(aqconst char *@@;\en\(aq; */
@@ -3103,18 +3110,15 @@ static bool lex(const char *str, SemVer &ver) {
     /*!re2c
         re2c:yyfill:enable = 0;
         re2c:define:YYCTYPE = char;
-        re2c:posix\-captures = 1;
+        re2c:captvars = 1;
 
         num = [0\-9]+;
 
         (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
-            // \(gayynmatch\(ga is the number of capturing groups
-            assert(yynmatch == 4);
-            // Even \(gayypmatch\(ga values are for opening parentheses, odd values
-            // are for closing parentheses, the first group is the whole match.
-            ver.major = s2n(yypmatch[2], yypmatch[3]);
-            ver.minor = s2n(yypmatch[4], yypmatch[5]);
-            ver.patch = yypmatch[6] ? s2n(yypmatch[6] + 1, yypmatch[7]) : 0;
+            (void) yytl0; (void) yytr0; // some variables are unused
+            ver.major = s2n(yytl1, yytr1);
+            ver.minor = s2n(yytl2, yytr2);
+            ver.patch = yytl3 ? s2n(yytl3 + 1, yytr3) : 0;
             return true;
         }
         * { return false; }
diff --git a/bootstrap/doc/re2d.1 b/bootstrap/doc/re2d.1
new file mode 100644
index 000000000..55caf63f5
--- /dev/null
+++ b/bootstrap/doc/re2d.1
@@ -0,0 +1,3449 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for D, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-i
+module main;
+
+private bool lex(const(char)* yycursor) {
+    /*!re2c
+        re2c:define:YYCTYPE = char;
+        re2c:yyfill:enable = 0;
+
+        number = [1\-9][0\-9]*;
+        number { return true; }
+        *      { return false; }
+    */
+}
+
+void main() {
+    assert(lex(\(dq1234\(dq));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB/*!re2c\fP and \fB*/\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+/* Generated by re2d */
+// re2d $INPUT \-o $OUTPUT \-i
+module main;
+
+private bool lex(const(char)* yycursor) {
+    
+{
+    char yych;
+    yych = *yycursor;
+    switch (yych) {
+        case \(aq1\(aq: .. case \(aq9\(aq: goto yy2;
+        default: goto yy1;
+    }
+yy1:
+    ++yycursor;
+    { return false; }
+yy2:
+    yych = *++yycursor;
+    switch (yych) {
+        case \(aq0\(aq: .. case \(aq9\(aq: goto yy2;
+        default: goto yy3;
+    }
+yy3:
+    { return true; }
+}
+
+}
+
+void main() {
+    assert(lex(\(dq1234\(dq));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2d has three API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBSimple API\fP
+This is the default API for D backend. It consists of primitives
+\fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP and \fBYYLIMIT\fP, which
+should be defined as pointers of type \fBYYCTYPE*\fP\&.
+.nf
+
+.fi
+.sp
+.TP
+.B \fBRecord API\fP
+Record API is useful in cases when lexer state must be stored in a struct.
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
+name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
+defined as a struct with fields \fByycursor\fP, \fByymarker\fP, \fByyctxmarker\fP,
+\fByylimit\fP (only the fields used by the generated code need to be defined,
+and their names can be configured).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to roll back. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are fewer than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details.)
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details.)
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in the form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-support\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+// Expect a null\-terminated string.
+private int lex(const(char)* yycursor) {
+    uint count = 0;
+
+    for (;;) {
+    /*!re2c
+        re2c:define:YYCTYPE = char;
+        re2c:yyfill:enable = 0;
+
+        *      { return \-1; }
+        [\ex00] { return count; }
+        [a\-z]+ { ++count; continue; }
+        [ ]+   { continue; }
+    */
+    }
+    assert(0); // unreachable
+}
+
+void main() {
+    assert(lex(\(dq\(dq) == 0);
+    assert(lex(\(dqone two three\(dq) == 3);
+    assert(lex(\(dqf0ur\(dq) == \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+// Expect a null\-terminated string.
+private int lex(immutable char[] s) {
+    const(char)* yycursor = s.ptr, yylimit = s.ptr + s.length, yymarker;
+    int count = 0;
+
+    for (;;) {
+    /*!re2c
+        re2c:define:YYCTYPE = char;
+        re2c:yyfill:enable = 0;
+        re2c:eof = 0;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        *    { return \-1; }
+        $    { return count; }
+        str  { ++count; continue; }
+        [ ]+ { continue; }
+    */
+    }
+    assert(0); // unreachable
+}
+
+void main() {
+    assert(lex(\(dq\(dq) == 0);
+    assert(lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq) == 3);
+    assert(lex(\(dq\(aqunterminated\e\e\(aq\(dq) == \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+import core.stdc.stdlib;
+import core.stdc.string;
+
+/*!max:re2c*/
+
+private int lex(immutable char[] s) {
+    // Make a copy of the string with YYMAXFILL zeroes at the end.
+    char *buf = cast(char*) malloc(s.length + YYMaxFill);
+    memcpy(buf, cast(const(void*)) s, s.length);
+    memset(buf + s.length, 0, YYMaxFill);
+
+    const(char)* yycursor = buf;
+    const(char)* yylimit = buf + s.length + YYMaxFill;
+    int count = 0;
+
+loop:
+    /*!re2c
+        re2c:define:YYCTYPE = char;
+        re2c:define:YYFILL = \(dqgoto fail;\(dq;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        [\ex00] {
+            // Check that it is the sentinel, not some unexpected null.
+            if (yycursor \- 1 == buf + s.length) goto exit; else goto fail;
+        }
+        str  { ++count; goto loop; }
+        [ ]+ { goto loop; }
+        *    { goto fail; }
+    */
+fail:
+    count = \-1;
+exit:
+    free(buf);
+    return count;
+}
+
+void main() {
+    assert(lex(\(dq\(dq) == 0);
+    assert(lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq) == 3);
+    assert(lex(\(dq\(aqunterminated\e\e\(aq\(dq) == \-1);
+    assert(lex(\(dq\(aqunexpected \e0 null\e\e\(aq\(dq) == \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated words in a string. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To cover up
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the current position
+is within bounds, \fBYYPEEK\fP returns the real character, otherwise it returns
+a fake sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+import core.stdc.stdlib;
+import core.stdc.string;
+
+private int lex(immutable char[] s) {
+    // For the sake of example create a string without terminating null.
+    char *buf = cast(char*) malloc(s.length);
+    memcpy(buf, cast(const(void*)) s, s.length);
+
+    const(char) *cur = buf, lim = buf + s.length;
+    int count = 0;
+
+    for (;;) {
+    /*!re2c
+        re2c:api = generic;
+        re2c:yyfill:enable = 0;
+        re2c:define:YYCTYPE = char;
+        re2c:define:YYPEEK  = \(dqcur < lim ? *cur : 0\(dq;  // fake null
+        re2c:define:YYSKIP  = \(dq++cur;\(dq;
+
+        *      { count = \-1; break; }
+        [\ex00] { break;{} }
+        [a\-z]+ { ++count; continue;{} }
+        [ ]+   { continue; }
+    */
+    }
+
+    free(buf);
+    return count;
+}
+
+void main() {
+    assert(lex(\(dq\(dq) == 0);
+    assert(lex(\(dqone two three \(dq) == 3);
+    assert(lex(\(dqf0ur\(dq) == \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in
+C pointer API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token and up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+import core.stdc.string;
+import core.stdc.stdio;
+
+enum BUFSIZE = 4095;
+
+struct Input {
+    FILE* file;
+    char[BUFSIZE + 1] buffer;// +1 for sentinel
+    char* yylimit, yycursor, yymarker, token;
+    bool eof;
+};
+
+private int fill(ref Input it) {
+    if (it.eof) return 1;
+
+    const size_t shift = it.token \- it.buffer.ptr;
+    const size_t used = it.yylimit \- it.token;
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (shift < 1) return 2;
+
+    // Shift buffer contents (discard everything up to the current token).
+    memmove(cast(void*)it.buffer.ptr, it.token, used);
+    it.yylimit \-= shift;
+    it.yycursor \-= shift;
+    it.yymarker \-= shift;
+    it.token \-= shift;
+
+    // Fill free space at the end of buffer with new data from file.
+    it.yylimit += fread(it.yylimit, 1, BUFSIZE \- used, it.file);
+    it.yylimit[0] = 0;
+    it.eof = it.yylimit < (it.buffer.ptr + BUFSIZE);
+    return 0;
+}
+
+private int lex(ref Input yyrecord) {
+    int count = 0;
+    for (;;) {
+        yyrecord.token = yyrecord.yycursor;
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:define:YYFILL = \(dqfill(yyrecord) == 0\(dq;
+        re2c:eof = 0;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        *    { return \-1; }
+        $    { return count; }
+        str  { ++count; continue; }
+        [ ]+ { continue; }
+    */
+    }
+    assert(0);
+}
+
+void main() {
+    const char[] fname = \(dqinput\(dq;
+    const char[] content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    FILE* f = fopen(fname.ptr, \(dqw\(dq);
+    for (int i = 0; i < BUFSIZE; ++i) {
+        fwrite(cast(const(void*)) content.ptr, 1, content.length \- 1, f);
+    }
+    fclose(f);
+    int count = 3 * BUFSIZE; // number of quoted strings written to file
+
+    // Initialize lexer state: all pointers are at the end of buffer.
+    Input it;
+    it.file = fopen(fname.ptr, \(dqr\(dq);
+    it.yycursor = it.yymarker = it.token = it.yylimit = it.buffer.ptr + BUFSIZE;
+    it.eof = 0;
+    // Sentinel (at YYLIMIT pointer) is set to zero, which triggers YYFILL.
+    it.yylimit[0] = 0;
+
+    // Run the lexer.
+    assert(lex(it) == count);
+
+    // Cleanup: remove input file.
+    fclose(it.file);
+    remove(fname.ptr);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+import core.stdc.string;
+import core.stdc.stdio;
+
+/*!max:re2c*/
+enum BufSize = (4096 \- YYMaxFill);
+
+struct Input {
+    FILE* file;
+    char[BufSize + YYMaxFill] buffer;
+    char* yylimit, yycursor, token;
+    bool eof;
+};
+
+private int fill(ref Input it, size_t need) {
+    if (it.eof) return 1;
+
+    const size_t shift = it.token \- it.buffer.ptr;
+    const size_t used = it.yylimit \- it.token;
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (shift < need) return 2;
+
+    // Shift buffer contents (discard everything up to the current token).
+    memmove(it.buffer.ptr, it.token, used);
+    it.yylimit \-= shift;
+    it.yycursor \-= shift;
+    it.token \-= shift;
+
+    // Fill free space at the end of buffer with new data from file.
+    it.yylimit += fread(it.yylimit, 1, BufSize \- used, it.file);
+
+    // If read less than expected, this is end of input => add zero padding
+    // so that the lexer can access characters at the end of buffer.
+    if (it.yylimit < it.buffer.ptr + BufSize) {
+        it.eof = true;
+        memset(it.yylimit, 0, YYMaxFill);
+        it.yylimit += YYMaxFill;
+    }
+
+    return 0;
+}
+
+private int lex(ref Input yyrecord) {
+    int count = 0;
+    for (;;) {
+        yyrecord.token = yyrecord.yycursor;
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:define:YYFILL = \(dqif (fill(yyrecord, @@) != 0) return \-1;\(dq;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        [\ex00] {
+            // Check that it is the sentinel, not some unexpected null.
+            return yyrecord.token == yyrecord.yylimit \- YYMaxFill ? count : \-1;
+        }
+        str  { ++count; continue; }
+        [ ]+ { continue; }
+        *    { return \-1; }
+    */
+    }
+    assert(0);
+}
+
+void main() {
+    const char[] fname = \(dqinput\(dq;
+    const char[] content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    FILE* f = fopen(fname.ptr, \(dqw\(dq);
+    for (int i = 0; i < BufSize; ++i) {
+        fwrite(content.ptr, 1, content.length \- 1, f);
+    }
+    fclose(f);
+    int count = 3 * BufSize; // number of quoted strings written to file
+
+    // Initialize lexer state: all pointers are at the end of buffer.
+    // This immediately triggers YYFILL, as the check \(gait.yycursor < it.yylimit\(ga fails.
+    Input it;
+    it.file = fopen(fname.ptr, \(dqr\(dq);
+    it.yycursor = it.token = it.yylimit = it.buffer.ptr + BufSize;
+    it.eof = 0;
+
+    // Run the lexer.
+    assert(lex(it) == count);
+
+    // Cleanup: remove input file.
+    fclose(it.file);
+    remove(fname.ptr);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-i
+module main;
+
+enum ERROR = ulong.max;
+
+private void add(ulong BASE)(ref ulong u, int d) {
+    u = u * BASE + d;
+    if (u > uint.max) { u = ERROR; }
+}
+
+private ulong parse_u32(const(char)* s) {
+    const(char)* yycursor = s, yymarker;
+    ulong u = 0;
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:define:YYCTYPE = char;
+
+        end = \(dq\ex00\(dq;
+
+        \(aq0b\(aq / [01]        { goto bin; }
+        \(dq0\(dq                { goto oct; }
+        \(dq\(dq / [1\-9]         { goto dec; }
+        \(aq0x\(aq / [0\-9a\-fA\-F] { goto hex; }
+        *                  { return ERROR; }
+    */
+bin:
+    /*!re2c
+        end   { return u; }
+        [01]  { add!(2)(u, yycursor[\-1] \- \(aq0\(aq); goto bin; }
+        *     { return ERROR; }
+    */
+oct:
+    /*!re2c
+        end   { return u; }
+        [0\-7] { add!(8)(u, yycursor[\-1] \- \(aq0\(aq); goto oct; }
+        *     { return ERROR; }
+    */
+dec:
+    /*!re2c
+        end   { return u; }
+        [0\-9] { add!(10)(u, yycursor[\-1] \- \(aq0\(aq); goto dec; }
+        *     { return ERROR; }
+    */
+hex:
+    /*!re2c
+        end   { return u; }
+        [0\-9] { add!(16)(u, yycursor[\-1] \- \(aq0\(aq);      goto hex; }
+        [a\-f] { add!(16)(u, yycursor[\-1] \- \(aqa\(aq + 10); goto hex; }
+        [A\-F] { add!(16)(u, yycursor[\-1] \- \(aqA\(aq + 10); goto hex; }
+        *     { return ERROR; }
+    */
+}
+
+void main() {
+    assert(parse_u32(\(dq\(dq) == ERROR);
+    assert(parse_u32(\(dq1234567890\(dq) == 1234567890);
+    assert(parse_u32(\(dq0b1101\(dq) == 13);
+    assert(parse_u32(\(dq0x7Fe\(dq) == 2046);
+    assert(parse_u32(\(dq0644\(dq) == 420);
+    assert(parse_u32(\(dq9999999999\(dq) == ERROR);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-ci
+module main;
+
+enum ERROR = ulong.max;
+/*!conditions:re2c*/
+
+private void add(ulong BASE)(ref ulong u, int d) {
+    u = u * BASE + d;
+    if (u > uint.max) { u = ERROR; }
+}
+
+private ulong parse_u32(const(char)* s) {
+    const(char)* yycursor = s, yymarker;
+    YYCond yycond = YYCond.yycinit;
+    ulong u = 0;
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:define:YYCTYPE = char;
+
+        <*> * { return ERROR; }
+        <init> \(aq0b\(aq / [01]        :=> bin
+        <init> \(dq0\(dq                :=> oct
+        <init> \(dq\(dq / [1\-9]         :=> dec
+        <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+        <bin, oct, dec, hex> \(dq\ex00\(dq { return u; }
+        <bin> [01]  { add!(2)(u,  yycursor[\-1] \- \(aq0\(aq);      goto yyc_bin; }
+        <oct> [0\-7] { add!(8)(u,  yycursor[\-1] \- \(aq0\(aq);      goto yyc_oct; }
+        <dec> [0\-9] { add!(10)(u, yycursor[\-1] \- \(aq0\(aq);      goto yyc_dec; }
+        <hex> [0\-9] { add!(16)(u, yycursor[\-1] \- \(aq0\(aq);      goto yyc_hex; }
+        <hex> [a\-f] { add!(16)(u, yycursor[\-1] \- \(aqa\(aq + 10); goto yyc_hex; }
+        <hex> [A\-F] { add!(16)(u, yycursor[\-1] \- \(aqA\(aq + 10); goto yyc_hex; }
+    */
+}
+
+
+
+void main() {
+    assert(parse_u32(\(dq\(dq) == ERROR);
+    assert(parse_u32(\(dq1234567890\(dq) == 1234567890);
+    assert(parse_u32(\(dq0b1101\(dq) == 13);
+    assert(parse_u32(\(dq0x7Fe\(dq) == 2046);
+    assert(parse_u32(\(dq0644\(dq) == 420);
+    assert(parse_u32(\(dq9999999999\(dq) == ERROR);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-f
+module main;
+
+import core.stdc.stdio;
+import core.stdc.string;
+
+// Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+// In real world use a larger buffer.
+enum BUFSIZE = 10;
+
+struct State {
+    FILE* file;
+    char[BUFSIZE + 1] buffer;
+    char* yylimit, yycursor, yymarker, token;
+    int yystate;
+};
+
+enum Status {END, READY, WAITING, BAD_PACKET, BIG_PACKET};
+
+private Status fill(ref State st) {
+    const size_t shift = st.token \- cast(char*)st.buffer;
+    const size_t used = st.yylimit \- st.token;
+    const size_t free = BUFSIZE \- used;
+
+    // Error: no space. In real life can reallocate a larger buffer.
+    if (free < 1) return Status.BIG_PACKET;
+
+    // Shift buffer contents (discard already processed data).
+    memmove(cast(void*)st.buffer, st.token, used);
+    st.yylimit \-= shift;
+    st.yycursor \-= shift;
+    st.yymarker \-= shift;
+    st.token \-= shift;
+
+    // Fill free space at the end of buffer with new data.
+    const size_t read = fread(st.yylimit, 1, free, st.file);
+    st.yylimit += read;
+    st.yylimit[0] = 0; // append sentinel symbol
+
+    return Status.READY;
+}
+
+private Status lex(ref State yyrecord, uint* recv) {
+    char yych;
+    /*!getstate:re2c*/
+
+    for (;;) {
+        yyrecord.token = yyrecord.yycursor;
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYCTYPE = char;
+        re2c:define:YYFILL = \(dqreturn Status.WAITING;\(dq;
+        re2c:eof = 0;
+
+        packet = [a\-z]+[;];
+
+        *      { return Status.BAD_PACKET; }
+        $      { return Status.END; }
+        packet { *recv = *recv + 1; continue; }
+    */
+    }
+    assert(0); // unreachable
+}
+
+private void test(string[] packets, Status expect) {
+    // Create a pipe (open the same file for reading and writing).
+    const(char*) fname = \(dqpipe\(dq;
+    FILE* fw = fopen(fname, \(dqw\(dq);
+    FILE* fr = fopen(fname, \(dqr\(dq);
+    setvbuf(fw, null, _IONBF, 0);
+    setvbuf(fr, null, _IONBF, 0);
+
+    // Initialize lexer state: \(gastate\(ga value is \-1, all pointers are at the end
+    // of buffer.
+    State st;
+    st.file = fr;
+    st.yycursor = st.yymarker = st.token = st.yylimit = cast(char*)st.buffer + BUFSIZE;
+    // Sentinel (at YYLIMIT pointer) is set to zero, which triggers YYFILL.
+    st.yylimit[0] = 0;
+    st.yystate = \-1;
+
+    // Main loop. The buffer contains incomplete data which appears packet by
+    // packet. When the lexer needs more input it saves its internal state and
+    // returns to the caller which should provide more input and resume lexing.
+    Status status;
+    uint send = 0, recv = 0;
+    for (;;) {
+        status = lex(st, &recv);
+        if (status == Status.END) {
+            debug{printf(\(dqdone: got %u packets\en\(dq, recv);}
+            break;
+        } else if (status == Status.WAITING) {
+            debug{printf(\(dqwaiting...\en\(dq);}
+            if (send < packets.length) {
+                debug{printf(\(dqsent packet %u\en\(dq, send);}
+                fprintf(fw, \(dq%s\(dq, cast(char*)packets[send]);
+                ++send;
+            }
+            status = fill(st);
+            debug{printf(\(dqqueue: \(aq%s\(aq\en\(dq, cast(char*)st.buffer);}
+            if (status == Status.BIG_PACKET) {
+                debug{printf(\(dqerror: packet too big\en\(dq);}
+                break;
+            }
+            assert(status == Status.READY);
+        } else {
+            assert(status == Status.BAD_PACKET);
+            debug{printf(\(dqerror: ill\-formed packet\en\(dq);}
+            break;
+        }
+    }
+
+    // Check results.
+    assert(status == expect);
+    if (status == Status.END) assert(recv == send);
+
+    // Cleanup: remove input file.
+    fclose(fw);
+    fclose(fr);
+    remove(fname);
+}
+
+void main() {
+    string[] packets1 = [];
+    string[] packets2 = [\(dqzero;\(dq, \(dqone;\(dq, \(dqtwo;\(dq, \(dqthree;\(dq, \(dqfour;\(dq];
+    string[] packets3 = [\(dqzer0;\(dq];
+    string[] packets4 = [\(dqlooooooooooong;\(dq];
+
+    test(packets1, Status.END);
+    test(packets2, Status.END);
+    test(packets3, Status.BAD_PACKET);
+    test(packets4, Status.BIG_PACKET);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+latest configuration overrides preceding ones. One exception is the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. Use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+// This example shows how to combine reusable re2c blocks: two blocks
+// (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+// in both blocks; the \(aqfish\(aq block takes priority because it is used
+// earlier. Default rule * occurs in all three blocks; the local (not
+// inherited) definition takes priority.
+
+enum What { COLOR, FISH, DUNNO };
+
+/*!rules:re2c:colors
+    *                            { assert(false); }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { return What.COLOR; }
+*/
+
+/*!rules:re2c:fish
+    *                            { assert(false); }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { return What.FISH; }
+*/
+
+private What lex(const(char)* s) {
+    const(char)* yycursor = s, yymarker;
+    /*!re2c
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:yyfill:enable = 0;
+
+        !use:fish;
+        !use:colors;
+        * { return What.DUNNO; } // overrides inherited \(aq*\(aq rules
+    */
+}
+
+void main() {
+    assert(lex(\(dqsalmon\(dq) == What.FISH);
+    assert(lex(\(dqwhat?\(dq) == What.DUNNO);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+module main;
+
+import std.stdint;
+
+// This example supports multiple input encodings: UTF\-8 and UTF\-32.
+// Both lexers are generated from the same rules block, and the use
+// blocks add only encoding\-specific configurations.
+/*!rules:re2c
+    re2c:yyfill:enable = 0;
+
+    \(dq∀x ∃y\(dq { return 0; }
+    *       { return 1; }
+*/
+
+private int lex_utf8(const(uint8_t)* s) {
+    const(uint8_t)* yycursor = s, yymarker;
+    /*!use:re2c
+        re2c:define:YYCTYPE = uint8_t;
+        re2c:encoding:utf8 = 1;
+    */
+}
+
+private int lex_utf32(const(uint32_t)* s) {
+    const(uint32_t)* yycursor = s, yymarker;
+    /*!use:re2c
+        re2c:define:YYCTYPE = uint32_t;
+        re2c:encoding:utf32 = 1;
+    */
+}
+
+void main() {
+    immutable uint8_t[] s8 = // UTF\-8
+        [ 0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79 ];
+
+    immutable uint32_t[] s32 = // UTF32
+        [ 0x00002200, 0x00000078, 0x00000020, 0x00002203, 0x00000079 ];
+
+    assert(lex_utf8(cast(const(uint8_t)*)s8) == 0);
+    assert(lex_utf32(cast(const(uint32_t)*)s32) == 0);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+struct SemVer {
+    int major;
+    int minor;
+    int patch;
+};
+
+private int s2n(const(char)* s, const(char)* e) { // pre\-parsed string to number
+    int n = 0;
+    for (; s < e; ++s) n = n * 10 + (*s \- \(aq0\(aq);
+    return n;
+}
+
+private bool lex(const(char)* str, ref SemVer ver) {
+    const(char)* yycursor = str, yymarker;
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqconst(char)* @@;\en\(aq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(aqconst(char)* @@;\en\(aq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+            ver.major = s2n(t1, t2);
+            ver.minor = s2n(t3, t4);
+            ver.patch = t5 != null ? s2n(t5, yycursor \- 1) : 0;
+            return true;
+        }
+        * { return false; }
+    */
+}
+
+void main() {
+    SemVer v;
+    assert(lex(\(dq23.34\(dq, v) && v.major == 23 && v.minor == 34 && v.patch == 0);
+    assert(lex(\(dq1.2.999\(dq, v) && v.major == 1 && v.minor == 2 && v.patch == 999);
+    assert(!lex(\(dq1.a\(dq, v));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-\-tags
+module main;
+
+import core.stdc.string;
+import core.stdc.stdio;
+import std.stdio;
+
+enum BUFSIZE = 4095;
+
+struct Input {
+    FILE* file;
+    char[BUFSIZE + 1] buffer;// +1 for sentinel
+    char* yylimit, yycursor, yymarker, token;
+    // Intermediate tag variables must be part of the lexer state passed to YYFILL.
+    // They don\(aqt correspond to tags and should be autogenerated by re2c.
+    /*!stags:re2c format = \(aqchar* @@;\(aq; */
+    bool eof;
+};
+
+struct SemVer {
+    int major;
+    int minor;
+    int patch;
+};
+
+private int s2n(const(char)* s, const(char)* e) { // pre\-parsed string to number
+    int n = 0;
+    for (; s < e; ++s) n = n * 10 + (*s \- \(aq0\(aq);
+    return n;
+}
+
+private int fill(ref Input it) {
+    if (it.eof) return 1;
+
+    const size_t shift = it.token \- it.buffer.ptr;
+    const size_t used = it.yylimit \- it.token;
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (shift < 1) return 2;
+
+    // Shift buffer contents (discard everything up to the current token).
+    memmove(cast(void*)it.buffer.ptr, it.token, used);
+    it.yylimit \-= shift;
+    it.yycursor \-= shift;
+    it.yymarker \-= shift;
+    it.token \-= shift;
+    // Tag variables need to be shifted like other input positions. The check
+    // for non\-null is only needed if some tags are nested inside of alternative
+    // or repetition, so that they can have null value.
+    /*!stags:re2c format = \(dqif (it.@@) it.@@ \-= shift;\en\(dq; */
+
+    // Fill free space at the end of buffer with new data from file.
+    it.yylimit += fread(it.yylimit, 1, BUFSIZE \- used, it.file);
+    it.yylimit[0] = 0;
+    it.eof = it.yylimit < (it.buffer.ptr + BUFSIZE);
+    return 0;
+}
+
+private bool lex(ref Input yyrecord, ref SemVer[] vers) {
+    // Final variables available in semantic actions.
+    /*!svars:re2c format = \(aqchar* @@;\(aq; */
+    for (;;) {
+        yyrecord.token = yyrecord.yycursor;
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:define:YYFILL = \(dqfill(yyrecord) == 0\(dq;
+        re2c:eof = 0;
+
+        num = [0\-9]+;
+
+        num @t1 \(dq.\(dq @t2 num @t3 (\(dq.\(dq @t4 num)? [\en] {
+            int major = s2n(yyrecord.token, t1);
+            int minor = s2n(t2, t3);
+            int patch = t4 != null ? s2n(t4, yyrecord.yycursor \- 1) : 0;
+            SemVer ver = SemVer(major, minor, patch);
+            vers ~= ver;
+            continue;
+        }
+        $ { return true; }
+        * { return false; }
+    */
+    }
+    assert(0);
+}
+
+void main() {
+    const char[] fname = \(dqinput\(dq;
+    const char[] content = \(dq1.22.333\en\(aq \(dq;
+
+    SemVer[BUFSIZE] expect = SemVer(1, 22, 333);
+    SemVer[] actual;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    FILE* f = fopen(fname.ptr, \(dqw\(dq);
+    for (int i = 0; i < BUFSIZE; ++i) {
+        fwrite(cast(const(void*)) content.ptr, 1, content.length \- 2, f); // skip null\-terminator
+    }
+    fclose(f);
+
+    // Initialize lexer state: all pointers are at the end of buffer.
+    Input it;
+    it.file = fopen(fname.ptr, \(dqr\(dq);
+    it.yycursor = it.yymarker = it.token = it.yylimit = it.buffer.ptr + BUFSIZE;
+    it.eof = 0;
+    // Sentinel (at YYLIMIT pointer) is set to zero, which triggers YYFILL.
+    it.yylimit[0] = 0;
+
+    // Run the lexer.
+    assert(lex(it, actual) && actual == expect);
+
+    // Cleanup: remove input file.
+    fclose(it.file);
+    remove(fname.ptr);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+struct SemVer { int major, minor, patch; };
+
+private int s2n(const(char)* s, const(char)* e) { // pre\-parsed string to number
+    int n = 0;
+    for (; s < e; ++s) n = n * 10 + (*s \- \(aq0\(aq);
+    return n;
+}
+
+private bool lex(const(char)* str, ref SemVer ver) {
+    const(char)* yycursor = str, yymarker;
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqconst(char)* @@;\en\(aq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(aqconst(char)* @@;\en\(aq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:captvars = 1;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+
+        num = [0\-9]+;
+
+        (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+            ver.major = s2n(yytl1, yytr1);
+            ver.minor = s2n(yytl2, yytr2);
+            ver.patch = yytl3 ? s2n(yytl3 + 1, yytr3) : 0;
+            return true;
+        }
+        * { return false; }
+    */
+}
+
+void main() {
+    SemVer v;
+    assert(lex(\(dq23.34\(dq, v) && v.major == 23 && v.minor == 34 && v.patch == 0);
+    assert(lex(\(dq1.2.999\(dq, v) && v.major == 1 && v.minor == 2 && v.patch == 999);
+    assert(!lex(\(dq1.a\(dq, v));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT
+module main;
+
+enum MtagRoot = \-1;
+
+// An m\-tag tree is a way to store histories with an O(1) copy operation.
+// Histories naturally form a tree, as they have common start and fork at some
+// point. The tree is stored as an array of pairs (tag value, link to parent).
+// An m\-tag is represented with a single link in the tree (array index).
+struct Mtag {
+    const(char)* elem; // tag value
+    int pred; // index of the predecessor node or root
+};
+
+alias MtagTrie = Mtag[];
+alias Ver = int[];
+
+private int s2n(const(char)* s, const(char)* e) { // pre\-parsed string to number
+    int n = 0;
+    for (; s < e; ++s) n = n * 10 + (*s \- \(aq0\(aq);
+    return n;
+}
+
+// Append a single value to an m\-tag history.
+private void add_mtag(ref MtagTrie trie, ref int mtag, const(char)* value) {
+    Mtag m = {value, mtag};
+    mtag = cast(int)trie.length;
+    trie ~= [m];
+}
+
+// Recursively unwind tag histories and collect version components.
+private void unfold(const ref MtagTrie trie, int x, int y, ref Ver ver) {
+    // Reached the root of the m\-tag tree, stop recursion.
+    if (x == MtagRoot && y == MtagRoot) return;
+
+    // Unwind history further.
+    unfold(trie, trie[x].pred, trie[y].pred, ver);
+
+    // Get tag values. Tag histories must have equal length.
+    assert(x != MtagRoot && y != MtagRoot);
+    const(char)* ex = trie[x].elem, ey = trie[y].elem;
+
+    if (ex != null && ey != null) {
+        // Both tags are valid pointers, extract component.
+        ver ~= [s2n(ex, ey)];
+    } else {
+        // Both tags are null (this corresponds to zero repetitions).
+        assert(ex == null && ey == null);
+    }
+}
+
+private bool parse(const(char)* str, ref Ver ver) {
+    const(char)* yycursor = str, yymarker;
+    MtagTrie mt;
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(dqconst(char)* @@;\(dq; */
+    /*!mvars:re2c format = \(dqint @@;\(dq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(dqconst(char)* @@ = null;\(dq; */
+    /*!mtags:re2c format = \(dqint @@ = MtagRoot;\(dq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:define:YYMTAGP = \(dqadd_mtag(mt, @@, yycursor);\(dq;
+        re2c:define:YYMTAGN = \(dqadd_mtag(mt, @@, null);\(dq;
+
+        num = [0\-9]+;
+        @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+            ver = [];
+            ver ~= [s2n(t1, t2)];
+            unfold(mt, t3, t4, ver);
+            return true;
+        }
+        * { return false; }
+    */
+}
+
+void main() {
+    Ver v;
+    assert(parse(\(dq1\(dq, v) && v == [1]);
+    assert(parse(\(dq1.2.3.4.5.6.7\(dq, v) && v == [1, 2, 3, 4, 5, 6, 7]);
+    assert(!parse(\(dq1.2.\(dq, v));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-8 \-i
+module main;
+
+/*!include:re2c \(dqunicode_categories.re\(dq */
+
+private int lex(const(char)* s) {
+    const(char)* yycursor = s, yymarker;
+    /*!re2c
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:yyfill:enable = 0;
+
+        // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+        // (see https://unicode.org/reports/tr31)
+        id_start    = L | Nl | [$_];
+        id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+        identifier  = id_start id_continue*;
+        identifier { return 0; }
+        *          { return 1; }
+    */
+}
+
+void main() {
+    assert(lex(\(dq_Ыдентификатор\(dq) == 0);
+    assert(lex(\(dq!!!\(dq)==1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using include directive.
+.SS Include file 1 (definitions.d)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+enum Result{ OK, FAIL };
+
+/*!re2c
+    number = [1\-9][0\-9]*;
+*/
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { return Result.OK; }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-i
+
+/*!include:re2c \(dqdefinitions.d\(dq */
+
+private Result lex(const(char)* s) {
+    const(char)* yycursor = s, yymarker;
+    /*!re2c
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:yyfill:enable = 0;
+
+        *      { return Result.FAIL; }
+        number { return Result.OK; }
+        !include \(dqextra_rules.re.inc\(dq;
+    */
+}
+
+void main() {
+    assert(lex(\(dq123\(dq) == Result.OK);
+    assert(lex(\(dq123.4567\(dq) == Result.OK);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). Autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains definition of the
+lexer state with tag variables (the number of variables depends on the regular
+grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2d $INPUT \-o $OUTPUT \-i \-\-header lexer/state.d
+module main;
+
+import core.stdc.stddef;
+import lexer.state; // the module is generated by re2c
+
+/*!header:re2c:on*/
+module lexer.state;
+
+struct LexerState {
+    const(char)* str, yycursor;
+    /*!stags:re2c format = \(dqconst(char)* @@;\(dq; */
+};
+/*!header:re2c:off*/
+
+private long lex(ref LexerState yyrecord) {
+    const(char)* t;
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYCTYPE = \(dqchar\(dq;
+        re2c:tags = 1;
+        re2c:yyfill:enable = 0;
+        re2c:header = \(dqlexer/state.d\(dq;
+
+        [a]* @t [b]* { return t \- yyrecord.str; }
+    */
+}
+
+void main() {
+    const(char)* s = \(dqab\(dq;
+    LexerState st = {s, s /*!stags:re2c format = \(dq, null\(dq; */};
+    assert(lex(st) == 1);
+
+    const(char)* s2 = \(dqaaabbbbbbbb\(dq;
+    LexerState st2 = {s2, s2 /*!stags:re2c format = \(dq, null\(dq; */};
+    assert(lex(st2) == 3);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+/* Generated by re2c */
+
+module lexer.state;
+
+struct LexerState {
+    const(char)* str, yycursor;
+    const(char)* yyt1;
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years to turn the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2go.1 b/bootstrap/doc/re2go.1
index 0f8e280c3..2fc1d9a65 100644
--- a/bootstrap/doc/re2go.1
+++ b/bootstrap/doc/re2go.1
@@ -236,8 +236,8 @@ program:
 .TP
 .B \fBSimple API\fP
 (\fIadded in version 4.0\fP)
-This is a basic API that can be enabled with option \fB\-\-api simple\fP or
-configuration \fBre2c:api = simple\fP\&. It consists of the following
+This is a basic API that can be enabled with \fB\-\-api simple\fP option or
+\fBre2c:api = simple\fP configuration. It consists of the following
 primitives: \fBYYINPUT\fP (which should be defined as a sequence of code
 units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
 \fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
@@ -249,8 +249,8 @@ units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
 .B \fBRecord API\fP
 (\fIadded in version 4.0\fP)
 Record API is useful in cases when lexer state must be stored in a struct.
-It is enabled with option \fB\-\-api record\fP or configuration
-\fBre2c:api = record\fP\&. This API consists of a variable \fByyrecord\fP (the
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
 name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
 defined as a struct with fields \fByyinput\fP, \fByycursor\fP, \fByymarker\fP,
 \fByyctxmarker\fP, \fByylimit\fP (only the fields used by the generated code
@@ -261,10 +261,8 @@ need to be defined, and their names can be configured).
 .sp
 .TP
 .B \fBGeneric API\fP
-(\fIadded in version 0.14\fP)
-This is the default API for the Go backend. It is enabled with
-\fB\-\-api generic\fP option or \fBre2c:api = generic\fP configuration.
-This API contains primitives for generic operations:
+This is the most flexible API and the default API for the Go backend.
+It contains primitives for generic operations:
 \fBYYPEEK\fP,
 \fBYYSKIP\fP,
 \fBYYBACKUP\fP,
@@ -2823,53 +2821,64 @@ func main() {
 .SH SUBMATCH EXTRACTION
 .sp
 re2c has two options for submatch extraction.
-.sp
-The first option is \fB\-T \-\-tags\fP\&. With this option one can use standalone tags
-of the form \fB@stag\fP and \fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary
-used\-defined names. Tags can be used anywhere inside of a regular expression;
-semantically they are just position markers. Tags of the form \fB@stag\fP are
-called s\-tags: they denote a single submatch value (the last input position
-where this tag matched). Tags of the form \fB#mtag\fP are called m\-tags: they
-denote multiple submatch values (the whole history of repetitions of this tag).
-All tags should be defined by the user as variables with the corresponding
-names. With standalone tags re2c uses leftmost greedy disambiguation: submatch
-positions correspond to the leftmost matching path through the regular
-expression.
-.sp
-The second option is \fB\-P \-\-posix\-captures\fP: it enables POSIX\-compliant
-capturing groups. In this mode parentheses in regular expressions denote the
-beginning and the end of capturing groups; the whole regular expression is group
-number zero. The number of groups for the matching rule is stored in a variable
-\fByynmatch\fP, and submatch results are stored in \fByypmatch\fP array. Both
-\fByynmatch\fP and \fByypmatch\fP should be defined by the user, and \fByypmatch\fP
-size must be at least \fB[yynmatch * 2]\fP\&. re2c provides a directive
-\fB/*!maxnmatch:re2c*/\fP that defines \fBYYMAXNMATCH\fP: a constant  equal to the
-maximal value of \fByynmatch\fP among all rules. Note that re2c implements
-POSIX\-compliant disambiguation: each subexpression matches as long as possible,
-and subexpressions that start earlier in regular expression have priority over
-those starting later. Capturing groups are translated into s\-tags under the
-hood, therefore we use the word \(dqtag\(dq to describe them as well.
-.sp
-With both \fB\-P \-\-posix\-captures\fP and \fBT \-\-tags\fP options re2c uses efficient
-submatch extraction algorithm described in the
-\fI\%Tagged Deterministic Finite Automata with Lookahead\fP
-paper. The overhead on submatch extraction in the generated lexer grows with the
-number of tags \-\-\- if this number is moderate, the overhead is barely
-noticeable. In the lexer tags are implemented using a number of tag variables
-generated by re2c. There is no one\-to\-one correspondence between tag variables
-and tags: a single variable may be reused for different tags, and one tag may
-require multiple variables to hold all its ambiguous values. Eventually
-ambiguity is resolved, and only one final variable per tag survives. When a rule
-matches, all its tags are set to the values of the corresponding tag variables.
-The exact number of tag variables is unknown to the user; this number is
-determined by re2c. However, tag variables should be defined by the user as a
-part of the lexer state and updated by \fBYYFILL\fP, therefore re2c provides
-directives \fB/*!stags:re2c*/\fP and \fB/*!mtags:re2c*/\fP that can be used to
-declare, initialize and manipulate tag variables. These directives have two
-optional configurations: \fBformat = \(dq@@\(dq;\fP (specifies the template where \fB@@\fP
-is substituted with the name of each tag variable), and \fBseparator = \(dq\(dq;\fP
-(specifies the piece of code used to join the generated pieces for different
-tag variables).
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies, \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
 .sp
 S\-tags support the following operations:
 .INDENT 0.0
@@ -3106,7 +3115,7 @@ func main() () {
 .UNINDENT
 .UNINDENT
 .sp
-Here is an example of using POSIX capturing groups to parse semantic versions.
+Here is an example of using capturing groups to parse semantic versions.
 .INDENT 0.0
 .INDENT 3.5
 .sp
@@ -3117,9 +3126,6 @@ package main
 
 import \(dqreflect\(dq
 
-// Maximum number of capturing groups among all rules.
-/*!maxnmatch:re2c*/
-
 type SemVer struct { major, minor, patch int }
 
 func s2n(s string) int { // convert pre\-parsed string to a number
@@ -3131,9 +3137,8 @@ func s2n(s string) int { // convert pre\-parsed string to a number
 func parse(yyinput string) *SemVer {
     var yycursor, yymarker int
 
-    // Allocate memory for capturing parentheses (twice the number of groups).
-    yypmatch := make([]int, YYMAXNMATCH*2)
-    var yynmatch int
+    // Final tag variables used in semantic action.
+    /*!svars:re2c format = \(aqvar @@ int;\(aq; */
 
     // Intermediate tag variables used by the lexer (must be autogenerated).
     /*!stags:re2c format = \(aqvar @@ int;\(aq; */
@@ -3142,20 +3147,16 @@ func parse(yyinput string) *SemVer {
         re2c:yyfill:enable = 0;
         re2c:api = default;
         re2c:define:YYCTYPE = byte;
-        re2c:posix\-captures = 1;
+        re2c:captvars = 1;
 
         num = [0\-9]+;
 
         (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
-            // \(gayynmatch\(ga is the number of capturing groups
-            if yynmatch != 4 { panic(\(dqexpected 4 submatch groups\(dq) }
-
-            // Even \(gayypmatch\(ga values are for opening parentheses, odd values
-            // are for closing parentheses, the first group is the whole match.
-            major := s2n(yyinput[yypmatch[2]:yypmatch[3]])
-            minor := s2n(yyinput[yypmatch[4]:yypmatch[5]])
+            _ = yytl0; _ = yytr0; // some variables are unused
+            major := s2n(yyinput[yytl1:yytr1])
+            minor := s2n(yyinput[yytl2:yytr2])
             patch := 0
-            if yypmatch[6] != \-1 { patch = s2n(yyinput[yypmatch[6]+1:yypmatch[7]]) }
+            if yytl3 != \-1 { patch = s2n(yyinput[yytl3+1:yytr3]) }
 
             return &SemVer{major, minor, patch}
         }
diff --git a/bootstrap/doc/re2hs.1 b/bootstrap/doc/re2hs.1
new file mode 100644
index 000000000..9c7ae4aaa
--- /dev/null
+++ b/bootstrap/doc/re2hs.1
@@ -0,0 +1,3544 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for Haskell, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-i
+{\-# LANGUAGE OverloadedStrings #\-}
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int
+}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;Bool\(dq, \(dqState{..};State\(dq];
+    re2c:yyfill:enable = 0;
+
+    number = [1\-9][0\-9]*;
+
+    number { True }
+    *      { False }
+%}
+
+main :: IO ()
+main = case lexer State{_yyinput = \(dq1234\e0\(dq, _yycursor = 0} of
+    True \-> return ()
+    False \-> error \(dqlexer failed!\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB%{\fP and \fB%}\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- Generated by re2hs
+{\-# LANGUAGE RecordWildCards #\-}
+\-\- re2hs $INPUT \-o $OUTPUT \-i
+{\-# LANGUAGE OverloadedStrings #\-}
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int
+}
+
+
+yy0 :: State \-> Bool
+yy0 State{..} =
+    let yych = index _yyinput _yycursor in
+    let __ = _yycursor + 1 in let _yycursor = __ in
+    case yych of
+        _c | 0x31 <= _c && _c <= 0x39 \->
+            yy2 State{..}
+        _c | True \->
+            yy1 State{..}
+
+yy1 :: State \-> Bool
+yy1 State{..} =
+    False
+
+yy2 :: State \-> Bool
+yy2 State{..} =
+    let yych = index _yyinput _yycursor in
+    case yych of
+        _c | 0x30 <= _c && _c <= 0x39 \->
+            let __ = _yycursor + 1 in let _yycursor = __ in
+            yy2 State{..}
+        _c | True \->
+            yy3 State{..}
+
+yy3 :: State \-> Bool
+yy3 State{..} =
+    True
+
+lexer :: State \-> Bool
+lexer State{..} =
+    yy0 State{..}
+
+
+
+main :: IO ()
+main = case lexer State{_yyinput = \(dq1234\e0\(dq, _yycursor = 0} of
+    True \-> return ()
+    False \-> error \(dqlexer failed!\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2hs has two API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBRecord API\fP
+Record API is the default API for the Haskell backend.
+This API consists of a binding \fByyrecord\fP (the name can be overridden with
+\fBre2c:variable:yyrecord\fP) that should be defined as a record with fields
+\fB_yyinput\fP, \fB_yycursor\fP, \fB_yymarker\fP, \fB_yyctxmarker\fP, \fB_yylimit\fP\&.
+Only the fields used by the generated code need to be defined, and their
+names can be configured.
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYCOPYSTAG\fP,
+\fBYYCOPYMTAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are less than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see the submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see the submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in a form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-support\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR or S\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int,
+    _count :: Int
+}
+
+\-\- expect a null\-terminated string
+%{
+    re2c:define:YYFN = [\(dqlexer;Int\(dq, \(dqState{..};State\(dq];
+    re2c:yyfill:enable = 0;
+
+    *      { (\-1) }
+    [\ex00] { _count }
+    [a\-z]+ { lexer State{_count = _count + 1, ..} }
+    [ ]+   { lexer State{..} }
+%}
+
+main :: IO ()
+main = do
+    let test s n = when (lexer st  /= n) $ error \(dqfailed\(dq
+                   where st = State{_yyinput = s, _yycursor = 0, _count = 0}
+    test \(dq\e0\(dq 0
+    test \(dqone two three\e0\(dq 3
+    test \(dqf0ur\e0\(dq (\-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import qualified Data.ByteString as BS
+import Data.Word
+
+data State = State {
+    _yyinput :: BS.ByteString,
+    _yycursor :: Int,
+    _yymarker :: Int,
+    _yylimit :: Int,
+    _count :: Int
+}
+
+\-\- expect a null\-terminated string
+%{
+    re2c:define:YYFN = [\(dqlexer;Int\(dq, \(dqState{..};State\(dq];
+    re2c:define:YYCTYPE = \(dqWord8\(dq;
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+    re2c:eof = 0;
+    re2c:yyfill:enable = 0;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *    { (\-1) }
+    $    { _count }
+    str  { lexer State{_count = _count + 1, ..} }
+    [ ]+ { lexer State{..} }
+%}
+
+main :: IO ()
+main = do
+    let test s n = do
+            let st = State {
+                    _yyinput = s,
+                    _yycursor = 0, 
+                    _yymarker = 0,
+                    _yylimit = BS.length s \- 1, \-\- terminating null not included
+                    _count = 0}
+
+            when (lexer st /= n) $ error \(dqfailed\(dq
+
+    test \(dq\e0\(dq 0
+    test \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \e0\(dq 3
+    test \(dq\(aqunterminated\e\e\(aq\e0\(dq (\-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Exception
+import Control.Monad (when)
+import qualified Data.ByteString as BS
+
+data State = State {
+    _yyinput :: BS.ByteString,
+    _yycursor :: Int,
+    _yylimit :: Int,
+    _count :: Int
+}
+
+data FillException = UnexpectedFill deriving (Show)
+instance Exception FillException
+
+yymaxfill :: Int
+%{max %}
+
+%{
+    re2c:define:YYFN  = [\(dqlexer;IO Int\(dq, \(dqState{..};State\(dq];
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+    re2c:define:YYFILL = \(dqthrow UnexpectedFill\(dq;
+    re2c:monadic = 1; // YYFILL requires monadic do\-notation for \(gawhen\(ga conditions
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    [\ex00] {
+        \-\- check that it is the sentinel, not some unexpected null
+        return $ if _yycursor == BS.length _yyinput \- yymaxfill + 1 then _count else (\-1)
+    }
+    str  { lexer State{_count = _count + 1, ..} }
+    [ ]+ { lexer State{..} }
+    *    { return (\-1) }
+%}
+
+main :: IO ()
+main = do
+    let test s n = do
+            let buf = BS.concat [s, BS.replicate yymaxfill 0]
+            let st = State {
+                    _yyinput = buf,
+                    _yycursor = 0,
+                    _yylimit = BS.length buf,
+                    _count = 0}
+            m <\- catch (lexer st) (\e(_ :: FillException) \-> return (\-2))
+            when (m /= n) $ error \(dqfailed\(dq
+
+    test \(dq\(dq 0
+    test \(dq\(aqunterminated\e\e\(aq\(dq (\-2)
+    test \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq 3
+    test \(dq\(aqunexpected\(aq \e0 \(aqnull\(aq\(dq (\-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated single\-quoted strings. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To make up
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the check condition
+fails, \fBYYPEEK\fP returns the real character, otherwise it returns a fake
+sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import qualified Data.ByteString as BS
+
+data State = State {
+    _str :: BS.ByteString,
+    _cur :: Int,
+    _lim :: Int,
+    _cnt :: Int
+}
+
+\-\- Expect a string without terminating null.
+%{
+    re2c:api = generic;
+    re2c:define:YYFN = [\(dqlexer;Int\(dq, \(dqState{..};State\(dq];
+    re2c:define:YYPEEK = \(dqif _cur < _lim then BS.index _str _cur else 0\(dq;
+    re2c:define:YYSKIP = \(dqlet cur = _cur + 1 in let _cur = cur in\(dq;
+    re2c:yyfill:enable = 0;
+
+    *      { (\-1) }
+    [\ex00] { _cnt }
+    [a\-z]+ { lexer State{_cnt = _cnt + 1, ..} }
+    [ ]+   { lexer State{..} }
+%}
+
+main :: IO ()
+main = do
+    let test s n =
+            let st = State {_str = s, _cur = 0, _lim = BS.length s, _cnt = 0}
+            in when (lexer st /= n) $ error \(dqfailed\(dq
+
+    test \(dq\(dq 0
+    test \(dqone two three \(dq 3
+    test \(dqf0ur\(dq (\-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in
+C pointer API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token and up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad
+import Data.ByteString as BS
+import Data.Word
+import GHC.IO.Handle
+import System.Directory
+import System.IO
+
+chunk_size :: Int
+chunk_size = 4096
+
+data State = State {
+    _file :: !Handle,
+    _yyinput :: !BS.ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int,
+    _yylimit :: !Int,
+    _token :: !Int,
+    _eof :: !Bool,
+    _count :: !Int
+}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;IO Int\(dq, \(dqState{..};State;!State{..}\(dq];
+    re2c:define:YYCTYPE = \(dqWord8\(dq;
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+    re2c:define:YYFILL = \(dq(State{..}, yyfill) <\- fill State{..}\(dq;
+    re2c:eof = 0;
+    re2c:monadic = 1;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *    { return (\-1) }
+    $    { return _count }
+    str  { lexer State{_token = _yycursor, _count = _count + 1, ..} }
+    [ ]+ { lexer State{_token = _yycursor, ..} }
+%}
+
+fill :: State \-> IO (State, Bool)
+fill State{..} = do
+    case _eof of
+        True \-> return (State{..}, False)
+        False \-> do
+            \-\- Discard everything up to the current token, cut off terminating null,
+            \-\- read new chunk from file and reappend terminating null at the end.
+            chunk <\- BS.hGet _file chunk_size
+            return (State {
+                _yyinput = BS.concat [(BS.init . BS.drop _token) _yyinput, chunk, \(dq\e0\(dq],
+                _yycursor = _yycursor \- _token,
+                _yymarker = _yymarker \- _token,
+                _yylimit = _yylimit \- _token + BS.length chunk, \-\- exclude terminating null
+                _token = 0,
+                _eof = BS.null chunk, \-\- end of file?
+                ..}, True)
+
+main :: IO ()
+main = do
+    let fname = \(dqinput\(dq
+
+    \-\- Prepare input file.
+    BS.writeFile fname $ BS.concat [\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq | _ <\- [1..chunk_size]]
+    let expect = 3 * chunk_size \-\- the total number of strings in file
+
+    \-\- Run lexer on the prepared file.
+    fh <\- openFile fname ReadMode
+    let st = State {
+        _file = fh,
+        _yyinput = BS.singleton 0,
+        _yycursor = 0,
+        _yymarker = 0,
+        _token = 0,
+        _yylimit = 0,
+        _eof = False,
+        _count = 0
+    }
+    result <\- lexer st
+    hClose fh
+
+    \-\- Cleanup.
+    removeFile fname
+
+    \-\- Check result.
+    when (result /= expect) $ error $ \(dqexpected \(dq ++ show expect ++ \(dq, got \(dq ++ show result
+    return ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad
+import qualified Data.ByteString as BS
+import GHC.IO.Handle
+import System.Directory
+import System.IO
+
+chunk_size :: Int
+chunk_size = 4096
+
+data State = State {
+    _file :: !Handle,
+    _yyinput :: !BS.ByteString,
+    _yycursor :: !Int,
+    _yylimit :: !Int,
+    _token :: !Int,
+    _eof :: !Bool,
+    _count :: !Int
+}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;IO Int\(dq, \(dqState{..};State;!State{..}\(dq];
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+
+    // We have to turn off autogenerated YYFILL check and write it manually as part of YYFILL
+    // implementation, so that we can propagate the updated state out of it.
+    re2c:yyfill:check = 0;
+    re2c:define:YYFILL = \(dqState{..} <\- fill State{..} @@\(dq;
+    re2c:monadic = 1;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *      { return (\-1) }
+    [\ex00] { return $ if _yycursor == _yylimit \- yymaxfill + 1 then _count else (\-1) }
+    str    { lexer State{_token = _yycursor, _count = _count + 1, ..} }
+    [ ]+   { lexer State{_token = _yycursor, ..} }
+%}
+
+yymaxfill :: Int
+%{max %}
+
+fill :: State \-> Int \-> IO State
+fill !st@State{..} !need =
+    if _yylimit \- _yycursor >= need then
+        return st
+    else case _eof of
+        True \-> error \(dqfill failed\(dq
+        False \-> do
+            \-\- Discard everything up to the current token, read new chunk from file
+            \-\- and, if the chunk is too short (end of file), append yymaxfill padding nulls.
+            chunk <\- BS.hGet _file chunk_size
+            let !eof = BS.length chunk < need \-\- end of file ?
+            let !buf = BS.concat [
+                    BS.drop _token _yyinput,
+                    chunk,
+                    if eof then (BS.replicate yymaxfill 0) else BS.empty]
+            return State {
+                _yyinput = buf,
+                _yycursor = _yycursor \- _token,
+                _yylimit = BS.length buf,
+                _token = 0,
+                _eof = eof,
+                ..}
+
+main :: IO ()
+main = do
+    let fname = \(dqinput\(dq
+
+    \-\- Prepare input file.
+    BS.writeFile fname $ BS.concat [\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq | _ <\- [1..chunk_size]]
+    let expect = 3 * chunk_size \-\- the total number of strings in file
+
+    \-\- Run lexer on the prepared file.
+    fh <\- openFile fname ReadMode
+    let st = State {
+        _file = fh,
+        _yyinput = BS.empty,
+        _yycursor = 0,
+        _token = 0,
+        _yylimit = 0,
+        _eof = False,
+        _count = 0
+    }
+    result <\- lexer st
+    hClose fh
+
+    \-\- Cleanup.
+    removeFile fname
+
+    \-\- Check result.
+    when (result /= expect) $ error $ \(dqexpected \(dq ++ show expect ++ \(dq, got \(dq ++ show result
+    return ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-i
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int
+}
+
+peek_digit :: ByteString \-> Int \-> Int \-> Int
+peek_digit str idx offs = fromIntegral (index str (idx \- 1)) \- offs
+
+%{
+    re2c:yyfill:enable = 0;
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_bin;Int\(dq, \(dqState{..};State\(dq, \(dqnum;Int\(dq];
+    [01] { parse_bin State{..} $ num * 2 + (peek_digit _yyinput _yycursor 48) }
+    *    { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_oct;Int\(dq, \(dqState{..};State\(dq, \(dqnum;Int\(dq];
+    [0\-7] { parse_oct State{..} $ num * 8 + (peek_digit _yyinput _yycursor 48) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_dec;Int\(dq, \(dqState{..};State\(dq, \(dqnum;Int\(dq];
+    [0\-9] { parse_dec State{..} $ num * 10 + (peek_digit _yyinput _yycursor 48) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_hex;Int\(dq, \(dqState{..};State\(dq, \(dqnum;Int\(dq];
+    [0\-9] { parse_hex State{..} $ num * 16 + (peek_digit _yyinput _yycursor 48) }
+    [a\-f] { parse_hex State{..} $ num * 16 + (peek_digit _yyinput _yycursor 87) }
+    [A\-F] { parse_hex State{..} $ num * 16 + (peek_digit _yyinput _yycursor 55) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse;Maybe Int\(dq, \(dqState{..};State\(dq];
+    \(aq0b\(aq / [01]        { Just $ parse_bin State{..} 0 }
+    \(dq0\(dq                { Just $ parse_oct State{..} 0 }
+    \(dq\(dq / [1\-9]         { Just $ parse_dec State{..} 0 }
+    \(aq0x\(aq / [0\-9a\-fA\-F] { Just $ parse_hex State{..} 0 }
+    *                  { Nothing }
+%}
+
+test :: ByteString \-> Maybe Int \-> IO ()
+test str expect = do
+    let s = State {_yyinput = str, _yycursor = 0, _yymarker = 0}
+    when (parse s /= expect) $ error \(dqfailed!\(dq
+
+main :: IO ()
+main = do
+    test \(dq\e0\(dq Nothing
+    test \(dq1234567890\e0\(dq (Just 1234567890)
+    test \(dq0b1101\e0\(dq (Just 13)
+    test \(dq0x7Fe\e0\(dq (Just 2046)
+    test \(dq0644\e0\(dq (Just 420)
+    test \(dq9223372036854775807\e0\(dq (Just 9223372036854775807)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-ci
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+%{conditions %}
+
+data State = State {
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int,
+    _yycond :: !YYCONDTYPE
+}
+
+peek_digit :: ByteString \-> Int \-> Int \-> Int
+peek_digit str idx offs = fromIntegral (index str (idx \- 1)) \- offs
+
+%{
+    re2c:define:YYFN = [\(dqparse;Maybe Int\(dq, \(dqState{..};State\(dq, \(dq_num;Int\(dq];
+    re2c:yyfill:enable = 0;
+
+    <init> \(aq0b\(aq / [01]        :=> bin
+    <init> \(dq0\(dq                :=> oct
+    <init> \(dq\(dq / [1\-9]         :=> dec
+    <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+    <init> * { Nothing }
+
+    <bin> [01]  { yyfnbin State{..} $ _num * 2 + (peek_digit _yyinput _yycursor 48) }
+    <oct> [0\-7] { yyfnoct State{..} $ _num * 8 + (peek_digit _yyinput _yycursor 48) }
+    <dec> [0\-9] { yyfndec State{..} $ _num * 10 + (peek_digit _yyinput _yycursor 48) }
+    <hex> [0\-9] { yyfnhex State{..} $ _num * 16 + (peek_digit _yyinput _yycursor 48) }
+    <hex> [a\-f] { yyfnhex State{..} $ _num * 16 + (peek_digit _yyinput _yycursor 87) }
+    <hex> [A\-F] { yyfnhex State{..} $ _num * 16 + (peek_digit _yyinput _yycursor 55) }
+
+    <bin, oct, dec, hex> * { Just _num }
+%}
+
+test :: ByteString \-> Maybe Int \-> IO ()
+test str expect = do
+    let s = State {
+            _yyinput = str,
+            _yycursor = 0,
+            _yymarker = 0,
+            _yycond = YYC_init}
+    when (parse s 0 /= expect) $ error \(dqfailed!\(dq
+
+main :: IO ()
+main = do
+    test \(dq\e0\(dq Nothing
+    test \(dq1234567890\e0\(dq (Just 1234567890)
+    test \(dq0b1101\e0\(dq (Just 13)
+    test \(dq0x7Fe\e0\(dq (Just 2046)
+    test \(dq0644\e0\(dq (Just 420)
+    test \(dq9223372036854775807\e0\(dq (Just 9223372036854775807)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-fi
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Concurrent.Chan
+import Control.Monad
+import Data.ByteString as BS
+import Text.Printf
+
+debug :: IO () \-> IO ()
+debug = when False
+
+data State = State {
+    _pipe :: !(Chan BS.ByteString),
+    _yyinput :: !BS.ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int,
+    _yylimit :: !Int,
+    _token :: !Int,
+    _eof :: !Bool,
+    _yystate :: !Int,
+    _recv :: !Int
+}
+
+data Status = End | Ready | Waiting | BadPacket deriving (Eq)
+
+%{
+    re2c:define:YYFN = [\(dqlexer;IO (State, Status)\(dq, \(dqState{..};State;!State{..}\(dq];
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+    re2c:define:YYFILL = \(dqreturn (State{..}, Waiting)\(dq;
+    re2c:eof = 0;
+    re2c:monadic = 1;
+
+    packet = [a\-z]+[;];
+
+    *      { return (State{..}, BadPacket) }
+    $      { return (State{..}, End) }
+    packet { lexer State{_token = _yycursor, _recv = _recv + 1, ..} }
+%}
+
+fill :: State \-> IO (State, Status)
+fill st@State{..} = do
+    case _eof of
+        True \-> return (st, End)
+        False \-> do
+            \-\- Discard everything up to the current token, cut off terminating null,
+            \-\- read new chunk from file and reappend terminating null at the end.
+            chunk <\- readChan _pipe
+            return (State {
+                _yyinput = BS.concat [(BS.init . BS.drop _token) _yyinput, chunk, \(dq\e0\(dq],
+                _yycursor = _yycursor \- _token,
+                _yymarker = _yymarker \- _token,
+                _yylimit = _yylimit \- _token + BS.length chunk, \-\- exclude terminating null
+                _token = 0,
+                _eof = BS.null chunk, \-\- end of file?
+                ..}, Ready)
+
+loop :: State \-> [BS.ByteString] \-> IO Status
+loop State{..} packets = do
+    (State{..}, status) <\- lexer State{..}
+    case status of
+        End \-> do
+            debug $ printf \(dqdone: got %d packets\en\(dq _recv
+            return End
+        Waiting \-> do
+            debug $ printf \(dqwaiting...\en\(dq
+            packets\(aq <\- case packets of
+                [] \-> do
+                    writeChan _pipe BS.empty
+                    return []
+                p:ps \-> do
+                    debug $ printf \(dqsent packet \(aq%s\(aq\en\(dq (show p)
+                    writeChan _pipe p
+                    return ps
+            (State{..}, status\(aq) <\- fill State{..}
+            case status\(aq of
+                Ready \-> loop State{..} packets\(aq
+                _ \-> error \(dqunexpected status after fill\(dq
+        BadPacket \-> do
+            debug $ printf \(dqerror: ill\-formed packet\en\(dq
+            return BadPacket
+        _ \-> error \(dqunexpected status\(dq
+
+test :: [BS.ByteString] \-> Status \-> IO ()
+test packets expect = do
+    pipe <\- newChan \-\- emulate pipe using a chan of bytestrings
+    let st = State {
+        _pipe = pipe,
+        _yyinput = BS.singleton 0, \-\- null sentinel triggers YYFILL
+        _yycursor = 0,
+        _yymarker = 0,
+        _token = 0,
+        _yylimit = 0,
+        _eof = False,
+        _yystate = \-1,
+        _recv = 0
+    }
+    status <\- loop st packets
+    when (status /= expect) $ error \(dqfailed\(dq
+    return ()
+
+main :: IO ()
+main = do
+    test [] End
+    test [\(dqze\(dq, \(dqro;o\(dq, \(dqne\(dq, \(dq;t\(dq, \(dqwo;thr\(dq, \(dqe\(dq, \(dqe\(dq, \(dq;\(dq, \(dqfour;\(dq] End
+    test [\(dqzer0;\(dq] BadPacket
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. An exception is made for the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. Use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+\-\- This example shows how to combine reusable re2c blocks: two blocks
+\-\- (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+\-\- in both blocks; the \(aqfish\(aq block takes priority because it is used
+\-\- earlier. Default rule * occurs in all three blocks; the local (not
+\-\- inherited) definition takes priority.
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+data Answer = Color | Fish | Dunno deriving (Eq)
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int,
+    _yymarker :: Int
+}
+
+%{rules:colors
+    *                            { error \(dqah\(dq }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { Color }
+%}
+
+%{rules:fish
+    *                            { error \(dqoh\(dq }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { Fish }
+%}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;Answer\(dq, \(dqState{..};State\(dq];
+    re2c:yyfill:enable = 0;
+
+    !use:fish;
+    !use:colors;
+    * { Dunno } // overrides inherited \(aq*\(aq rules
+%}
+
+main :: IO ()
+main = do
+    let test str ans = do
+            let st = State {_yyinput = str, _yycursor = 0, _yymarker = 0}
+            when (lexer st /= ans) $ error \(dqfailed\(dq
+
+    test \(dqsalmon\(dq Fish
+    test \(dqwhat?\(dq Dunno
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+\-\- This example supports multiple input encodings: UTF\-8 and UTF\-32.
+\-\- Both lexers are generated from the same rules block, and the use
+\-\- blocks add only encoding\-specific configurations.
+
+import Control.Monad (when)
+import Data.Array
+import Data.Word
+
+data State a = State {
+    _yyinput :: a,
+    _yycursor :: Int,
+    _yymarker :: Int
+}
+
+%{rules
+    re2c:yyfill:enable = 0;
+    re2c:define:YYPEEK = \(dq(!)\(dq;
+
+    \(dq∀x ∃y\(dq { Just _yycursor }
+    *       { Nothing }
+%}
+
+%{use
+    re2c:define:YYFN = [\(dqlex8;Maybe Int\(dq, \(dqState{..};State (Array Int Word8)\(dq];
+    re2c:encoding:utf8 = 1;
+    re2c:define:YYCTYPE = Word8;
+%}
+
+%{use
+    re2c:define:YYFN = [\(dqlex32;Maybe Int\(dq, \(dqState{..};State (Array Int Int)\(dq];
+    re2c:encoding:utf32 = 1;
+    re2c:define:YYCTYPE = Int;
+%}
+
+main :: IO ()
+main = do
+    let make_st l = State {
+            _yyinput = listArray (0, length l \- 1) l,
+            _yycursor = 0,
+            _yymarker = 0}
+
+    let s8 = [0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79]
+    when (lex8 (make_st s8) /= Just (length s8)) $ error \(dqlex8 failed\(dq
+
+    let s32 = [0x2200, 0x78, 0x20, 0x2203, 0x79]
+    when (lex32 (make_st s32) /= Just (length s32)) $ error \(dqlex32 failed\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there\(aqre no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    \-\- Final tag bindings available in semantic action.
+    %{svars format = \(dq\en@@ :: !Int,\(dq; %}
+    \-\- Intermediate tag bindings used by the lexer (must be autogenerated).
+    %{stags format = \(dq\en@@ :: !Int,\(dq; %}
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int
+}
+
+data SemVer = SemVer {
+    major :: !Int,
+    minor :: !Int,
+    patch :: !Int
+} deriving (Eq)
+
+s2n :: ByteString \-> Int \-> Int \-> Int
+s2n s i j = f i 0 where
+    f k n = if k >= j then n else f (k + 1) (n * 10 + (fromIntegral (index s k) \- 48))
+
+%{
+    re2c:define:YYFN = [\(dqparse;Maybe SemVer\(dq, \(dqState{..};State\(dq];
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    @_1 num @_2 \(dq.\(dq @_3 num @_4 (\(dq.\(dq @_5 num)? [\ex00] {
+        Just SemVer {
+            major = s2n _yyinput _1 _2,
+            minor = s2n _yyinput _3 _4,
+            patch = if _5 == (\-1) then 0 else s2n _yyinput _5 (_yycursor \- 1)
+        }
+    }
+    * { Nothing }
+%}
+
+test :: ByteString \-> Maybe SemVer \-> IO ()
+test str expect = do
+    let s = State {
+        %{svars format = \(dq\en@@ = (\-1),\(dq; %}
+        %{stags format = \(dq\en@@ = (\-1),\(dq; %}
+        _yyinput = str,
+        _yycursor = 0,
+        _yymarker = 0
+    }
+    when (parse s /= expect) $ error \(dqfailed!\(dq
+
+main :: IO ()
+main = do
+    test \(dq23.34\e0\(dq (Just SemVer {major = 23, minor = 34, patch = 0})
+    test \(dq1.2.99999\e0\(dq (Just SemVer {major = 1, minor = 2, patch = 99999})
+    test \(dq1.a\e0\(dq Nothing
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad
+import Data.ByteString as BS
+import GHC.IO.Handle
+import System.Directory
+import System.IO
+
+chunk_size :: Int
+chunk_size = 4096
+
+data State = State {
+    _file :: !Handle,
+    _yyinput :: !BS.ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int,
+    _yylimit :: !Int,
+    _token :: !Int,
+    \-\- Final tag bindings available in semantic action.
+    %{svars format = \(dq\en@@ :: !Int,\(dq; %}
+    \-\- Intermediate tag bindings used by the lexer (must be autogenerated).
+    %{stags format = \(dq\en@@ :: !Int,\(dq; %}
+    _eof :: !Bool
+}
+
+data SemVer = SemVer {
+    major :: !Int,
+    minor :: !Int,
+    patch :: !Int
+} deriving (Eq, Show)
+
+s2n :: BS.ByteString \-> Int \-> Int \-> Int
+s2n s i j = f i 0 where
+    f k n = if k >= j then n else f (k + 1) (n * 10 + (fromIntegral (BS.index s k) \- 48))
+
+%{
+    re2c:define:YYFN = [\(dqlexer;IO [SemVer]\(dq, \(dqState{..};State\(dq, \(dq_vers;[SemVer]\(dq];
+    re2c:define:YYPEEK = \(dqBS.index\(dq;
+    re2c:define:YYFILL = \(dq(State{..}, yyfill) <\- fill State{..}\(dq;
+    re2c:eof = 0;
+    re2c:monadic = 1;
+    re2c:tags = 1;
+
+    num = [0\-9]+;
+
+    @_1 num @_2 \(dq.\(dq @_3 num @_4 (\(dq.\(dq @_5 num)? [\en] {
+        let ver = SemVer {
+            major = s2n _yyinput _1 _2,
+            minor = s2n _yyinput _3 _4,
+            patch = if _5 == (\-1) then 0 else s2n _yyinput _5 (_yycursor \- 1)
+        }
+        lexer State{..} (ver: _vers)
+    }
+    $ { return _vers }
+    * { error \(dqlexer failed\(dq }
+%}
+
+fill :: State \-> IO (State, Bool)
+fill State{..} = do
+    case _eof of
+        True \-> return (State{..}, False)
+        False \-> do
+            \-\- Discard everything up to the current token, cut off terminating null,
+            \-\- read new chunk from file and reappend terminating null at the end.
+            chunk <\- BS.hGet _file chunk_size
+            return (State{
+                _yyinput = BS.concat [(BS.init . BS.drop _token) _yyinput, chunk, \(dq\e0\(dq],
+                _yycursor = _yycursor \- _token,
+                _yymarker = _yymarker \- _token,
+                _yylimit = _yylimit \- _token + BS.length chunk, \-\- exclude terminating null
+                _token = 0,
+                _eof = BS.null chunk, \-\- end of file?
+                ..}, True)
+
+main :: IO ()
+main = do
+    let fname = \(dqinput\(dq
+
+    \-\- Prepare input file.
+    BS.writeFile fname $ BS.concat [\(dq1.22.333\en\(dq | _ <\- [1..chunk_size]]
+    let expect = [SemVer {major = 1, minor = 22, patch = 333} | _ <\- [1..chunk_size]]
+
+    \-\- Run lexer on the prepared file.
+    fh <\- openFile fname ReadMode
+    let st = State {
+        _file = fh,
+        _yyinput = BS.singleton 0,
+        _yycursor = 0,
+        _yymarker = 0,
+        _yylimit = 0,
+        _token = 0,
+        %{svars format = \(dq\en@@ = (\-1),\(dq; %}
+        %{stags format = \(dq\en@@ = (\-1),\(dq; %}
+        _eof = False
+    }
+    result <\- lexer st []
+    hClose fh
+
+    \-\- Cleanup.
+    removeFile fname
+
+    \-\- Check result.
+    when (result /= expect) $ error $ \(dqexpected \(dq ++ show expect ++ \(dq, got \(dq ++ show result
+    return ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+import Data.Word (Word8)
+
+none :: Int
+none = \-1
+
+data State = State {
+    \-\- Final tag bindings available in semantic action.
+    %{svars format = \(dq\en@@ :: !Int,\(dq; %}
+    \-\- Intermediate tag bindings used by the lexer (must be autogenerated).
+    %{stags format = \(dq\en@@ :: !Int,\(dq; %}
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int
+}
+
+data SemVer = SemVer {
+    major :: !Int,
+    minor :: !Int,
+    patch :: !Int
+} deriving (Eq)
+
+s2n :: ByteString \-> Int \-> Int \-> Int
+s2n s i j = f i 0 where
+    f k n = if k >= j then n else f (k + 1) (n * 10 + (fromIntegral (index s k) \- 48))
+
+%{
+    re2c:define:YYFN = [\(dqparse;Maybe SemVer\(dq, \(dqState{..};State\(dq];
+    re2c:define:YYCTYPE = \(dqWord8\(dq;
+    re2c:captvars = 1;
+    re2c:variable:yypmatch = _;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+        Just SemVer {
+            major = s2n _yyinput _yytl1 _yytr1,
+            minor = s2n _yyinput _yytl2 _yytr2,
+            patch = if _yytl3 == none then 0 else s2n _yyinput (_yytl3 + 1) _yytr3
+        }
+    }
+    * { Nothing }
+%}
+
+test :: ByteString \-> Maybe SemVer \-> IO ()
+test str expect = do
+    let s = State {
+        %{svars format = \(dq\en@@ = none,\(dq; %}
+        %{stags format = \(dq\en@@ = none,\(dq; %}
+        _yyinput = str,
+        _yycursor = 0,
+        _yymarker = 0
+    }
+    when (parse s /= expect) $ error \(dqfailed!\(dq
+
+main :: IO ()
+main = do
+    test \(dq23.34\e0\(dq (Just SemVer {major = 23, minor = 34, patch = 0})
+    test \(dq1.2.99999\e0\(dq (Just SemVer {major = 1, minor = 2, patch = 99999})
+    test \(dq1.a\e0\(dq Nothing
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+data State = State {
+    \-\- Final tag bindings available in semantic action.
+    %{svars format = \(dq\en@@ :: !Int,\(dq; %}
+    %{mvars format = \(dq\en@@ :: ![Int],\(dq; %}
+    \-\- Intermediate tag bindings used by the lexer (must be autogenerated).
+    %{stags format = \(dq\en@@ :: !Int,\(dq; %}
+    %{mtags format = \(dq\en@@ :: ![Int],\(dq; %}
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    _yymarker :: !Int
+}
+
+s2n :: ByteString \-> Int \-> Int \-> Int
+s2n s i j = f i 0 where
+    f k n = if k >= j then n else f (k + 1) (n * 10 + (fromIntegral (index s k) \- 48))
+
+%{
+    re2c:define:YYFN = [\(dqparse;Maybe [Int]\(dq, \(dqState{..};State\(dq];
+    re2c:define:YYMTAGP = \(dqlet tag = _yycursor : @@{tag} in let @@{tag} = tag in\(dq;
+    re2c:define:YYMTAGN = \(dq\(dq; // alternatively could add \-1 to the list
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    @_1 num @_2 (\(dq.\(dq #_3 num #_4)* [\ex00] {
+        Just $ (s2n _yyinput _1 _2) : (reverse $ zipWith (\ei j \-> s2n _yyinput i j) _3 _4)
+    }
+    * { Nothing }
+%}
+
+test :: ByteString \-> Maybe [Int] \-> IO ()
+test str expect = do
+    let st = State {
+        %{svars format = \(dq\en@@ = (\-1),\(dq; %}
+        %{stags format = \(dq\en@@ = (\-1),\(dq; %}
+        %{mvars format = \(dq\en@@ = [],\(dq; %}
+        %{mtags format = \(dq\en@@ = [],\(dq; %}
+        _yyinput = str,
+        _yycursor = 0,
+        _yymarker = 0
+    }
+    when (parse st /= expect) $ error \(dqfailed!\(dq
+
+main :: IO ()
+main = do
+    test \(dq1\e0\(dq (Just [1])
+    test \(dq1.2.3.4.5.6.7\e0\(dq (Just [1, 2, 3, 4, 5, 6, 7])
+    test \(dq1.2.\e0\(dq Nothing
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-\-utf8 \-i
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+%{include \(dqunicode_categories.re\(dq %}
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int,
+    _yymarker :: Int,
+    _yyaccept :: Int
+}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;Bool\(dq, \(dqState{..};State\(dq];
+    re2c:yyfill:enable = 0;
+
+    // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+    // (see https://unicode.org/reports/tr31)
+    id_start    = L | Nl | [$_];
+    id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+    identifier  = id_start id_continue*;
+
+    identifier { True }
+    *          { False }
+%}
+
+main :: IO ()
+main = do
+    let st = State {
+            _yyinput = \(dq_Ыдентификатор\ex00\(dq,
+            _yycursor = 0,
+            _yymarker = 0,
+            _yyaccept = 0}
+
+    when (not $ lexer st) $ error \(dqfailed\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using include directive.
+.SS Include file 1 (definitions.hs)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+data Number = INum | FNum | NNaN deriving (Eq)
+
+%{
+    number = [1\-9][0\-9]*;
+%}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { FNum }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-i
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (ByteString, index)
+
+%{include \(dqdefinitions.hs\(dq %}
+
+data State = State {
+    _yyinput :: ByteString,
+    _yycursor :: Int,
+    _yymarker :: Int,
+    _yyaccept :: Int
+}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;Number\(dq, \(dqState{..};State\(dq];
+    re2c:yyfill:enable = 0;
+
+    *      { NNaN }
+    number { INum }
+    !include \(dqextra_rules.re.inc\(dq;
+%}
+
+main :: IO ()
+main = do
+    let test s n = do
+            let st = State {
+                    _yyinput = s,
+                    _yycursor = 0,
+                    _yymarker = 0,
+                    _yyaccept = 0}
+
+            when (lexer st /= n) $ error \(dqfailed\(dq
+
+    test \(dq123\e0\(dq INum
+    test \(dq123.4567\e0\(dq FNum
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). Autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains definition of the
+lexer state with tag variables (the number of variables depends on the regular
+grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- re2hs $INPUT \-o $OUTPUT \-\-header lexer/state.hs \-i
+{\-# OPTIONS_GHC \-Wno\-unused\-record\-wildcards #\-}
+{\-# LANGUAGE OverloadedStrings #\-}
+
+import Control.Monad (when)
+import Data.ByteString (index)
+import State
+
+%{header:on %}
+module State where
+
+import Data.ByteString (ByteString)
+
+data State = State {
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    %{stags format = \(dq\en@@{tag} :: !Int,\(dq; %}
+    _tag :: !Int
+}
+%{header:off %}
+
+%{
+    re2c:define:YYFN = [\(dqlexer;Int\(dq, \(dqState{..};State\(dq];
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+    re2c:header = \(dqlexer/state.hs\(dq;
+
+    [a]* @_tag [b]* { _tag }
+%}
+
+main :: IO ()
+main = do
+    let s = State {
+        _yyinput = \(dqab\e0\(dq,
+        _yycursor = 0,
+        %{stags format = \(dq\en@@{tag} = \-1,\(dq; %}
+        _tag = 0}
+
+    when (lexer s /= 1) $ error \(dqfailed!\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\-\- Generated by re2c
+{\-# LANGUAGE RecordWildCards #\-}
+
+module State where
+
+import Data.ByteString (ByteString)
+
+data State = State {
+    _yyinput :: !ByteString,
+    _yycursor :: !Int,
+    
+_yyt1 :: !Int,
+    _tag :: !Int
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years to turn the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2java.1 b/bootstrap/doc/re2java.1
new file mode 100644
index 000000000..93cdfccd8
--- /dev/null
+++ b/bootstrap/doc/re2java.1
@@ -0,0 +1,3556 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for Java, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    static boolean lex(String yyinput) {
+        int yycursor = 0;
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:yyfill:enable = 0;
+
+            number = [1\-9][0\-9]*;
+
+            number { return true; }
+            *      { return false; }
+        */
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq1234\e0\(dq);
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB/*!re2c\fP and \fB*/\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2java
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    static boolean lex(String yyinput) {
+        int yycursor = 0;
+
+        
+{
+    char yych = 0;
+    int yystate = 0;
+    yyl: while (true) {
+        switch (yystate) {
+            case 0:
+                yych = yyinput.charAt(yycursor);
+                yycursor += 1;
+                switch (yych) {
+                    case 0x31:
+                    case 0x32:
+                    case 0x33:
+                    case 0x34:
+                    case 0x35:
+                    case 0x36:
+                    case 0x37:
+                    case 0x38:
+                    case 0x39:
+                        yystate = 2;
+                        continue yyl;
+                    default:
+                        yystate = 1;
+                        continue yyl;
+                }
+            case 1:
+                { return false; }
+            case 2:
+                yych = yyinput.charAt(yycursor);
+                switch (yych) {
+                    case 0x30:
+                    case 0x31:
+                    case 0x32:
+                    case 0x33:
+                    case 0x34:
+                    case 0x35:
+                    case 0x36:
+                    case 0x37:
+                    case 0x38:
+                    case 0x39:
+                        yycursor += 1;
+                        yystate = 2;
+                        continue yyl;
+                    default:
+                        yystate = 3;
+                        continue yyl;
+                }
+            case 3:
+                { return true; }
+            default:
+                throw new IllegalStateException(\(dqinternal lexer error\(dq);
+        }
+    }
+}
+
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq1234\e0\(dq);
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2java has three API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBSimple API\fP
+This is the default API for the Java backend. It consists of the following
+primitives: \fBYYINPUT\fP (which should be defined as a sequence of code
+units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
+\fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBRecord API\fP
+Record API is useful in cases when lexer state must be stored in a class.
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
+name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
+defined as a class with fields \fByyinput\fP, \fByycursor\fP, \fByymarker\fP,
+\fByyctxmarker\fP, \fByylimit\fP (only the fields used by the generated code
+need to be defined, and their names can be configured).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are fewer than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details.)
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details.)
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in the form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-support\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    // Expects a null\-terminated string.
+    static int lex(String yyinput) {
+        int yycursor = 0;
+        int count = 0;
+
+        loop: while (true) {
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqchar\(dq;
+                re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+                re2c:yyfill:enable = 0;
+
+                *      { return \-1; }
+                [\ex00] { return count; }
+                [a\-z]+ { count += 1; continue loop; }
+                [ ]+   { continue loop; }
+            */
+        }
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq\e0\(dq) == 0;
+        assert lex(\(dqone two three\e0\(dq) == 3;
+        assert lex(\(dqf0ur\e0\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, the lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    // Expects a null\-terminated string.
+    static int lex(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+        int yylimit = yyinput.length() \- 1; // yylimit points at the terminating null
+        int count = 0;
+
+        loop: while (true) {
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqchar\(dq;
+                re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+                re2c:yyfill:enable = 0;
+                re2c:eof = 0;
+
+                str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+                *    { return \-1; }
+                $    { return count; }
+                str  { count += 1; continue loop; }
+                [ ]+ { continue loop; }
+            */
+        }
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq\e0\(dq) == 0;
+        assert lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \e0\(dq) == 3;
+        assert lex(\(dq\(aqunterminated\e\e\(aq\e0\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    /*!max:re2c*/
+
+    // Expects yymaxfill\-padded string.
+    static int lex(String str) {
+        // Pad string with yymaxfill zeroes at the end.
+        byte[] yyinput = new byte[str.length() + YYMAXFILL];
+        System.arraycopy(str.getBytes(), 0, yyinput, 0, str.length()); 
+
+        int yycursor = 0;
+        int yylimit = yyinput.length;
+        int count = 0;
+
+        loop: while (true) {
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dqyyinput[yycursor]\(dq;
+                re2c:define:YYFILL = \(dqreturn \-1;\(dq;
+
+                str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+                [\ex00] {
+                    // Check that it is the sentinel, not some unexpected null.
+                    return (yycursor \- 1 == str.length()) ? count : \-1;
+                }
+                str  { count += 1; continue loop; }
+                [ ]+ { continue loop; }
+                *    { return \-1; }
+            */
+        }
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq\(dq) == 0;
+        assert lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq) == 3;
+        assert lex(\(dq\(aqunterminated\e\e\(aq\(dq) == \-1;
+        assert lex(\(dq\(aqunexpected \e00 null\e\e\(aq\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated words. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To make up
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the current position
+is within bounds, \fBYYPEEK\fP returns the real character, otherwise it returns a
+fake sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Main {
+    // Expects a string without terminating null.
+    static int lex(String str) {
+        byte[] yyinput = str.getBytes();
+        int yycursor = 0;
+        int count = 0;
+
+        loop: while (true) {
+            /*!re2c
+                re2c:api = generic;
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dq(yycursor < yyinput.length) ? yyinput[yycursor] : 0\(dq;
+                re2c:define:YYSKIP = \(dqyycursor += 1;\(dq;
+                re2c:yyfill:enable = 0;
+
+                *      { return \-1; }
+                [\ex00] { return count; }
+                [a\-z]+ { count += 1; continue loop; }
+                [ ]+   { continue loop; }
+            */
+        }
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq\(dq) == 0;
+        assert lex(\(dqone two three\(dq) == 3;
+        assert lex(\(dqf0ur\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in C pointer
+API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.io.*;
+import java.nio.file.*;
+
+class Lexer {
+    public static final int BUFSIZE = 4096;
+
+    private BufferedInputStream stream;
+    private byte[] yyinput;
+    private int yycursor;
+    private int yymarker;
+    private int yylimit;
+    private int token;
+    private boolean eof;
+
+    public Lexer(File file) throws FileNotFoundException {
+        stream = new BufferedInputStream(new FileInputStream(file));
+        // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+        yyinput = new byte[BUFSIZE + 1];
+        yycursor = yymarker = yylimit = token = BUFSIZE;
+        eof = false;
+    }
+
+    private int fill() throws IOException {
+        if (eof) { return \-1; } // unexpected EOF
+
+        // Error: lexeme too long. In real life can reallocate a larger buffer.
+        if (token < 1) { return \-2; }
+
+        // Shift buffer contents (discard everything up to the current token).
+        System.arraycopy(yyinput, token, yyinput, 0, yylimit \- token); 
+        yycursor \-= token;
+        yymarker \-= token;
+        yylimit \-= token;
+        token = 0;
+
+        // Fill free space at the end of buffer with new data from file.
+        yylimit += stream.read(yyinput, yylimit, BUFSIZE \- yylimit);
+        yyinput[yylimit] = 0; // append sentinel symbol
+
+        // If read less than expected, this is the end of input.
+        eof = yylimit < BUFSIZE;
+
+        return 0;
+    }
+
+    // Expects a null\-terminated string.
+    public int lex() throws IOException {
+        int count = 0;
+        loop: while (true) {
+            token = yycursor;
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dqyyinput[yycursor]\(dq;
+                re2c:define:YYFILL = \(dqfill() == 0\(dq;
+                re2c:eof = 0;
+
+                str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+                *    { return \-1; }
+                $    { return count; }
+                str  { count += 1; continue loop; }
+                [ ]+ { continue loop; }
+            */
+        }
+    }
+
+    public static void main(String []args) throws FileNotFoundException, IOException {
+        String fname = \(dqinput\(dq;
+        String content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq.repeat(Lexer.BUFSIZE);
+
+        // Prepare input file: a few times the size of the buffer, containing
+        // strings with zeroes and escaped quotes.
+        Files.writeString(Paths.get(fname), content);
+
+        int count = 3 * Lexer.BUFSIZE; // number of quoted strings written to file
+
+        // Prepare lexer state: all offsets are at the end of buffer.
+        File file = new File(\(dq.\(dq, fname);
+        Lexer lexer = new Lexer(file);
+
+        // Run the lexer.
+        int n = lexer.lex();
+        assert n == count;
+
+        // Cleanup: remove input file.
+        file.delete();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.io.*;
+import java.nio.file.*;
+import java.util.Arrays;
+
+class Lexer {
+    /*!max:re2c*/
+    public static final int BUFSIZE = 4096;
+
+    private BufferedInputStream stream;
+    private byte[] yyinput;
+    private int yycursor;
+    private int yylimit;
+    private int token;
+    private boolean eof;
+
+    public Lexer(File file) throws FileNotFoundException {
+        stream = new BufferedInputStream(new FileInputStream(file));
+        // Prepare lexer state: all offsets are at the end of buffer.
+        // This immediately triggers YYFILL, as the YYLESSTHAN condition is true.
+        yyinput = new byte[BUFSIZE + YYMAXFILL];
+        yycursor = yylimit = token = BUFSIZE;
+        eof = false;
+    }
+
+    private int fill(int need) throws IOException {
+        if (eof) { return \-1; } // unexpected EOF
+
+        // Error: lexeme too long. In real life can reallocate a larger buffer.
+        if (token < need) { return \-2; }
+
+        // Shift buffer contents (discard everything up to the current token).
+        System.arraycopy(yyinput, token, yyinput, 0, yylimit \- token); 
+        yycursor \-= token;
+        yylimit \-= token;
+        token = 0;
+
+        // Fill free space at the end of buffer with new data from file.
+        yylimit += stream.read(yyinput, yylimit, BUFSIZE \- yylimit);
+        yyinput[yylimit] = 0; // append sentinel symbol
+
+        // If read less than expected, this is the end of input.
+        if (yylimit < BUFSIZE) {
+            eof = true;
+            Arrays.fill(yyinput, yylimit, yylimit + YYMAXFILL, (byte)0);
+            yylimit += YYMAXFILL;
+        }
+
+        return 0;
+    }
+
+    // Expects a null\-terminated string.
+    public int lex() throws IOException {
+        int count = 0;
+        loop: while (true) {
+            token = yycursor;
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dqyyinput[yycursor]\(dq;
+                re2c:define:YYFILL = \(dqif (fill(@@) != 0) { return \-2; }\(dq;
+
+                str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+                [\ex00] {
+                    // Check that it is the sentinel, not some unexpected null.
+                    return (token == yylimit \- YYMAXFILL) ? count : \-1;
+                }
+                str  { count += 1; continue loop; }
+                [ ]+ { continue loop; }
+                *    { return \-1; }
+            */
+        }
+    }
+
+    public static void main(String []args) throws FileNotFoundException, IOException {
+        String fname = \(dqinput\(dq;
+        String content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq.repeat(Lexer.BUFSIZE);
+
+        // Prepare input file: a few times the size of the buffer, containing
+        // strings with zeroes and escaped quotes.
+        Files.writeString(Paths.get(fname), content);
+
+        int count = 3 * Lexer.BUFSIZE; // number of quoted strings written to file
+
+        // Prepare lexer state: all offsets are at the end of buffer.
+        File file = new File(\(dq.\(dq, fname);
+        Lexer lexer = new Lexer(file);
+
+        // Run the lexer.
+        int n = lexer.lex();
+        assert n == count;
+
+        // Cleanup: remove input file.
+        file.delete();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have a \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+class Parser {
+    private String yyinput;
+    private int yycursor;
+    private int yymarker;
+    private int number;
+
+    private void add_digit(int base, int offset) throws ArithmeticException {
+        number = Math.addExact(
+            Math.multiplyExact(number, base),
+            yyinput.charAt(yycursor \- 1) \- offset);
+    }
+
+    public int parse(String str) throws ArithmeticException, IllegalArgumentException {
+        yyinput = str;
+        yycursor = 0;
+        number = 0;
+
+        try {
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqchar\(dq;
+                re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+                re2c:yyfill:enable = 0;
+
+                end = \(dq\ex00\(dq;
+
+                \(aq0b\(aq / [01]        { return parse_bin(); }
+                \(dq0\(dq                { return parse_oct(); }
+                \(dq\(dq   / [1\-9]       { return parse_dec(); }
+                \(aq0x\(aq / [0\-9a\-fA\-F] { return parse_hex(); }
+                *                  { throw new IllegalArgumentException(\(dqnot a number\(dq); }
+            */
+        } catch (Exception e) {
+            return \-1;
+        }
+    }
+
+    private int parse_bin() throws ArithmeticException, IllegalArgumentException {
+        /*!re2c
+            end   { return number; }
+            [01]  { add_digit(2, 48); return parse_bin(); }
+            *     { throw new IllegalArgumentException(\(dqill\-formed binary number\(dq); }
+        */
+    }
+
+    private int parse_oct() throws ArithmeticException, IllegalArgumentException {
+        /*!re2c
+            end   { return number; }
+            [0\-7] { add_digit(8, 48); return parse_oct(); }
+            *     { throw new IllegalArgumentException(\(dqill\-formed octal number\(dq); }
+        */
+    }
+
+    private int parse_dec() throws ArithmeticException, IllegalArgumentException {
+        /*!re2c
+            end   { return number; }
+            [0\-9] { add_digit(10, 48); return parse_dec(); }
+            *     { throw new IllegalArgumentException(\(dqill\-formed decimal number\(dq); }
+        */
+    }
+
+    private int parse_hex() throws ArithmeticException, IllegalArgumentException {
+        /*!re2c
+            end   { return number; }
+            [0\-9] { add_digit(16, 48); return parse_hex(); }
+            [a\-f] { add_digit(16, 87); return parse_hex(); }
+            [A\-F] { add_digit(16, 55); return parse_hex(); }
+            *     { throw new IllegalArgumentException(\(dqill\-formed hexadecimal number\(dq); }
+        */
+    }
+
+    public static void main(String []args) {
+        Parser parser = new Parser();
+        assert parser.parse(\(dq1234567890\e0\(dq) == 1234567890;
+        assert parser.parse(\(dq0b1101\e0\(dq) == 0b1101;
+        assert parser.parse(\(dq0x007Fe\e0\(dq) == 0x7fe;
+        assert parser.parse(\(dq0644\e0\(dq) == 0644;
+        assert parser.parse(\(dq9999999999\e0\(dq) == \-1;
+        assert parser.parse(\(dq123??\e0\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT \-c
+
+class Parser {
+    /*!conditions:re2c*/
+    private String yyinput;
+    private int yycursor;
+    private int yymarker;
+    private int number;
+
+    private void add_digit(int base, int offset) throws ArithmeticException {
+        number = Math.addExact(
+            Math.multiplyExact(number, base),
+            yyinput.charAt(yycursor \- 1) \- offset);
+    }
+
+    public int parse(String str) throws ArithmeticException, IllegalArgumentException {
+        yyinput = str;
+        yycursor = 0;
+        int yycond = YYC_init;
+
+        number = 0;
+        try {
+            loop: while (true) {
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqchar\(dq;
+                re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+                re2c:yyfill:enable = 0;
+
+                <*> * { throw new IllegalArgumentException(\(dqill\-formed number\(dq); }
+
+                <init> \(aq0b\(aq / [01]        :=> bin
+                <init> \(dq0\(dq                :=> oct
+                <init> \(dq\(dq   / [1\-9]       :=> dec
+                <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+
+                <bin, oct, dec, hex> \(dq\ex00\(dq { return number; }
+
+                <bin> [01]  { add_digit(2, 48); continue loop; }
+                <oct> [0\-7] { add_digit(8, 48); continue loop; }
+                <dec> [0\-9] { add_digit(10, 48); continue loop; }
+                <hex> [0\-9] { add_digit(16, 48); continue loop; }
+                <hex> [a\-f] { add_digit(16, 87); continue loop; }
+                <hex> [A\-F] { add_digit(16, 55); continue loop; }
+            */
+            }
+        } catch (Exception e) {
+            return \-1;
+        }
+    }
+
+    public static void main(String []args) {
+        Parser parser = new Parser();
+        assert parser.parse(\(dq1234567890\e0\(dq) == 1234567890;
+        assert parser.parse(\(dq0b1101\e0\(dq) == 0b1101;
+        assert parser.parse(\(dq0x007Fe\e0\(dq) == 0x7fe;
+        assert parser.parse(\(dq0644\e0\(dq) == 0644;
+        assert parser.parse(\(dq9999999999\e0\(dq) == \-1;
+        assert parser.parse(\(dq123??\e0\(dq) == \-1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT \-f
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Pipe;
+
+class Lexer {
+    enum Status {
+        END,
+        READY,
+        WAITING,
+        BIG_PACKET,
+        BAD_PACKET
+    };
+
+    // Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+    // In real world use a larger buffer.
+    public static final int BUFSIZE = 10;
+
+    public static class State {
+        Pipe.SourceChannel source;
+        byte[] yyinput;
+        int yycursor;
+        int yymarker;
+        int yylimit;
+        int token;
+        int yystate;
+        int received;
+
+        public State(Pipe pipe) {
+            source = pipe.source();
+            // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+            yyinput = new byte[BUFSIZE + 1];
+            yycursor = yymarker = yylimit = token = BUFSIZE;
+            yystate = \-1;
+            received = 0;
+        }
+    }
+
+    private static void log(String format, Object... args) {
+        if (false) { System.out.printf(format + \(dq\en\(dq, args); }
+    }
+
+    private static Status fill(State st) throws IOException {
+        // Error: lexeme too long. In real life can reallocate a larger buffer.
+        if (st.token < 1) { return Status.BIG_PACKET; }
+
+        // Shift buffer contents (discard everything up to the current token).
+        System.arraycopy(st.yyinput, st.token, st.yyinput, 0, st.yylimit \- st.token); 
+        st.yycursor \-= st.token;
+        st.yymarker \-= st.token;
+        st.yylimit \-= st.token;
+        st.token = 0;
+
+        // Fill free space at the end of buffer with new data from the pipe.
+        ByteBuffer buffer = ByteBuffer.wrap(st.yyinput, st.yylimit, BUFSIZE \- st.yylimit);
+        int have = st.source.read(buffer);
+        if (have != \-1) st.yylimit += have; // \-1 means that pipe is closed
+        st.yyinput[st.yylimit] = 0; // append sentinel symbol
+
+        return Status.READY;
+    }
+
+    private static Status lex(State yyrecord) {
+        byte yych;
+        loop: while (true) {
+            yyrecord.token = yyrecord.yycursor;
+            /*!re2c
+                re2c:api = record;
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dqyyrecord.yyinput[yyrecord.yycursor]\(dq;
+                re2c:define:YYFILL = \(dqreturn Status.WAITING;\(dq;
+                re2c:eof = 0;
+
+                packet = [a\-z]+[;];
+
+                *      { return Status.BAD_PACKET; }
+                $      { return Status.END; }
+                packet { yyrecord.received += 1; continue loop; }
+            */
+        }
+    }
+
+    public static void test(String[] packets, Status expect) throws IOException {
+        // Create a pipe.
+        Pipe pipe = Pipe.open();
+        Pipe.SinkChannel sink = pipe.sink();
+
+        // Initialize lexer state
+        Lexer.State st = new Lexer.State(pipe);
+
+        // Main loop. The buffer contains incomplete data which appears packet by
+        // packet. When the lexer needs more input it saves its internal state and
+        // returns to the caller which should provide more input and resume lexing.
+        int send = 0;
+        Status status;
+        while (true) {
+            status = lex(st);
+
+            if (status == Status.END) {
+                log(\(dqdone: got %d packets\(dq, st.received);
+                break;
+            } else if (status == Status.WAITING) {
+                log(\(dqwaiting...\(dq);
+
+                if (send < packets.length) {
+                    log(\(dqsent packet %d: %s\(dq, send, packets[send]);
+                    ByteBuffer buffer = ByteBuffer.wrap(packets[send].getBytes());
+                    sink.write(buffer);
+                    send += 1;
+                } else {
+                    sink.close();
+                }
+
+                status = fill(st);
+                if (status == Status.BIG_PACKET) {
+                    log(\(dqerror: packet too big\(dq);
+                    break;
+                }
+                assert status == Status.READY;
+            } else {
+                assert status == Status.BAD_PACKET;
+                log(\(dqerror: ill\-formed packet\(dq);
+                break;
+            }
+        }
+
+        // Check results.
+        assert status == expect;
+        if (status == Status.END) {
+            assert send == st.received;
+        }
+    }
+
+    public static void main(String []args) throws IOException {
+        test(new String[]{}, Status.END);
+        test(new String[]{\(dqzero;\(dq, \(dqone;\(dq, \(dqtwo;\(dq, \(dqthree;\(dq, \(dqfour;\(dq}, Status.END);
+        test(new String[]{\(dqzer0;\(dq}, Status.BAD_PACKET);
+        test(new String[]{\(dqgoooooooooogle;\(dq}, Status.BIG_PACKET);
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. An exception is made for the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. Use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+// This example shows how to combine reusable re2c blocks: two blocks
+// (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+// in both blocks; the \(aqfish\(aq block takes priority because it is used
+// earlier. Default rule * occurs in all three blocks; the local (not
+// inherited) definition takes priority.
+
+/*!rules:re2c:colors
+    *                            { throw new IllegalArgumentException(\(dqah\(dq); }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { return Ans.COLOR; }
+*/
+
+/*!rules:re2c:fish
+    *                            { throw new IllegalArgumentException(\(dqoh\(dq); }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { return Ans.FISH; }
+*/
+
+class Main {
+    enum Ans {COLOR, FISH, DUNNO};
+
+    static Ans lex(String yyinput) { // no\-throw, as \(aq*\(aq rules are overridden
+        int yycursor = 0;
+        int yymarker = 0;
+
+        /*!re2c
+            re2c:yyfill:enable = 0;
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+
+            !use:fish;
+            !use:colors;
+            * { return Ans.DUNNO; } // overrides inherited \(aq*\(aq rules
+        */
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dqsalmon\(dq) == Ans.FISH;
+        assert lex(\(dqwhat?\(dq) == Ans.DUNNO;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+
+// This example supports multiple input encodings: UTF\-8 and UTF\-32.
+// Both lexers are generated from the same rules block, and the use
+// blocks add only encoding\-specific configurations.
+
+/*!rules:re2c
+    re2c:yyfill:enable = 0;
+    re2c:define:YYPEEK = \(dqyyinput[yycursor]\(dq;
+    re2c:indent:top = 1;
+
+    \(dq∀x ∃y\(dq { return true; }
+    *       { return false; }
+*/
+
+class Main {
+    static boolean lex_utf8(int[] yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+        /*!use:re2c
+            re2c:define:YYCTYPE = \(dqint\(dq; // should be \(gabyte\(ga, but it\(aqs signed in Java
+            re2c:encoding:utf8 = 1;
+        */
+    }
+
+    static boolean lex_utf32(int[] yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+        /*!use:re2c
+            re2c:define:YYCTYPE = \(dqint\(dq;
+            re2c:encoding:utf32 = 1;
+        */
+    }
+
+    public static void main(String []args) {
+        // we have to use \(gaint\(ga, because \(gabyte\(ga in Java cannot represent values greater than 127
+        int[] s_utf8 = new int[]{0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79};
+        assert lex_utf8(s_utf8);
+
+        int[] s_utf32 = new int[]{0x2200, 0x78, 0x20, 0x2203, 0x79};
+        assert lex_utf32(s_utf32);
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there\(aqre no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.util.Optional;
+
+class Main {
+    static class SemVer {
+        int major;
+        int minor;
+        int patch;
+
+        public SemVer(int m, int n, int k) {
+            major = m;
+            minor = n;
+            patch = k;
+        }
+
+        public boolean equals(SemVer v) {
+            return major == v.major && minor == v.minor && patch == v.patch;
+        }
+    };
+
+    static Optional<SemVer> parse(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+
+        // Final tag variables available in semantic action.
+        /*!svars:re2c format = \(dqint @@;\(dq; */
+
+        // Intermediate tag variables used by the lexer (must be autogenerated).
+        /*!stags:re2c format = \(dqint @@ = \-1;\(dq; */
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:yyfill:enable = 0;
+            re2c:tags = 1;
+
+            num = [0\-9]+;
+
+            @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+                int major = Integer.valueOf(yyinput.substring(t1, t2));
+                int minor = Integer.valueOf(yyinput.substring(t3, t4));
+                int patch = (t5 == \-1) ? 0 : Integer.valueOf(yyinput.substring(t5, yycursor \- 1));
+                return Optional.of(new SemVer(major, minor, patch));
+            }
+            * { return Optional.empty(); }
+        */
+    }
+
+    public static void main(String []args) {
+        assert parse(\(dq23.34\e0\(dq).get().equals(new SemVer(23, 34, 0));
+        assert parse(\(dq1.2.99999\e0\(dq).get().equals(new SemVer(1, 2, 99999));
+        assert !parse(\(dq1.a\e0\(dq).isPresent();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.io.*;
+import java.nio.file.*;
+import java.util.*;
+
+class Lexer {
+    static class SemVer {
+        int major;
+        int minor;
+        int patch;
+
+        public SemVer(int m, int n, int k) {
+            major = m;
+            minor = n;
+            patch = k;
+        }
+
+        public boolean equals(SemVer v) {
+            return major == v.major && minor == v.minor && patch == v.patch;
+        }
+    };
+
+    public static final int BUFSIZE = 4096;
+
+    private BufferedInputStream stream;
+    private byte[] yyinput;
+    private int yycursor;
+    private int yymarker;
+    private int yylimit;
+    private int token;
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(dqprivate int @@;\en\(dq; */
+    private boolean eof;
+
+    public Lexer(File file) throws FileNotFoundException {
+        stream = new BufferedInputStream(new FileInputStream(file));
+        // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+        yyinput = new byte[BUFSIZE + 1];
+        yycursor = yymarker = yylimit = token = BUFSIZE;
+        /*!stags:re2c format = \(dq@@ = \-1;\en\(dq; */
+        eof = false;
+    }
+
+    private int fill() throws IOException {
+        if (eof) { return \-1; } // unexpected EOF
+
+        // Error: lexeme too long. In real life can reallocate a larger buffer.
+        if (token < 1) { return \-2; }
+
+        // Shift buffer contents (discard everything up to the current token).
+        System.arraycopy(yyinput, token, yyinput, 0, yylimit \- token); 
+        yycursor \-= token;
+        yymarker \-= token;
+        yylimit \-= token;
+        /*!stags:re2c format = \(dqif (@@ != \-1) {@@ \-= token;}\en\(dq; */
+        token = 0;
+
+        // Fill free space at the end of buffer with new data from file.
+        yylimit += stream.read(yyinput, yylimit, BUFSIZE \- yylimit);
+        yyinput[yylimit] = 0; // append sentinel symbol
+
+        // If read less than expected, this is the end of input.
+        eof = yylimit < BUFSIZE;
+
+        return 0;
+    }
+
+    private int readInt(int tag1, int tag2) {
+        int n = 0;
+        for (int i = tag1; i < tag2; ++i) { n = n * 10 + (yyinput[i] \- 48); }
+        return n;
+    }
+
+    public Optional<ArrayList<SemVer>> lex() throws IOException {
+        ArrayList<SemVer> vers = new ArrayList<SemVer>();
+
+        // Final tag variables available in semantic action.
+        /*!svars:re2c format = \(dqint @@;\(dq; */
+
+        loop: while (true) {
+            token = yycursor;
+            /*!re2c
+                re2c:define:YYCTYPE = \(dqbyte\(dq;
+                re2c:define:YYPEEK = \(dqyyinput[yycursor]\(dq;
+                re2c:define:YYFILL = \(dqfill() == 0\(dq;
+                re2c:eof = 0;
+                re2c:tags = 1;
+
+                num = [0\-9]+;
+
+                @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\en] {
+                    int major = readInt(t1, t2);
+                    int minor = readInt(t3, t4);
+                    int patch = (t5 == \-1) ? 0 : readInt(t5, yycursor \- 1);
+                    vers.add(new SemVer(major, minor, patch));
+                    continue loop;
+                }
+                $ { return Optional.of(vers); }
+                * { return Optional.empty(); }
+            */
+        }
+    }
+
+    public static void main(String []args) throws FileNotFoundException, IOException {
+        String fname = \(dqinput\(dq;
+        String content = \(dq1.22.333\en\(dq.repeat(Lexer.BUFSIZE);
+
+        // Prepare input file: a few times the size of the buffer, containing
+        // newline\-separated semantic versions.
+        Files.writeString(Paths.get(fname), content);
+
+        // Prepare lexer state: all offsets are at the end of buffer.
+        File file = new File(\(dq.\(dq, fname);
+        Lexer lexer = new Lexer(file);
+
+        // Run the lexer.
+        Optional<ArrayList<SemVer>> vers = lexer.lex();
+
+        // Check results.
+        assert vers.isPresent() && vers.get().size() == BUFSIZE;
+        SemVer v = new SemVer(1, 22, 333);
+        for (int i = 0; i < BUFSIZE; ++i) {
+            assert vers.get().get(i).equals(v);
+        }
+
+        // Cleanup: remove input file.
+        file.delete();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.util.Optional;
+
+class Main {
+    static class SemVer {
+        int major;
+        int minor;
+        int patch;
+
+        public SemVer(int m, int n, int k) {
+            major = m;
+            minor = n;
+            patch = k;
+        }
+
+        public boolean equals(SemVer v) {
+            return major == v.major && minor == v.minor && patch == v.patch;
+        }
+    };
+
+    static Optional<SemVer> parse(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+
+        // Final tag variables available in semantic action.
+        /*!svars:re2c format = \(dqint @@;\(dq; */
+
+        // Intermediate tag variables used by the lexer (must be autogenerated).
+        /*!stags:re2c format = \(dqint @@ = \-1;\(dq; */
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:yyfill:enable = 0;
+            re2c:captvars = 1;
+
+            num = [0\-9]+;
+
+            (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+                int major = Integer.valueOf(yyinput.substring(yytl1, yytr1));
+                int minor = Integer.valueOf(yyinput.substring(yytl2, yytr2));
+                int patch = (yytl3 == \-1) ? 0
+                        : Integer.valueOf(yyinput.substring(yytl3 + 1, yytr3));
+                return Optional.of(new SemVer(major, minor, patch));
+            }
+            * { return Optional.empty(); }
+        */
+    }
+
+    public static void main(String []args) {
+        assert parse(\(dq23.34\e0\(dq).get().equals(new SemVer(23, 34, 0));
+        assert parse(\(dq1.2.99999\e0\(dq).get().equals(new SemVer(1, 2, 99999));
+        assert !parse(\(dq1.a\e0\(dq).isPresent();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in lists (\fBList<Integer>\fP).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+import java.util.*;
+
+class Main {
+    static Optional<int[]> parse(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+
+        // Final tag variables available in semantic action.
+        /*!svars:re2c format = \(dqint @@;\(dq; */
+        /*!mvars:re2c format = \(dqList<Integer> @@;\(dq; */
+
+        // Intermediate tag variables used by the lexer (must be autogenerated).
+        /*!stags:re2c format = \(dqint @@ = \-1;\(dq; */
+        /*!mtags:re2c format = \(dqList<Integer> @@ = new ArrayList<>();\(dq; */
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:define:YYMTAGP = \(dq@@.add(yycursor);\(dq;
+            re2c:define:YYMTAGN = \(dq\(dq; // do nothing
+            re2c:yyfill:enable = 0;
+            re2c:tags = 1;
+
+            num = [0\-9]+;
+
+            @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+                int[] vers = new int[t3.size() + 1];
+                vers[0] = Integer.valueOf(yyinput.substring(t1, t2));
+                for (int i = 0; i < t3.size(); ++i) {
+                    vers[i + 1] = Integer.valueOf(yyinput.substring(t3.get(i), t4.get(i)));
+                }
+                return Optional.of(vers);
+            }
+            * { return Optional.empty(); }
+        */
+    }
+
+    public static void main(String []args) {
+        assert Arrays.equals(parse(\(dq1\e0\(dq).get(), new int[]{1});
+        assert Arrays.equals(parse(\(dq1.2.3.4.5.6.7\e0\(dq).get(), new int[]{1, 2, 3, 4, 5, 6, 7});
+        assert !parse(\(dq1.2.\e0\(dq).isPresent();
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT \-\-utf8 \-s
+
+/*!include:re2c \(dqunicode_categories.re\(dq */
+
+class Main {
+    static boolean lex(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:yyfill:enable = 0;
+
+            // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+            // (see https://unicode.org/reports/tr31)
+            id_start    = L | Nl | [$_];
+            id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+            identifier  = id_start+;
+            // It should be \(gaid_start id_continue*\(ga, but that causes \(gaerror: code too large\(ga
+
+            identifier { return true; }
+            *          { return false; }
+        */
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq_Ыдентификатор\e0\(dq);
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using the include directive.
+.SS Include file 1 (definitions.java)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+/*!re2c
+    number = [1\-9][0\-9]*;
+*/
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { return Num.FLOAT; }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT
+
+/*!include:re2c \(dqdefinitions.java\(dq */
+
+class Main {
+    enum Num {INT, FLOAT, NAN};
+
+    static Num lex(String yyinput) {
+        int yycursor = 0;
+        int yymarker = 0;
+
+        /*!re2c
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyinput.charAt(yycursor)\(dq;
+            re2c:yyfill:enable = 0;
+
+            *      { return Num.NAN; }
+            number { return Num.INT; }
+            !include \(dqextra_rules.re.inc\(dq;
+        */
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dq123\e0\(dq) == Num.INT;
+        assert lex(\(dq123.4567\e0\(dq) == Num.FLOAT;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of the header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). An autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains the definition of
+the lexer state with tag variables (the number of variables depends on the
+regular grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2java $INPUT \-o $OUTPUT \-\-header lexer/state.java
+
+package headers;
+
+import headers.lexer.State;
+
+/*!header:re2c:on*/
+package headers.lexer;
+
+public class State {
+    public String yyinput;
+    public int yycursor;
+    /*!stags:re2c format = \(dqpublic int @@;\en\(dq; */
+
+    public State(String str) {
+        yyinput = str;
+        yycursor = 0;
+        /*!stags:re2c format = \(dq@@ = 0;\en\(dq; */
+    }
+};
+/*!header:re2c:off*/
+
+class Main {
+    static int lex(String str) {
+        State yyrecord = new State(str);
+        int t;
+        /*!re2c
+            re2c:api = record;
+            re2c:tags = 1;
+            re2c:yyfill:enable = 0;
+            re2c:define:YYCTYPE = \(dqchar\(dq;
+            re2c:define:YYPEEK = \(dqyyrecord.yyinput.charAt(yyrecord.yycursor)\(dq;
+            re2c:header = \(dqlexer/state.java\(dq;
+
+            [a]* @t [b]* { return t; }
+        */
+    }
+
+    public static void main(String []args) {
+        assert lex(\(dqab\e0\(dq) == 1;
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2c
+
+package headers.lexer;
+
+public class State {
+    public String yyinput;
+    public int yycursor;
+    public int yyt1;
+
+
+    public State(String str) {
+        yyinput = str;
+        yycursor = 0;
+        yyt1 = 0;
+
+    }
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years to turn the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2js.1 b/bootstrap/doc/re2js.1
new file mode 100644
index 000000000..f2f15edd2
--- /dev/null
+++ b/bootstrap/doc/re2js.1
@@ -0,0 +1,3327 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for JavaScript, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+function lex(yyinput) {
+    let yycursor = 0;
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        number = [1\-9][0\-9]*;
+
+        number { return true; }
+        *      { return false; }
+    */
+}
+
+if (!lex(\(dq1234\e0\(dq)) {
+    throw \(dqerror!\(dq
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB/*!re2c\fP and \fB*/\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2js
+// re2js $INPUT \-o $OUTPUT
+
+function lex(yyinput) {
+    let yycursor = 0;
+    
+{
+    let yych = 0
+    let yystate = 0
+    yyl: while (true) {
+        switch (yystate) {
+            case 0:
+                yych = yyinput.charCodeAt(yycursor)
+                yycursor += 1;
+                switch (yych) {
+                    case 0x31:
+                    case 0x32:
+                    case 0x33:
+                    case 0x34:
+                    case 0x35:
+                    case 0x36:
+                    case 0x37:
+                    case 0x38:
+                    case 0x39:
+                        yystate = 2
+                        continue yyl
+                    default:
+                        yystate = 1
+                        continue yyl
+                }
+            case 1:
+                { return false; }
+            case 2:
+                yych = yyinput.charCodeAt(yycursor)
+                switch (yych) {
+                    case 0x30:
+                    case 0x31:
+                    case 0x32:
+                    case 0x33:
+                    case 0x34:
+                    case 0x35:
+                    case 0x36:
+                    case 0x37:
+                    case 0x38:
+                    case 0x39:
+                        yycursor += 1;
+                        yystate = 2
+                        continue yyl
+                    default:
+                        yystate = 3
+                        continue yyl
+                }
+            case 3:
+                { return true; }
+            default:
+                throw \(dqinternal lexer error\(dq
+        }
+    }
+}
+
+}
+
+if (!lex(\(dq1234\e0\(dq)) {
+    throw \(dqerror!\(dq
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2js has three API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBSimple API\fP
+This is the default API for the JavaScript backend. It consists of the
+following primitives: \fBYYINPUT\fP (which should be defined as a sequence of
+code units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
+\fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBRecord API\fP
+Record API is useful in cases when lexer state must be stored in an object.
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
+name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
+defined as an object with properties \fByyinput\fP, \fByycursor\fP,
+\fByymarker\fP, \fByyctxmarker\fP, \fByylimit\fP (only the fields used by the
+generated code need to be defined, and their names can be configured).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are less than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with no arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in a form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-syntax\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+// expects a null\-terminated string
+function lex(yyinput) {
+    let yycursor = 0;
+    let count = 0
+
+    loop: while (true) {
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        *      { return \-1 }
+        [\ex00] { return count }
+        [ ]+   { continue loop }
+        [a\-z]+ { count += 1; continue loop }
+    */
+    }
+}
+
+function test(s, n) { if (lex(s) != n) throw \(dqerror!\(dq; }
+test(\(dq\e0\(dq, 0)
+test(\(dqone two three\e0\(dq, 3)
+test(\(dqf0ur\e0\(dq, \-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+// expects a null\-terminated string
+function lex(yyinput) {
+    let yycursor = 0;
+    let yylimit = yyinput.length \- 1 // terminating null not included
+    let count = 0
+
+    loop: while (true) {
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:eof = 0;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        *    { return \-1 }
+        $    { return count }
+        [ ]+ { continue loop }
+        str  { count += 1; continue loop }
+    */
+    }
+}
+
+function test(s, n) { if (lex(s) != n) throw \(dqerror!\(dq; }
+test(\(dq\e0\(dq, 0)
+test(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \e0\(dq, 3)
+test(\(dq\(aqunterminated\e\e\(aq\e0\(dq, \-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+/*!max:re2c*/
+
+function lex(yyinput) {
+    let yycursor = 0;
+    let yylimit = yyinput.length
+    let count = 0
+
+    loop: while (true) {
+    /*!re2c
+        re2c:define:YYFILL = \(dqreturn \-1\(dq;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        [\ex00] {
+            // check that it is the sentinel, not some unexpected null
+            return (yycursor == yylimit \- YYMAXFILL + 1) ? count : \-1
+        }
+        str  { count += 1; continue loop }
+        [ ]+ { continue loop }
+        *    { return \-1 }
+    */
+    }
+}
+
+function test(s, n) {
+    let padded_s = s + \(dq\e0\(dq.repeat(YYMAXFILL)
+    if (lex(padded_s) != n) throw \(dqerror!\(dq
+}
+
+test(\(dq\(dq, 0)
+test(\(dq\(aqunterminated\e\e\(aq\(dq, \-1)
+test(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq, 3)
+test(\(dq\(aqunexpected \e0 null\(dq, \-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated words. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To cover up
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the check condition
+fails, \fBYYPEEK\fP returns the real character, otherwise it returns a fake
+sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+// expects a string without terminating null
+function lex(str) {
+    let cur = 0;
+    let lim = str.length
+    let count = 0
+
+    loop: while (true) {
+    /*!re2c
+        re2c:api = generic;
+        re2c:define:YYPEEK = \(dqcur < lim ? str.charCodeAt(cur) : 0\(dq;
+        re2c:define:YYSKIP = \(dqcur += 1\(dq;
+        re2c:yyfill:enable = 0;
+
+        *      { return \-1 }
+        [\ex00] { return count }
+        [ ]+   { continue loop }
+        [a\-z]+ { count += 1; continue loop }
+    */
+    }
+}
+
+function test(s, n) { if (lex(s) != n) throw \(dqerror!\(dq; }
+test(\(dq\(dq, 0)
+test(\(dqone two three\(dq, 3)
+test(\(dqf0ur\(dq, \-1)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in default
+API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token and up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const fs = require(\(aqfs\(aq)
+
+const BUFSIZE = 4096
+const OK = 0
+const EOF = 1
+const LONG_LEXEME = 2
+
+function fill(st) {
+    if (st.eof) return EOF
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (st.token < 1) return LONG_LEXEME
+
+    // Shift buffer contents (discard everything up to the current token).
+    st.yyinput.copy(st.yyinput, 0, st.token, st.yylimit)
+    st.yycursor \-= st.token;
+    st.yymarker \-= st.token;
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Read a new chunk of data from file and append it to \(gayyinput\(ga.
+    let want = BUFSIZE \- st.yylimit \- 1 // \-1 for sentinel
+    let nread = fs.readSync(st.file, st.yyinput, st.yylimit, want)
+    st.eof = nread < want // end of file?
+    st.yylimit += nread
+    st.yyinput.writeUInt8(0, st.yylimit) // sentinel
+
+    return OK
+}
+
+function lex(yyrecord, count) {
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor
+        /*!re2c
+            re2c:api = record;
+            re2c:define:YYPEEK = \(dqreadUInt8\(dq;
+            re2c:define:YYFILL = \(dqfill(yyrecord) == OK\(dq;
+            re2c:eof = 0;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            *    { return \-1 }
+            $    { return count }
+            [ ]+ { continue loop }
+            str  { count += 1; continue loop }
+        */
+    }
+}
+
+function main() {
+    let fname = \(dqinput\(dq
+
+    // Create input file.
+    let content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq.repeat(BUFSIZE)
+    fs.writeFileSync(fname, content, function(err) { if (err) throw err; })
+
+    // Init lexer state.
+    let limit = BUFSIZE \- 1 // exclude terminating null
+    let st = {
+        file: fs.openSync(fname, \(aqr\(aq),
+        yyinput: Buffer.alloc(BUFSIZE),
+        yylimit: limit,
+        yycursor: limit,
+        yymarker: limit,
+        token: limit,
+        eof: false
+    }
+
+    // Run lexer on the prepared file.
+    if (lex(st, 0) != 3 * BUFSIZE) { throw \(dqerror :[\(dq }
+
+    // Cleanup.
+    fs.unlink(fname, function(err){ if (err) throw err; })
+}
+
+main()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const fs = require(\(aqfs\(aq)
+
+const BUFSIZE = 4096
+const OK = 0
+const EOF = 1
+const LONG_LEXEME = 2
+/*!max:re2c*/
+
+function fill(st, need) {
+    if (st.eof) return EOF
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (st.token < need) return LONG_LEXEME
+
+    // Shift buffer contents (discard everything up to the current token).
+    st.yyinput.copy(st.yyinput, 0, st.token, st.yylimit)
+    st.yycursor \-= st.token;
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Read a new chunk of data from file and append it to \(gayyinput\(ga.
+    let want = BUFSIZE \- st.yylimit \- 1 // \-1 for sentinel
+    let nread = fs.readSync(st.file, st.yyinput, st.yylimit, want)
+    st.yylimit += nread
+    if (nread < want) {
+        st.eof = true // end of file
+        st.yyinput.write(\(dq\e0\(dq.repeat(YYMAXFILL), st.yylimit)
+        st.yylimit += YYMAXFILL
+    }
+
+    return OK
+}
+
+function lex(yyrecord, count) {
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor
+        /*!re2c
+            re2c:api = record;
+            re2c:define:YYPEEK = \(dqreadUInt8\(dq;
+            re2c:define:YYFILL = \(dqif (fill(yyrecord, @@) != OK) return \-1;\(dq;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            [\ex00] {
+                // Check that it is the sentinel, not some unexpected null.
+                return yyrecord.token == yyrecord.yylimit \- YYMAXFILL ? count : \-1
+            }
+            str  { count += 1; continue loop }
+            [ ]+ { continue loop }
+            *    { return \-1 }
+        */
+    }
+}
+
+function main() {
+    let fname = \(dqinput\(dq
+
+    // Create input file.
+    let content = \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq.repeat(BUFSIZE)
+    fs.writeFileSync(fname, content, function(err) { if (err) throw err; })
+
+    // Init lexer state.
+    let limit = BUFSIZE \- 1 // exclude terminating null
+    let st = {
+        file: fs.openSync(fname, \(aqr\(aq),
+        yyinput: Buffer.alloc(BUFSIZE),
+        yylimit: limit,
+        yycursor: limit,
+        token: limit,
+        eof: false
+    }
+
+    // Run lexer on the prepared file.
+    if (lex(st, 0) != 3 * BUFSIZE) { throw \(dqerror :[\(dq }
+
+    // Cleanup.
+    fs.unlink(fname, function(err){ if (err) throw err; })
+}
+
+main()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+/*!re2c // Common re2c definitions shared between all functions.
+    re2c:api = record;
+    re2c:variable:yyrecord = st;
+    re2c:yyfill:enable = 0;
+*/
+
+function parse_u32(str) {
+    let st = {
+        yyinput: str,
+        yycursor: 0,
+        yymarker: 0
+    }
+    /*!re2c
+        \(aq0b\(aq / [01]        { return parse_bin(st) }
+        \(dq0\(dq                { return parse_oct(st) }
+        \(dq\(dq / [1\-9]         { return parse_dec(st) }
+        \(aq0x\(aq / [0\-9a\-fA\-F] { return parse_hex(st) }
+        *                  { return null }
+    */
+}
+
+function parse_bin(st) {
+    n = 0
+    loop: while (true) {
+    /*!re2c
+        [01] { n = n * 2 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 48); continue loop }
+        *    { return n }
+    */
+    }
+}
+
+function parse_oct(st) {
+    n = 0
+    loop: while (true) {
+    /*!re2c
+        [0\-7] { n = n * 8 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 48); continue loop }
+        *     { return n }
+    */
+    }
+}
+
+function parse_dec(st) {
+    n = 0
+    loop: while (true) {
+    /*!re2c
+        [0\-9] { n = n * 10 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 48); continue loop }
+        *     { return n }
+    */
+    }
+}
+
+function parse_hex(st) {
+    n = 0
+    loop: while (true) {
+    /*!re2c
+        [0\-9] { n = n * 16 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 48); continue loop }
+        [a\-f] { n = n * 16 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 87); continue loop }
+        [A\-F] { n = n * 16 + (st.yyinput.charCodeAt(st.yycursor \- 1) \- 55); continue loop }
+        *     { return n }
+    */
+    }
+}
+
+function test(s, n) {
+    if (parse_u32(s) != n) throw \(dqerror!\(dq
+}
+
+test(\(dq\e0\(dq, null)
+test(\(dq1234567890\e0\(dq, 1234567890)
+test(\(dq0b1101\e0\(dq, 13)
+test(\(dq0x7Fe\e0\(dq, 2046)
+test(\(dq0644\e0\(dq, 420)
+test(\(dq9999999999\e0\(dq, 9999999999)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fByycond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT \-c
+
+/*!conditions:re2c*/
+
+function parse_u32(yyinput) {
+    let yycursor = 0
+    let yycond = YYC_INIT
+    let n = 0
+
+    loop: while (true) {
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:indent:top = 2;
+
+        <INIT> \(aq0b\(aq / [01]        :=> BIN
+        <INIT> \(dq0\(dq                :=> OCT
+        <INIT> \(dq\(dq / [1\-9]         :=> DEC
+        <INIT> \(aq0x\(aq / [0\-9a\-fA\-F] :=> HEX
+        <INIT> * { return null }
+
+        <BIN> [01]  { n = n * 2 + (yyinput.charCodeAt(yycursor \- 1) \- 48); continue loop }
+        <OCT> [0\-7] { n = n * 8 + (yyinput.charCodeAt(yycursor \- 1) \- 48); continue loop }
+        <DEC> [0\-9] { n = n * 10 + (yyinput.charCodeAt(yycursor \- 1) \- 48); continue loop }
+        <HEX> [0\-9] { n = n * 16 + (yyinput.charCodeAt(yycursor \- 1) \- 48); continue loop }
+        <HEX> [a\-f] { n = n * 16 + (yyinput.charCodeAt(yycursor \- 1) \- 87); continue loop }
+        <HEX> [A\-F] { n = n * 16 + (yyinput.charCodeAt(yycursor \- 1) \- 55); continue loop }
+
+        <BIN, OCT, DEC, HEX> * { return n }
+    */
+    }
+}
+
+function test(s, n) {
+    if (parse_u32(s) != n) throw \(dqerror!\(dq
+}
+
+test(\(dq\e0\(dq, null)
+test(\(dq1234567890\e0\(dq, 1234567890)
+test(\(dq0b1101\e0\(dq, 13)
+test(\(dq0x7Fe\e0\(dq, 2046)
+test(\(dq0644\e0\(dq, 420)
+test(\(dq9999999999\e0\(dq, 9999999999)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic, getting more input may depend on
+processing the lexeme at the end of buffer (which is already blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT \-f
+
+const fs = require(\(aqfs\(aq)
+
+// Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+// In real world use a larger buffer.
+const BUFSIZE = 10
+const DEBUG = false
+const END = 0
+const READY = 1
+const WAITING = 2
+const BIG_PACKET = 3
+const BAD_PACKET = 4
+
+function log() {
+    if (DEBUG) console.log.apply(console, arguments)
+}
+
+function fill(st) {
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (st.token < 1) return BIG_PACKET
+
+    // Shift buffer contents (discard everything up to the current token).
+    st.yyinput.copy(st.yyinput, 0, st.token, st.yylimit)
+    st.yycursor \-= st.token;
+    st.yymarker \-= st.token;
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Read a new chunk of data from file and append it to \(gayyinput\(ga.
+    let want = BUFSIZE \- st.yylimit \- 1 // \-1 for sentinel
+    let nread = fs.readSync(st.file, st.yyinput, st.yylimit, want)
+    st.yylimit += nread
+    st.yyinput.writeUInt8(0, st.yylimit) // sentinel
+
+    return READY
+}
+
+function lex(yyrecord) {
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor
+        /*!re2c
+            re2c:api = record;
+            re2c:define:YYPEEK = \(dqreadUInt8\(dq;
+            re2c:define:YYFILL = \(dqreturn WAITING\(dq;
+            re2c:eof = 0;
+
+            packet = [a\-z]+[;];
+
+            *      { return BAD_PACKET }
+            $      { return END }
+            packet { yyrecord.received += 1; continue loop }
+        */
+    }
+}
+
+function test(packets, expect) {
+    // Emulate a \(dqpipe\(dq by opening the same file for reading and writing.
+    let fname = \(dqinput\(dq
+    let fw = fs.openSync(fname, \(aqw\(aq);
+    let fr = fs.openSync(fname, \(aqr\(aq);
+
+    // Init lexer state.
+    let limit = BUFSIZE \- 1 // exclude terminating null
+    let st = {
+        file: fr,
+        yyinput: Buffer.alloc(BUFSIZE),
+        yylimit: limit,
+        yycursor: limit,
+        yymarker: limit,
+        token: limit,
+        yystate: \-1,
+        received: 0
+    }
+
+    // Main loop. The buffer contains incomplete data which appears packet by
+    // packet. When the lexer needs more input it saves its internal state and
+    // returns to the caller which should provide more input and resume lexing.
+    let send = 0
+    let status
+    loop: while (true) {
+        status = lex(st)
+
+        if (status == END) {
+            log(\(dqdone: got\(dq, st.received, \(dqpackets\(dq)
+            break loop
+        } else if (status == WAITING) {
+            log(\(dqwaiting...\(dq);
+
+            if (send < packets.length) {
+                log(\(dqsent packet\(dq, send, packets[send])
+                fs.writeFileSync(fw, packets[send])
+                send += 1
+            }
+
+            status = fill(st)
+            log(\(dqqueue:\(dq, st.yyinput.toString())
+            if (status == BIG_PACKET) {
+                log(\(dqerror: packet too big\(dq)
+                break loop
+            }
+
+            if (status != READY) throw \(dqexpected READY\(dq
+        } else {
+            if (status != BAD_PACKET) throw \(dqexpected BAD_PACKET\(dq
+            log(\(dqerror: ill\-formed packet\(dq)
+            break loop
+        }
+    }
+
+    // Check results.
+    if (status != expect) throw \(dqunexpected status\(dq
+    if (status == END && st.received != send) throw \(dqunexpected packet count\(dq
+
+    // Cleanup.
+    fs.unlinkSync(fname, function(err){ if (err) throw err; })
+}
+
+function main() {
+    test([], END)
+    test([\(dqzero;\(dq, \(dqone;\(dq, \(dqtwo;\(dq, \(dqthree;\(dq, \(dqfour;\(dq], END)
+    test([\(dqzer0;\(dq], BAD_PACKET)
+    test([\(dqgoooooooooogle;\(dq], BIG_PACKET)
+}
+
+main()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. The exceptions are the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. Use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+// This example shows how to combine reusable re2c blocks: two blocks
+// (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+// in both blocks; the \(aqfish\(aq block takes priority because it is used
+// earlier. Default rule * occurs in all three blocks; the local (not
+// inherited) definition takes priority.
+
+const COLOR = 1
+const FISH = 2
+const DUNNO = 3
+
+/*!rules:re2c:colors
+    *                            { throw \(dqah\(dq }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { return COLOR }
+*/
+
+/*!rules:re2c:fish
+    *                            { throw \(dqoh\(dq }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { return FISH }
+*/
+
+function lex(yyinput) {
+    let yycursor = 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        !use:fish;
+        !use:colors;
+        * { return DUNNO } // overrides inherited \(aq*\(aq rules
+    */
+}
+
+function test(s, n) { if (lex(s) != n) throw \(dqerror!\(dq; }
+
+test(\(dqsalmon\(dq, FISH)
+test(\(dqwhat?\(dq, DUNNO)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+
+// This example supports multiple input encodings: UTF\-8 and UTF\-32.
+// Both lexers are generated from the same rules block, and the use
+// blocks add only encoding\-specific configurations.
+/*!rules:re2c
+    re2c:yyfill:enable = 0;
+    re2c:define:YYPEEK = \(dqat\(dq;
+
+    \(dq∀x ∃y\(dq { return yycursor }
+    *       { return null }
+*/
+
+function lex_utf8(yyinput) {
+    let yycursor = 0
+    /*!use:re2c
+        re2c:encoding:utf8 = 1;
+    */
+}
+
+function lex_utf32(yyinput) {
+    let yycursor = 0
+    /*!use:re2c
+        re2c:encoding:utf32 = 1;
+    */
+}
+
+function test(f, s) {
+    if (f(s) != s.length) throw \(dqerror!\(dq
+}
+
+test(lex_utf8, [0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79])
+test(lex_utf32, [0x2200, 0x78, 0x20, 0x2203, 0x79])
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there\(aqre no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const assert = require(\(aqassert\(aq);
+
+function parse(yyinput) {
+    let yycursor = 0
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(dqlet @@\en\(dq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(dqlet @@\en\(dq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+            return {
+                major: Number(yyinput.substring(t1, t2)),
+                minor: Number(yyinput.substring(t3, t4)),
+                patch: t5 == \-1 ? 0 : Number(yyinput.substring(t5, yycursor \- 1))
+            }
+        }
+        * { return null }
+    */
+}
+
+assert.deepEqual(parse(\(dq23.34\e0\(dq), {major: 23, minor: 34, patch: 0})
+assert.deepEqual(parse(\(dq1.2.99999\e0\(dq), {major: 1, minor: 2, patch: 99999})
+assert.deepEqual(parse(\(dq1.a\e0\(dq), null)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const assert = require(\(aqassert\(aq);
+const fs = require(\(aqfs\(aq)
+
+const BUFSIZE = 4096
+const OK = 0
+const EOF = 1
+const LONG_LEXEME = 2
+
+function fill(st) {
+    if (st.eof) return EOF
+
+    // Error: lexeme too long. In real life could reallocate a larger buffer.
+    if (st.token < 1) return LONG_LEXEME
+
+    // Shift buffer contents (discard everything up to the current token).
+    st.yyinput.copy(st.yyinput, 0, st.token, st.yylimit)
+    st.yycursor \-= st.token;
+    st.yymarker \-= st.token;
+    st.yylimit \-= st.token;
+    /*!stags:re2c format = \(dqif (st.@@ != \-1) st.@@ \-= st.token\en\(dq; */
+    st.token = 0;
+
+    // Read a new chunk of data from file and append it to \(gayyinput\(ga.
+    let want = BUFSIZE \- st.yylimit \- 1 // \-1 for sentinel
+    let nread = fs.readSync(st.file, st.yyinput, st.yylimit, want)
+    st.eof = nread < want // end of file?
+    st.yylimit += nread
+    st.yyinput.writeUInt8(0, st.yylimit) // sentinel
+
+    return OK
+}
+
+function lex(st) {
+    let vers = []
+    loop: while (true) {
+        st.token = st.yycursor
+
+        // Final tag variables available in semantic action.
+        /*!svars:re2c format = \(dqlet @@\en\(dq; */
+
+        /*!re2c
+            re2c:api = record;
+            re2c:variable:yyrecord = st;
+            re2c:define:YYPEEK = \(dqreadUInt8\(dq;
+            re2c:define:YYFILL = \(dqfill(st) == OK\(dq;
+            re2c:eof = 0;
+            re2c:tags = 1;
+
+            num = [0\-9]+;
+
+            num @t1 \(dq.\(dq @t2 num @t3 (\(dq.\(dq @t4 num)? [\en] {
+                vers.push({
+                    major: Number(st.yyinput.subarray(st.token, t1)),
+                    minor: Number(st.yyinput.subarray(t2, t3)),
+                    patch: t4 == \-1 ? 0 : Number(st.yyinput.subarray(t4, st.yycursor \- 1))
+                })
+                continue loop
+            }
+            $ { return vers }
+            * { return null }
+        */
+    }
+}
+
+function main() {
+    let fname = \(dqinput\(dq
+
+    // Create input file.
+    let content = \(dq1.22.333\en\(dq.repeat(BUFSIZE)
+    fs.writeFileSync(fname, content, function(err) { if (err) throw err; })
+
+    // Init lexer state.
+    let limit = BUFSIZE \- 1 // exclude terminating null
+    let st = {
+        file: fs.openSync(fname, \(aqr\(aq),
+        yyinput: Buffer.alloc(BUFSIZE),
+        yylimit: limit,
+        yycursor: limit,
+        yymarker: limit,
+        token: limit,
+        // Intermediate tag variables used by the lexer (must be autogenerated).
+        /*!stags:re2c format = \(dq@@: \-1,\en\(dq; */
+        eof: false
+    }
+
+    // Run lexer on the prepared file.
+    assert.deepEqual(lex(st), Array(BUFSIZE).fill({major: 1, minor: 22, patch: 333}))
+
+    // Cleanup.
+    fs.unlink(fname, function(err){ if (err) throw err; })
+}
+
+main()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const assert = require(\(aqassert\(aq);
+
+function parse(yyinput) {
+    let yycursor = 0
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(dqlet @@\en\(dq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(dqlet @@\en\(dq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:captvars = 1;
+
+        num = [0\-9]+;
+
+        (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+            return {
+                major: Number(yyinput.substring(yytl1, yytr1)),
+                minor: Number(yyinput.substring(yytl2, yytr2)),
+                patch: yytl3 == \-1 ? 0 : Number(yyinput.substring(yytl3 + 1, yytr3))
+            }
+        }
+        * { return null }
+    */
+}
+
+assert.deepEqual(parse(\(dq23.34\e0\(dq), {major: 23, minor: 34, patch: 0})
+assert.deepEqual(parse(\(dq1.2.99999\e0\(dq), {major: 1, minor: 2, patch: 99999})
+assert.deepEqual(parse(\(dq1.a\e0\(dq), null)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+const assert = require(\(aqassert\(aq)
+
+function parse(yyinput) {
+    let yycursor = 0
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(dqlet @@\en\(dq; */
+    /*!mvars:re2c format = \(dqlet @@\en\(dq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(dqlet @@\en\(dq; */
+    /*!mtags:re2c format = \(dqlet @@ = []\en\(dq; */
+
+    /*!re2c
+        re2c:define:YYMTAGP = \(dq@@.push(yycursor)\(dq;
+        re2c:define:YYMTAGN = \(dq\(dq; // do nothing
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+            let vers = [Number(yyinput.substring(t1, t2))]
+            for (let i = 0; i < t3.length; ++i) {
+                vers.push(Number(yyinput.substring(t3[i], t4[i])))
+            }
+            return vers
+        }
+        * { return null }
+    */
+}
+
+assert.deepEqual(parse(\(dq1\e0\(dq), [1])
+assert.deepEqual(parse(\(dq1.2.3.4.5.6.7\e0\(dq), [1, 2, 3, 4, 5, 6, 7])
+assert.deepEqual(parse(\(dq1.2.\e0\(dq), null)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT \-\-utf8 \-s
+
+/*!include:re2c \(dqunicode_categories.re\(dq */
+
+function lex(yyinput) {
+    let yycursor = 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+        // (see https://unicode.org/reports/tr31)
+        id_start    = L | Nl | [$_];
+        id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+        identifier  = id_start id_continue*;
+
+        identifier { return true }
+        *          { return false }
+    */
+}
+
+if (!lex(\(dq_Ыдентификатор\e0\(dq)) throw \(dqerror!\(dq
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using include directive.
+.SS Include file 1 (definitions.js)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+const INT = 1
+const FLOAT = 2
+const NAN = 3
+
+/*!re2c
+    number = [1\-9][0\-9]*;
+*/
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { return FLOAT }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT
+
+/*!include:re2c \(dqdefinitions.js\(dq */
+
+function lex(yyinput) {
+    let yycursor = 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        *      { return NAN }
+        number { return INT }
+        !include \(dqextra_rules.re.inc\(dq;
+    */
+}
+
+function test(s, n) {
+    if (lex(s) != n) throw \(dqerror!\(dq
+}
+
+test(\(dq123\e0\(dq, INT)
+test(\(dq123.4567\e0\(dq, FLOAT)
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). Autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains definition of the
+lexer state with tag variables (the number of variables depends on the regular
+grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2js $INPUT \-o $OUTPUT \-\-header lexer/state.js
+
+let state = require(\(aq./lexer/state.js\(aq);
+
+/*!header:re2c:on*/
+exports.mk_state = function(str) {
+    return {
+        yyinput: str,
+        /*!stags:re2c format = \(dq@@: 0,\en\(dq; */
+        yycursor: 0
+    }
+}
+/*!header:re2c:off*/
+
+function lex(yyrecord) {
+    let t
+    /*!re2c
+        re2c:api = record;
+        re2c:tags = 1;
+        re2c:yyfill:enable = 0;
+        re2c:header = \(dqlexer/state.js\(dq;
+
+        [a]* @t [b]* { return t }
+    */
+}
+
+if (lex(state.mk_state(\(dqab\e0\(dq)) != 1) {
+    throw \(dqerror!\(dq
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2c
+
+exports.mk_state = function(str) {
+    return {
+        yyinput: str,
+        yyt1: 0,
+
+        yycursor: 0
+    }
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years turning the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2ocaml.1 b/bootstrap/doc/re2ocaml.1
new file mode 100644
index 000000000..36970e932
--- /dev/null
+++ b/bootstrap/doc/re2ocaml.1
@@ -0,0 +1,3487 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for OCaml, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-i *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+}
+
+%{
+    re2c:define:YYFN = [\(dqlex;bool\(dq, \(dqyyrecord;state\(dq];
+    re2c:yyfill:enable = 0;
+
+    number = [1\-9][0\-9]*;
+
+    number { true }
+    *      { false }
+%}
+
+let main () =
+    let st = {yyinput = \(dq1234\ex00\(dq; yycursor = 0}
+    in if not (lex st) then raise (Failure \(dqerror\(dq)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB%{\fP and \fB%}\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* Generated by re2ocaml *)
+(* re2ocaml $INPUT \-o $OUTPUT \-i *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+}
+
+
+let rec yy0 (yyrecord : state) : bool =
+    let yych = get yyrecord.yyinput yyrecord.yycursor in
+    yyrecord.yycursor <\- yyrecord.yycursor + 1;
+    match yych with
+        | \(aq1\(aq..\(aq9\(aq \-> (yy2 [@tailcall]) yyrecord
+        | _ \-> (yy1 [@tailcall]) yyrecord
+
+and yy1 (yyrecord : state) : bool =
+    false
+
+and yy2 (yyrecord : state) : bool =
+    let yych = get yyrecord.yyinput yyrecord.yycursor in
+    match yych with
+        | \(aq0\(aq..\(aq9\(aq \->
+            yyrecord.yycursor <\- yyrecord.yycursor + 1;
+            (yy2 [@tailcall]) yyrecord
+        | _ \-> (yy3 [@tailcall]) yyrecord
+
+and yy3 (yyrecord : state) : bool =
+    true
+
+and lex (yyrecord : state) : bool =
+    (yy0 [@tailcall]) yyrecord
+
+
+
+let main () =
+    let st = {yyinput = \(dq1234\ex00\(dq; yycursor = 0}
+    in if not (lex st) then raise (Failure \(dqerror\(dq)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2ocaml has two API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBRecord API\fP
+Record API is the default API for the OCaml backend.
+This API consists of a variable \fByyrecord\fP (the name can be overridden with
+\fBre2c:variable:yyrecord\fP) that should be defined as a record with fields
+\fB_yyinput\fP, \fB_yycursor\fP, \fB_yymarker\fP, \fB_yyctxmarker\fP, \fB_yylimit\fP\&.
+Only the fields used by the generated code need to be defined, and their
+names can be configured.
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are fewer than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in the form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of a header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of the header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-support\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+}
+
+(* expect a null\-terminated string *)
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;state\(dq, \(dqcount;int\(dq];
+    re2c:yyfill:enable = 0;
+
+    *      { \-1 }
+    [\ex00] { count }
+    [a\-z]+ { lex yyrecord (count + 1) }
+    [ ]+   { lex yyrecord count }
+%}
+
+let test(yyinput, count) =
+    let st = {yyinput = yyinput; yycursor = 0}
+    in if not (lex st 0 = count) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dq\ex00\(dq, 0);
+    test(\(dqone two three\ex00\(dq, 3);
+    test(\(dqf0ur\ex00\(dq, \-1)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    yylimit: int;
+}
+
+(* expect a null\-terminated string *)
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;state\(dq, \(dqcount;int\(dq];
+    re2c:yyfill:enable = 0;
+    re2c:eof = 0;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *    { \-1 }
+    $    { count }
+    str  { lex yyrecord (count + 1) }
+    [ ]+ { lex yyrecord count }
+%}
+
+let test(str, count) =
+    let st = {
+        yyinput = str;
+        yycursor = 0;
+        yymarker = 0;
+        yylimit = length str \- 1; (* terminating null not included *)
+    }
+    in if not (lex st 0 = count) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dq\ex00\(dq, 0);
+    test(\(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \ex00\(dq, 3);
+    test(\(dq\(aqunterminated\e\e\(aq\ex00\(dq, \-1)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+exception Fill
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    yylimit: int;
+}
+
+%{max %}
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;state\(dq, \(dqcount;int\(dq];
+    re2c:define:YYFILL = \(dqraise Fill;\(dq;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    [\ex00] {
+        (* check that it is the sentinel, not some unexpected null *)
+        if yyrecord.yycursor = length yyrecord.yyinput \- yymaxfill + 1 then count else \-1
+    }
+    str  { lex yyrecord (count + 1) }
+    [ ]+ { lex yyrecord count }
+    *    { \-1 }
+%}
+
+let test(str, count) =
+    let buf = cat str (make yymaxfill \(aq\ex00\(aq) in
+    let st = {yyinput = buf; yycursor = 0; yylimit = length buf} in
+    let result = try lex st 0 with Fill \-> \-1 in
+    if not (result = count) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dq\(dq, 0);
+    test(\(dq\(aqunterminated\e\e\(aq\(dq, \-1);
+    test(\(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq, 3);
+    test(\(dq\(aqunexpected \ex00 null\(dq, \-1)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated single\-quoted strings. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To cover up
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the cursor is within
+bounds, \fBYYPEEK\fP returns the real character, otherwise it returns a fake
+sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+type state = {
+    str: string;
+    mutable cur: int;
+    lim: int;
+}
+
+(* expect a string without terminating null *)
+%{
+    re2c:api = generic;
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqst;state\(dq, \(dqcount;int\(dq];
+    re2c:define:YYPEEK = \(dqif st.cur < st.lim then st.str.[st.cur] else \(aq\e\ex00\(aq\(dq;
+    re2c:define:YYSKIP = \(dqst.cur <\- st.cur + 1;\(dq;
+    re2c:yyfill:enable = 0;
+
+    *      { \-1 }
+    [\ex00] { count }
+    [a\-z]+ { lex st (count + 1) }
+    [ ]+   { lex st count }
+%}
+
+let test(str, count) =
+    let st = {str = str; cur = 0; lim = String.length str}
+    in if not (lex st 0 = count) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dq\(dq, 0);
+    test(\(dqone two three\(dq, 3);
+    test(\(dqf0ur\(dq, \-1)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in
+C pointer API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads a file named \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open Bytes
+
+let bufsize = 4096
+
+type state = {
+    file: in_channel;
+    yyinput: bytes;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yylimit: int;
+    mutable token: int;
+    mutable eof: bool;
+}
+
+type status = Ok | Eof | LongLexeme
+
+let fill(st: state) : status =
+    if st.eof then Eof else
+
+    (* Error: lexeme too long. In real life could reallocate a larger buffer. *)
+    if st.token < 1 then LongLexeme else (
+
+    (* Shift buffer contents (discard everything up to the current token). *)
+    blit st.yyinput st.token st.yyinput 0 (st.yylimit \- st.token);
+    st.yycursor <\- st.yycursor \- st.token;
+    st.yymarker <\- st.yymarker \- st.token;
+    st.yylimit <\- st.yylimit \- st.token;
+    st.token <\- 0;
+
+    (* Fill free space at the end of buffer with new data from file. *)
+    let n = input st.file st.yyinput st.yylimit (bufsize \- st.yylimit \- 1) in (* \-1 for sentinel *)
+    st.yylimit <\- st.yylimit + n;
+    if n = 0 then
+        st.eof <\- true; (* end of file *)
+        set st.yyinput st.yylimit \(aq\ex00\(aq; (* append sentinel *)
+
+    Ok)
+
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;state\(dq, \(dqcount;int\(dq];
+    re2c:define:YYFILL = \(dqfill yyrecord = Ok\(dq;
+    re2c:eof = 0;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *    { \-1 }
+    $    { count }
+    str  { lex_loop yyrecord (count + 1) }
+    [ ]+ { lex_loop yyrecord count }
+%}
+
+and lex_loop st count =
+    st.token <\- st.yycursor;
+    lex st count
+
+let main () =
+    let fname = \(dqinput\(dq in
+
+    (* Prepare input file. *)
+    Out_channel.with_open_bin fname
+        (fun oc \-> for i = 1 to bufsize do
+            output_string oc \(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq
+        done);
+
+    (* Run lexer on the prepared file. *)
+    In_channel.with_open_bin fname
+        (fun ic \->
+            let yylimit = bufsize \- 1 in
+            let st = {
+                file = ic;
+                yyinput = create bufsize;
+                yycursor = yylimit;
+                yymarker = yylimit;
+                yylimit = yylimit;
+                token = yylimit;
+                eof = false;
+            } in if not (lex_loop st 0 = 3 * bufsize) then
+                raise (Failure \(dqerror\(dq));
+
+    (* Cleanup. *)
+    Sys.remove fname
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads a file named \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open Bytes
+
+%{max %}
+let bufsize = 4096
+
+exception Fill
+
+type state = {
+    file: in_channel;
+    yyinput: bytes;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yylimit: int;
+    mutable token: int;
+    mutable eof: bool;
+}
+
+type status = Ok | Eof | LongLexeme
+
+let fill (st: state) (need: int) : status =
+    if st.eof then Eof else
+
+    (* Error: lexeme too long. In real life could reallocate a larger buffer. *)
+    if st.token < need then LongLexeme else (
+
+    (* Shift buffer contents (discard everything up to the current token). *)
+    blit st.yyinput st.token st.yyinput 0 (st.yylimit \- st.token);
+    st.yycursor <\- st.yycursor \- st.token;
+    st.yymarker <\- st.yymarker \- st.token;
+    st.yylimit <\- st.yylimit \- st.token;
+    st.token <\- 0;
+
+    (* Fill free space at the end of buffer with new data from file. *)
+    let n = input st.file st.yyinput st.yylimit (bufsize \- st.yylimit \- 1) in (* \-1 for sentinel *)
+    st.yylimit <\- st.yylimit + n;
+
+    (* If read zero characters, this is end of input => add zero padding
+       so that the lexer can access characters at the end of buffer. *)
+    if n = 0 then
+        st.eof <\- true; (* end of file *)
+        for i = 0 to (yymaxfill \- 1) do
+            set st.yyinput (st.yylimit + i) \(aq\ex00\(aq;
+            st.yylimit <\- st.yylimit + yymaxfill
+        done;
+
+    Ok)
+
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;state\(dq, \(dqcount;int\(dq];
+    re2c:define:YYFILL = \(dqif not (fill yyrecord @@ = Ok) then raise Fill;\(dq;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    [\ex00] {
+        (* check that it is the sentinel, not some unexpected null *)
+        if yyrecord.token = yyrecord.yylimit \- yymaxfill then count else \-1
+    }
+    str  { lex_loop yyrecord (count + 1) }
+    [ ]+ { lex_loop yyrecord count }
+    *    { \-1 }
+%}
+
+and lex_loop st count =
+    st.token <\- st.yycursor;
+    try lex st count with Fill \-> \-1
+
+let main () =
+    let fname = \(dqinput\(dq in
+
+    (* Prepare input file. *)
+    Out_channel.with_open_bin fname
+        (fun oc \-> for i = 1 to bufsize do
+            output_string oc \(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq
+        done);
+
+    (* Run lexer on the prepared file. *)
+    In_channel.with_open_bin fname
+        (fun ic \->
+            let yylimit = bufsize \- yymaxfill in
+            let st = {
+                file = ic;
+                yyinput = create bufsize;
+                yycursor = yylimit;
+                yymarker = yylimit;
+                yylimit = yylimit;
+                token = yylimit;
+                eof = false;
+            } in if not (lex_loop st 0 = 3 * bufsize) then
+                raise (Failure \(dqerror\(dq));
+
+    (* Cleanup. *)
+    Sys.remove fname
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-i *)
+
+open Int64
+open Option
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+} 
+
+let add (num: int option) (dgt: int) (base: int) : int option =
+    match num with
+        | None \-> None
+        | Some n \->
+            let n\(aq = add (mul (of_int n) (of_int base)) (of_int dgt)
+            in if n\(aq > (of_int32 Int32.max_int) then None else Some (to_int n\(aq)
+
+%{
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:yyfill:enable = 0;
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_bin;int option\(dq, \(dqst;state\(dq, \(dqnum;int option\(dq];
+    [01] { parse_bin st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 2) }
+    *    { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_oct;int option\(dq, \(dqst;state\(dq, \(dqnum;int option\(dq];
+    [0\-7] { parse_oct st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 8) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_dec;int option\(dq, \(dqst;state\(dq, \(dqnum;int option\(dq];
+    [0\-9] { parse_dec st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 10) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse_hex;int option\(dq, \(dqst;state\(dq, \(dqnum;int option\(dq];
+    [0\-9] { parse_hex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 16) }
+    [a\-f] { parse_hex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 87) 16) }
+    [A\-F] { parse_hex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 55) 16) }
+    *     { num }
+%}
+
+%{local
+    re2c:define:YYFN = [\(dqparse;int option\(dq, \(dqst;state\(dq];
+    \(aq0b\(aq / [01]        { parse_bin st (Some 0) }
+    \(dq0\(dq                { parse_oct st (Some 0) }
+    \(dq\(dq / [1\-9]         { parse_dec st (Some 0) }
+    \(aq0x\(aq / [0\-9a\-fA\-F] { parse_hex st (Some 0) }
+    *                  { None }
+%}
+
+let test (yyinput: string) (result: int option) =
+    let st = {yyinput = yyinput; yycursor = 0; yymarker = 0} in
+    if not (parse st = result) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test \(dq\ex00\(dq None;
+    test \(dq1234567890\ex00\(dq (Some 1234567890);
+    test \(dq0b1101\ex00\(dq (Some 13);
+    test \(dq0x7Fe\ex00\(dq (Some 2046);
+    test \(dq0644\ex00\(dq (Some 420);
+    test \(dq9999999999\ex00\(dq None
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-ci *)
+
+open Int64
+open Option
+open String
+
+%{conditions %}
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yycond: yycondtype;
+} 
+
+let add (num: int option) (dgt: int) (base: int) : int option =
+    match num with
+        | None \-> None
+        | Some n \->
+            let n\(aq = add (mul (of_int n) (of_int base)) (of_int dgt)
+            in if n\(aq > (of_int32 Int32.max_int) then None else Some (to_int n\(aq)
+
+%{
+    re2c:define:YYFN = [\(dqparse;int option\(dq, \(dqst;state\(dq, \(dqnum;int option\(dq];
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:yyfill:enable = 0;
+
+    <init> \(aq0b\(aq / [01]        :=> bin
+    <init> \(dq0\(dq                :=> oct
+    <init> \(dq\(dq / [1\-9]         :=> dec
+    <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+    <init> * { None }
+
+    <bin> [01]  { yyfnbin st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 2) }
+    <oct> [0\-7] { yyfnoct st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 8) }
+    <dec> [0\-9] { yyfndec st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 10) }
+    <hex> [0\-9] { yyfnhex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 48) 16) }
+    <hex> [a\-f] { yyfnhex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 87) 16) }
+    <hex> [A\-F] { yyfnhex st (add num (Char.code st.yyinput.[st.yycursor \- 1] \- 55) 16) }
+
+    <bin, oct, dec, hex> * { num }
+%}
+
+let test (yyinput: string) (result: int option) =
+    let st = {yyinput = yyinput; yycursor = 0; yymarker = 0; yycond = YYC_init} in
+    if not (parse st (Some 0) = result) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test \(dq\ex00\(dq None;
+    test \(dq1234567890\ex00\(dq (Some 1234567890);
+    test \(dq0b1101\ex00\(dq (Some 13);
+    test \(dq0x7Fe\ex00\(dq (Some 2046);
+    test \(dq0644\ex00\(dq (Some 420);
+    test \(dq9999999999\ex00\(dq None
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYSETSTATE(state)\fP and \fBYYGETSTATE()\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-fi *)
+
+open Bytes
+
+(* Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+   In real world use a larger buffer. *)
+let bufsize = 10
+
+let debug = false
+let log format = (if debug then Printf.eprintf else Printf.ifprintf stderr) format
+
+type state = {
+    file: in_channel;
+    yyinput: bytes;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yylimit: int;
+    mutable token: int;
+    mutable yystate: int;
+    mutable recv: int;
+}
+
+type status = End | Ready | Waiting | BadPacket | BigPacket
+
+let fill(st: state) : status =
+    (* Error: lexeme too long. In real life could reallocate a larger buffer. *)
+    if st.token < 1 then BigPacket else (
+
+    (* Shift buffer contents (discard everything up to the current token). *)
+    blit st.yyinput st.token st.yyinput 0 (st.yylimit \- st.token);
+    st.yycursor <\- st.yycursor \- st.token;
+    st.yymarker <\- st.yymarker \- st.token;
+    st.yylimit <\- st.yylimit \- st.token;
+    st.token <\- 0;
+
+    (* Fill free space at the end of buffer with new data from file. *)
+    let n = In_channel.input st.file st.yyinput st.yylimit (bufsize \- st.yylimit \- 1) in
+    st.yylimit <\- st.yylimit + n;
+    set st.yyinput st.yylimit \(aq\ex00\(aq; (* append sentinel *)
+
+    Ready)
+
+%{
+    re2c:define:YYFN = [\(dqlex;status\(dq, \(dqyyrecord;state\(dq];
+    re2c:define:YYFILL = \(dqWaiting\(dq;
+    re2c:eof = 0;
+
+    packet = [a\-z]+[;];
+
+    *      { BadPacket }
+    $      { End }
+    packet { yyrecord.recv <\- yyrecord.recv + 1; lex_loop yyrecord }
+%}
+
+and lex_loop st =
+    st.token <\- st.yycursor;
+    lex st
+
+let test (packets: string list) (sts: status) =
+    let fname = \(dqpipe\(dq in
+
+    let oc = Out_channel.open_bin fname in
+    let ic = In_channel.open_bin fname in
+
+    let yylimit = bufsize \- 1 in
+    let st = {
+        file = ic;
+        (* Sentinel (at \(gayylimit\(ga offset) is set to null, which triggers YYFILL. *)
+        yyinput = create bufsize;
+        yycursor = yylimit;
+        yymarker = yylimit;
+        yylimit = yylimit;
+        token = yylimit;
+        yystate = \-1;
+        recv = 0;
+    } in
+
+    let rec loop packets = match lex_loop st with
+        | End \->
+            log \(dqdone: got %d packets\en\(dq st.recv;
+            End
+        | Waiting \->
+            log \(dqwaiting...\en\(dq;
+            let packets\(aq = match packets with
+                | [] \-> []
+                | p :: ps \->
+                    log \(dqsent packet \(aq%s\(aq\en\(dq p;
+                    Out_channel.output_string oc p;
+                    Out_channel.flush oc; (* without \(gaflush\(ga write happens too late *)
+                    ps
+            in (match fill st with
+                | BigPacket \->
+                    log \(dqerror: packet too big\en\(dq;
+                    BigPacket
+                | Ready \-> loop packets\(aq
+                | _ \-> raise (Failure \(dqunexpected status after fill\(dq))
+        | BadPacket \->
+            log \(dqerror: ill\-formed packet\en\(dq;
+            BadPacket
+        | _ \-> raise (Failure \(dqunexpected status\(dq)
+
+    in if not (loop packets = sts) then
+        raise (Failure \(dqerror\(dq);
+
+    In_channel.close ic;
+    Out_channel.close oc;
+    Sys.remove fname
+
+let main () =
+    test [] End;
+    test [\(dqzero;\(dq; \(dqone;\(dq; \(dqtwo;\(dq; \(dqthree;\(dq; \(dqfour;\(dq] End;
+    test [\(dqzer0;\(dq] BadPacket;
+    test [\(dqgoooooooooogle;\(dq] BigPacket
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+The in\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. One exception is the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. The use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and the in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+(* This example shows how to combine reusable re2c blocks: two blocks
+   (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+   in both blocks; the \(aqfish\(aq block takes priority because it is used
+   earlier. Default rule * occurs in all three blocks; the local (not
+   inherited) definition takes priority. *)
+
+open String
+
+type answer = Color | Fish | Dunno
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+}
+
+%{rules:colors
+    *                            { raise (Failure \(dqah\(dq); }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { Color }
+%}
+
+%{rules:fish
+    *                            { raise (Failure \(dqoh\(dq); }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { Fish }
+%}
+
+%{
+    re2c:define:YYFN = [\(dqlex;answer\(dq, \(dqyyrecord;state\(dq];
+    re2c:yyfill:enable = 0;
+
+    !use:fish;
+    !use:colors;
+    * { Dunno } // overrides inherited \(aq*\(aq rules
+%}
+
+let test(str, ans) =
+    let st = {yyinput = str; yycursor = 0; yymarker = 0}
+    in if not (lex st = ans) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dqsalmon\(dq, Fish);
+    test(\(dqwhat?\(dq, Dunno)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-\-input\-encoding utf8 *)
+(* This example supports multiple input encodings: UTF\-8 and UTF\-32.
+   Both lexers are generated from the same rules block, and the use
+   blocks add only encoding\-specific configurations. *)
+
+open Array
+
+type \(aqa state = {
+    yyinput: \(aqa array;
+    mutable yycursor: int;
+    mutable yymarker: int;
+}
+
+%{rules
+    re2c:yyfill:enable = 0;
+
+    \(dq∀x ∃y\(dq { Some yyrecord.yycursor }
+    *       { None }
+%}
+
+%{use
+    re2c:define:YYFN = [\(dqlex8;int option\(dq, \(dqyyrecord;char state\(dq];
+    re2c:encoding:utf8 = 1;
+%}
+
+%{use
+    re2c:define:YYFN = [\(dqlex32;int option\(dq, \(dqyyrecord;int state\(dq];
+    re2c:encoding:utf32 = 1;
+%}
+
+let main() =
+    let st8 = {
+        yyinput = [|\(aq\exe2\(aq; \(aq\ex08\(aq; \(aq\ex80\(aq; \(aq\ex78\(aq; \(aq\ex20\(aq; \(aq\exe2\(aq; \(aq\ex88\(aq; \(aq\ex83\(aq; \(aq\ex79\(aq|];
+        yycursor = 0;
+        yymarker = 0;
+    } in if not (lex8 st8 = Some (Array.length st8.yyinput)) then raise (Failure \(dqerror\(dq);
+
+    let st32 = {
+        yycursor = 0;
+        yymarker = 0;
+        yyinput = [|0x2200; 0x78; 0x20; 0x2203; 0x79|];
+    } in if not (lex32 st32 = Some (Array.length st32.yyinput)) then raise (Failure \(dqerror\(dq);
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    (* Final tag variables available in semantic action. *)
+    %{svars format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+    (* Intermediate tag variables used by the lexer (must be autogenerated). *)
+    %{stags format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+}
+
+type semver = {
+    major: int;
+    minor: int;
+    patch: int;
+}
+
+let s2n (str: string) (i1: int) (i2: int) : int =
+    let rec f s i j n =
+        if i >= j then n else f s (i + 1) j (n * 10 + Char.code s.[i] \- 48)
+    in f str i1 i2 0
+
+%{local
+    re2c:define:YYFN = [\(dqparse;semver option\(dq, \(dqst;state\(dq];
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+        Some {
+            major = s2n st.yyinput st.t1 st.t2;
+            minor = s2n st.yyinput st.t3 st.t4;
+            patch = if st.t5 = \-1 then 0 else s2n st.yyinput st.t5 (st.yycursor \- 1)
+        }
+    }
+    * { None }
+%}
+
+let test (str: string) (result: semver option) =
+    let st = {
+        yyinput = str;
+        yycursor = 0;
+        yymarker = 0;
+        %{svars format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+        %{stags format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+    }
+    in if not (parse st = result) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test \(dq23.34\ex00\(dq (Some {major = 23; minor = 34; patch = 0});
+    test \(dq1.2.99999\ex00\(dq (Some {major = 1; minor = 2; patch = 99999});
+    test \(dq1.a\ex00\(dq None
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open Bytes
+
+let bufsize = 4096
+
+type state = {
+    file: in_channel;
+    yyinput: bytes;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yylimit: int;
+    mutable token: int;
+    mutable eof: bool;
+    (* Final tag variables available in semantic action. *)
+    %{svars format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+    (* Intermediate tag variables used by the lexer (must be autogenerated). *)
+    %{stags format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+}
+
+type status = Ok | Eof | LongLexeme
+
+type semver = {
+    major: int;
+    minor: int;
+    patch: int;
+}
+
+let s2n (str: bytes) (i1: int) (i2: int) : int =
+    let rec f s i j n =
+        if i >= j then n else f s (i + 1) j (n * 10 + Char.code (get s i) \- 48)
+    in f str i1 i2 0
+
+let fill(st: state) : status =
+    if st.eof then Eof else
+
+    (* Error: lexeme too long. In real life could reallocate a larger buffer. *)
+    if st.token < 1 then LongLexeme else (
+
+    (* Shift buffer contents (discard everything up to the current token). *)
+    blit st.yyinput st.token st.yyinput 0 (st.yylimit \- st.token);
+    st.yycursor <\- st.yycursor \- st.token;
+    st.yymarker <\- st.yymarker \- st.token;
+    st.yylimit <\- st.yylimit \- st.token;
+    %{stags format = \(dq\en\etst.@@ <\- if st.@@ = \-1 then \-1 else st.@@ \- st.token;\(dq; %}
+    st.token <\- 0;
+
+    (* Fill free space at the end of buffer with new data from file. *)
+    let n = input st.file st.yyinput st.yylimit (bufsize \- st.yylimit \- 1) in (* \-1 for sentinel *)
+    st.yylimit <\- st.yylimit + n;
+    if n = 0 then
+        st.eof <\- true; (* end of file *)
+        set st.yyinput st.yylimit \(aq\ex00\(aq; (* append sentinel *)
+
+    Ok)
+
+%{
+    re2c:define:YYFN = [\(dqlex;(semver list) option\(dq, \(dqst;state\(dq, \(dqvers;semver list\(dq];
+    re2c:define:YYFILL = \(dqfill st = Ok\(dq;
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:tags = 1;
+    re2c:eof = 0;
+
+    num = [0\-9]+;
+
+    @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\en] {
+        let ver = {
+            major = s2n st.yyinput st.t1 st.t2;
+            minor = s2n st.yyinput st.t3 st.t4;
+            patch = if st.t5 = \-1 then 0 else s2n st.yyinput st.t5 (st.yycursor \- 1)
+        } in lex_loop st (ver :: vers)
+    }
+    $ { Some (List.rev vers) }
+    * { None }
+%}
+
+and lex_loop st vers =
+    st.token <\- st.yycursor;
+    lex st vers
+
+let main () =
+    let fname = \(dqinput\(dq in
+
+    (* Prepare input file. *)
+    Out_channel.with_open_bin fname
+        (fun oc \-> for i = 1 to bufsize do
+            output_string oc \(dq1.22.333\en\(dq
+        done);
+
+    (* Construct the expected result to compare against. *)
+    let expect = Some (List.init bufsize
+            (fun _ \-> {major = 1; minor = 22; patch = 333;})) in
+
+    (* Run lexer on the prepared file. *)
+    In_channel.with_open_bin fname
+        (fun ic \->
+            let yylimit = bufsize \- 1 in
+            let st = {
+                file = ic;
+                yyinput = create bufsize;
+                yycursor = yylimit;
+                yymarker = yylimit;
+                yylimit = yylimit;
+                token = yylimit;
+                eof = false;
+                %{svars format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+                %{stags format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+            } in if (lex_loop st [] <> expect) then
+                raise (Failure \(dqerror\(dq));
+
+    (* Cleanup. *)
+    Sys.remove fname
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    (* Final tag variables available in semantic action. *)
+    %{svars format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+    (* Intermediate tag variables used by the lexer (must be autogenerated). *)
+    %{stags format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+}
+
+type semver = {
+    major: int;
+    minor: int;
+    patch: int;
+}
+
+let s2n (str: string) (i1: int) (i2: int) : int =
+    let rec f s i j n =
+        if i >= j then n else f s (i + 1) j (n * 10 + Char.code s.[i] \- 48)
+    in f str i1 i2 0
+
+%{local
+    re2c:define:YYFN = [\(dqparse;semver option\(dq, \(dqst;state\(dq];
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:captvars = 1;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+        Some {
+            major = s2n st.yyinput st.yytl1 st.yytr1;
+            minor = s2n st.yyinput st.yytl2 st.yytr2;
+            patch = if st.yytl3 = \-1 then 0 else s2n st.yyinput (st.yytl3 + 1) st.yytr3
+        }
+    }
+    * { None }
+%}
+
+let test (str: string) (result: semver option) =
+    let st = {
+        yyinput = str;
+        yycursor = 0;
+        yymarker = 0;
+        %{svars format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+        %{stags format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+    }
+    in if not (parse st = result) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test \(dq23.34\ex00\(dq (Some {major = 23; minor = 34; patch = 0});
+    test \(dq1.2.99999\ex00\(dq (Some {major = 1; minor = 2; patch = 99999});
+    test \(dq1.a\ex00\(dq None
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT *)
+
+open String
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    (* Final tag variables available in semantic action. *)
+    %{svars format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+    %{mvars format = \(dq\en\etmutable @@{tag}: int list;\(dq; %}
+    (* Intermediate tag variables used by the lexer (must be autogenerated). *)
+    %{stags format = \(dq\en\etmutable @@{tag}: int;\(dq; %}
+    %{mtags format = \(dq\en\etmutable @@{tag}: int list;\(dq; %}
+}
+
+let s2n (str: string) (i1: int) (i2: int) : int =
+    let rec f s i j n =
+        if i >= j then n else f s (i + 1) j (n * 10 + Char.code s.[i] \- 48)
+    in f str i1 i2 0
+
+%{local
+    re2c:define:YYFN = [\(dqparse;(int list) option\(dq, \(dqst;state\(dq];
+    re2c:define:YYMTAGP = \(dq@@ <\- st.yycursor :: @@;\(dq;
+    re2c:define:YYMTAGN = \(dq\(dq; // alternatively could add \(ga\-1\(ga to the list
+    re2c:variable:yyrecord = \(dqst\(dq;
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+
+    num = [0\-9]+;
+
+    @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+        let x = s2n st.yyinput st.t1 st.t2 in
+        let xs = List.rev (List.map2 (fun x y \-> s2n st.yyinput x y) st.t3 st.t4) in
+        Some (x :: xs)
+    }
+    * { None }
+%}
+
+let test (str: string) (result: (int list) option) =
+    let st = {
+        yyinput = str;
+        yycursor = 0;
+        yymarker = 0;
+        %{svars format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+        %{mvars format = \(dq\en\et\et@@{tag} = [];\(dq; %}
+        %{stags format = \(dq\en\et\et@@{tag} = \-1;\(dq; %}
+        %{mtags format = \(dq\en\et\et@@{tag} = [];\(dq; %}
+    }
+    in if not (parse st = result) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test \(dq1\ex00\(dq (Some [1]);
+    test \(dq1.2.3.4.5.6.7\ex00\(dq (Some [1; 2; 3; 4; 5; 6; 7;]);
+    test \(dq1.2.\ex00\(dq None
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-\-utf8 \-i *)
+
+open String
+
+%{include \(dqunicode_categories.re\(dq %}
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yyaccept: int;
+}
+
+%{
+    re2c:define:YYFN = [\(dqlex;bool\(dq, \(dqyyrecord;state\(dq];
+    re2c:yyfill:enable = 0;
+
+    // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+    // (see https://unicode.org/reports/tr31)
+    id_start    = L | Nl | [$_];
+    id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+    identifier  = id_start id_continue*;
+
+    identifier { true }
+    *          { false }
+%}
+
+let main () =
+    let st = {
+        yyinput = \(dq_Ыдентификатор\ex00\(dq;
+        yycursor = 0;
+        yymarker = 0;
+        yyaccept = 0;
+    }
+    in if not (lex st) then raise (Failure \(dqerror\(dq)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using the include directive.
+.SS Include file 1 (definitions.ml)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+type number = Int | Float | NaN
+
+%{
+    number = [1\-9][0\-9]*;
+%}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { Float }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-i *)
+
+open String
+
+%{include \(dqdefinitions.ml\(dq %}
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable yymarker: int;
+    mutable yyaccept: int;
+}
+
+%{
+    re2c:define:YYFN = [\(dqlex;number\(dq, \(dqyyrecord;state\(dq];
+    re2c:yyfill:enable = 0;
+
+    *      { NaN }
+    number { Int }
+    !include \(dqextra_rules.re.inc\(dq;
+%}
+
+let test(str, num) =
+    let st = {yyinput = str; yycursor = 0; yymarker = 0; yyaccept = 0}
+    in if not (lex st = num) then raise (Failure \(dqerror\(dq)
+
+let main () =
+    test(\(dq123\ex00\(dq, Int);
+    test(\(dq123.4567\ex00\(dq, Float)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). Autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains definition of the
+lexer state with tag variables (the number of variables depends on the regular
+grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* re2ocaml $INPUT \-o $OUTPUT \-\-header lexer/state.ml \-i *)
+
+open State
+open String
+
+%{header:on %}
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable tag: int;
+    %{stags format = \(dqmutable @@: int;\(dq; %}
+}
+%{header:off %}
+
+%{
+    re2c:define:YYFN = [\(dqlex;int\(dq, \(dqyyrecord;State.state\(dq];
+    re2c:tags = 1;
+    re2c:yyfill:enable = 0;
+    re2c:header = \(dqlexer/state.ml\(dq;
+
+    [a]* @tag [b]* { yyrecord.tag }
+%}
+
+let main () =
+    let st = {
+        yyinput = \(dqab\ex00\(dq;
+        yycursor = 0;
+        tag = 0;
+        %{stags format = \(dq\en\et@@ = 0;\(dq; %}
+    }
+    in if not (lex st = 1) then raise (Failure \(dqerror\(dq)
+
+let _ = main ()
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(* Generated by re2c *)
+
+type state = {
+    yyinput: string;
+    mutable yycursor: int;
+    mutable tag: int;
+    
+mutable yyt1: int;
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+a constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years to turn the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2rust.1 b/bootstrap/doc/re2rust.1
index a77e627a1..ab99a837d 100644
--- a/bootstrap/doc/re2rust.1
+++ b/bootstrap/doc/re2rust.1
@@ -250,8 +250,8 @@ program:
 .TP
 .B \fBSimple API\fP
 (\fIadded in version 4.0\fP)
-This is a basic API that can be enabled with option \fB\-\-api simple\fP or
-configuration \fBre2c:api = simple\fP\&. It consists of the following
+This is a basic API that can be enabled with \fB\-\-api simple\fP option or
+\fBre2c:api = simple\fP configuration. It consists of the following
 primitives: \fBYYINPUT\fP (which should be defined as a sequence of code
 units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
 \fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
@@ -263,8 +263,8 @@ units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
 .B \fBRecord API\fP
 (\fIadded in version 4.0\fP)
 Record API is useful in cases when lexer state must be stored in a struct.
-It is enabled with option \fB\-\-api record\fP or configuration
-\fBre2c:api = record\fP\&. This API consists of a variable \fByyrecord\fP (the
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
 name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
 defined as a struct with fields \fByyinput\fP, \fByycursor\fP, \fByymarker\fP,
 \fByyctxmarker\fP, \fByylimit\fP (only the fields used by the generated code
@@ -275,9 +275,7 @@ need to be defined, and their names can be configured).
 .sp
 .TP
 .B \fBGeneric API\fP
-(\fIadded in version 0.14\fP)
-This is the default API for the Rust backend. It is enabled with
-\fB\-\-api generic\fP option or \fBre2c:api = generic\fP configuration.
+This is the most flexible API and the default API for the Rust backend.
 This API contains primitives for generic operations:
 \fBYYPEEK\fP,
 \fBYYSKIP\fP,
@@ -2804,53 +2802,64 @@ fn main() {
 .SH SUBMATCH EXTRACTION
 .sp
 re2c has two options for submatch extraction.
-.sp
-The first option is \fB\-T \-\-tags\fP\&. With this option one can use standalone tags
-of the form \fB@stag\fP and \fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary
-used\-defined names. Tags can be used anywhere inside of a regular expression;
-semantically they are just position markers. Tags of the form \fB@stag\fP are
-called s\-tags: they denote a single submatch value (the last input position
-where this tag matched). Tags of the form \fB#mtag\fP are called m\-tags: they
-denote multiple submatch values (the whole history of repetitions of this tag).
-All tags should be defined by the user as variables with the corresponding
-names. With standalone tags re2c uses leftmost greedy disambiguation: submatch
-positions correspond to the leftmost matching path through the regular
-expression.
-.sp
-The second option is \fB\-P \-\-posix\-captures\fP: it enables POSIX\-compliant
-capturing groups. In this mode parentheses in regular expressions denote the
-beginning and the end of capturing groups; the whole regular expression is group
-number zero. The number of groups for the matching rule is stored in a variable
-\fByynmatch\fP, and submatch results are stored in \fByypmatch\fP array. Both
-\fByynmatch\fP and \fByypmatch\fP should be defined by the user, and \fByypmatch\fP
-size must be at least \fB[yynmatch * 2]\fP\&. re2c provides a directive
-\fB/*!maxnmatch:re2c*/\fP that defines \fBYYMAXNMATCH\fP: a constant  equal to the
-maximal value of \fByynmatch\fP among all rules. Note that re2c implements
-POSIX\-compliant disambiguation: each subexpression matches as long as possible,
-and subexpressions that start earlier in regular expression have priority over
-those starting later. Capturing groups are translated into s\-tags under the
-hood, therefore we use the word \(dqtag\(dq to describe them as well.
-.sp
-With both \fB\-P \-\-posix\-captures\fP and \fBT \-\-tags\fP options re2c uses efficient
-submatch extraction algorithm described in the
-\fI\%Tagged Deterministic Finite Automata with Lookahead\fP
-paper. The overhead on submatch extraction in the generated lexer grows with the
-number of tags \-\-\- if this number is moderate, the overhead is barely
-noticeable. In the lexer tags are implemented using a number of tag variables
-generated by re2c. There is no one\-to\-one correspondence between tag variables
-and tags: a single variable may be reused for different tags, and one tag may
-require multiple variables to hold all its ambiguous values. Eventually
-ambiguity is resolved, and only one final variable per tag survives. When a rule
-matches, all its tags are set to the values of the corresponding tag variables.
-The exact number of tag variables is unknown to the user; this number is
-determined by re2c. However, tag variables should be defined by the user as a
-part of the lexer state and updated by \fBYYFILL\fP, therefore re2c provides
-directives \fB/*!stags:re2c*/\fP and \fB/*!mtags:re2c*/\fP that can be used to
-declare, initialize and manipulate tag variables. These directives have two
-optional configurations: \fBformat = \(dq@@\(dq;\fP (specifies the template where \fB@@\fP
-is substituted with the name of each tag variable), and \fBseparator = \(dq\(dq;\fP
-(specifies the piece of code used to join the generated pieces for different
-tag variables).
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars = 1\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
 .sp
 S\-tags support the following operations:
 .INDENT 0.0
@@ -3102,7 +3111,7 @@ fn main() {
 .UNINDENT
 .UNINDENT
 .sp
-Here is an example of using POSIX capturing groups to parse semantic versions.
+Here is an example of using capturing groups to parse semantic versions.
 .INDENT 0.0
 .INDENT 3.5
 .sp
@@ -3110,9 +3119,6 @@ Here is an example of using POSIX capturing groups to parse semantic versions.
 .ft C
 // re2rust $INPUT \-o $OUTPUT
 
-// Maximum number of capturing groups among all rules.
-/*!maxnmatch:re2c*/
-
 #[derive(Debug, PartialEq)]
 struct SemVer(u32, u32, u32); // version: (major, minor, patch)
 
@@ -3129,32 +3135,25 @@ fn parse(yyinput: &[u8]) \-> Option<SemVer> {
 
     let (mut yycursor, mut yymarker) = (0, 0);
 
-    // Allocate memory for capturing parentheses (twice the number of groups).
-    let yynmatch: usize;
-    let mut yypmatch = [0; YYMAXNMATCH*2];
+    // Final tag variables available in semantic action.
+    /*!stags:re2c format = \(aqlet mut @@ = NONE;\(aq; */
 
     // Intermediate tag variables used by the lexer (must be autogenerated).
-    /*!stags:re2c format = \(aqlet mut @@ = NONE;\(aq; */
+    /*!svars:re2c format = \(aq#[allow(unused_mut)]\enlet mut @@;\en\(aq; */
 
     /*!re2c
         re2c:api = default;
         re2c:define:YYCTYPE = u8;
         re2c:yyfill:enable = 0;
-        re2c:posix\-captures = 1;
+        re2c:captvars = 1;
 
         num = [0\-9]+;
 
         (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
-            // \(gayynmatch\(ga is the number of capturing groups
-            assert_eq!(yynmatch, 4);
-
-            // Even \(gayypmatch\(ga values are for opening parentheses, odd values
-            // are for closing parentheses, the first group is the whole match.
-            let major = s2n(&yyinput[yypmatch[2]..yypmatch[3]]);
-            let minor = s2n(&yyinput[yypmatch[4]..yypmatch[5]]);
-            let patch = if yypmatch[6] == NONE {0}
-                else {s2n(&yyinput[yypmatch[6] + 1..yypmatch[7]])};
-
+            assert!(yytl0 == 0 && yytr0 == yyinput.len());
+            let major = s2n(&yyinput[yytl1..yytr1]);
+            let minor = s2n(&yyinput[yytl2..yytr2]);
+            let patch = if yytl3 == NONE {0} else {s2n(&yyinput[yytl3 + 1..yytr3])};
             return Some(SemVer(major, minor, patch));
         }
         * { return None; }
diff --git a/bootstrap/doc/re2v.1 b/bootstrap/doc/re2v.1
new file mode 100644
index 000000000..5506ba0b7
--- /dev/null
+++ b/bootstrap/doc/re2v.1
@@ -0,0 +1,3488 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for V, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-i
+
+fn lex(yyinput string) {
+    mut yycursor := 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        number = [1\-9][0\-9]*;
+
+        number { return }
+        *      { panic(\(dqerror!\(dq) }
+    */
+}
+
+fn main() {
+    lex(\(dq1234\ex00\(dq)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB/*!re2c\fP and \fB*/\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Code generated by re2v, DO NOT EDIT.
+// re2v $INPUT \-o $OUTPUT \-i
+
+fn lex(yyinput string) {
+    mut yycursor := 0
+    
+    mut yych := 0
+    yych = yyinput[yycursor]
+    match yych {
+        0x31...0x39 { unsafe { goto yy2 } }
+        else { unsafe { goto yy1 } }
+    }
+yy1:
+    yycursor += 1
+    panic(\(dqerror!\(dq)
+yy2:
+    yycursor += 1
+    yych = yyinput[yycursor]
+    match yych {
+        0x30...0x39 { unsafe { goto yy2 } }
+        else { unsafe { goto yy3 } }
+    }
+yy3:
+    return
+
+}
+
+fn main() {
+    lex(\(dq1234\ex00\(dq)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2v has three API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBSimple API\fP
+This is the default API for the V backend. It consists of the following
+primitives: \fBYYINPUT\fP (which should be defined as a sequence of code
+units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
+\fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBRecord API\fP
+Record API is useful in cases when lexer state must be stored in a struct.
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
+name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
+defined as a struct with fields \fByyinput\fP, \fByycursor\fP, \fByymarker\fP,
+\fByyctxmarker\fP, \fByylimit\fP (only the fields used by the generated code
+need to be defined, and their names can be configured).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are less than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in the form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-syntax\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: the default case aborts the program, and an explicit
+\fB\-1\fP case contains the transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api generic\fP or \fBre2c:api = generic;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+// Expect a null\-terminated string.
+fn lex(yyinput string) int {
+    mut yycursor := 0
+    mut count := 0
+
+loop: /*!re2c
+    re2c:yyfill:enable = 0;
+
+    *      { return \-1 }
+    [\ex00] { return count }
+    [a\-z]+ { count += 1; unsafe { goto loop } }
+    [ ]+   {  unsafe { goto loop } }
+    */
+}
+
+fn main() {
+    assert lex(\(dq\e0\(dq) == 0
+    assert lex(\(dqone two three\e0\(dq) == 3
+    assert lex(\(dqf0ur\e0\(dq) == \-1
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+// Expects a null\-terminated string.
+fn lex(yyinput string) int {
+    mut yycursor, mut yymarker := 0, 0
+    yylimit := yyinput.len \- 1 // yylimit points at the terminating null
+    mut count := 0
+
+loop: /*!re2c
+    re2c:eof = 0;
+    re2c:yyfill:enable = 0;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    *    { return \-1 }
+    $    { return count }
+    str  { count += 1; unsafe { goto loop } }
+    [ ]+ { unsafe { goto loop } }
+
+    */
+}
+
+fn main() {
+    assert lex(\(dq\e0\(dq) == 0
+    assert lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \e0\(dq) == 3
+    assert lex(\(dq\(aqunterminated\e\e\(aq\e0\(dq) == \-1
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+/*!max:re2c*/
+
+// Expects yymaxfill\-padded string.
+fn lex(str string) int {
+    // Pad string with yymaxfill zeroes at the end.
+    mut yyinput := []u8{len: str.len + yymaxfill}
+    copy(mut &yyinput, str.bytes())
+
+    mut yycursor := 0
+    yylimit := yyinput.len
+    mut count := 0
+
+loop: /*!re2c
+    re2c:define:YYFILL = \(dqreturn \-1\(dq;
+
+    str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+    [\ex00] {
+        // Check that it is the sentinel, not some unexpected null.
+        if yycursor \- 1 == str.len { return count } else { return \-1 }
+    }
+    str  { count += 1; unsafe { goto loop } }
+    [ ]+ { unsafe { goto loop } }
+    *    { return \-1 }
+
+    */
+}
+
+fn main() {
+    assert lex(\(dq\(dq) == 0
+    assert lex(\(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq) == 3
+    assert lex(\(dq\(aqunterminated\e\e\(aq\(dq) == \-1
+    assert lex(\(dq\(aqunexpected \e00 null\e\e\(aq\(dq) == \-1
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated words. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To compensate
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the check condition
+fails, \fBYYPEEK\fP returns the real character, otherwise it returns a fake
+sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+// Returns \(dqfake\(dq terminating null if cursor has reached limit.
+fn peek(str string, cur int) u8 {
+    return if cur >= str.len { u8(0) } /* fake null */ else { return str[cur] }
+}
+
+// Expects a string without terminating null.
+fn lex(str string) int {
+    mut cur := 0
+    mut count := 0
+
+loop: /*!re2c
+    re2c:api = generic;
+    re2c:yyfill:enable = 0;
+    re2c:define:YYPEEK = \(dqpeek(str, cur)\(dq;
+    re2c:define:YYSKIP = \(dqcur += 1\(dq;
+
+    *      { return \-1 }
+    [\ex00] { return count }
+    [a\-z]+ { count += 1; unsafe { goto loop } }
+    [ ]+   { unsafe { goto loop } }
+
+    */
+}
+
+fn main() {
+    assert lex(\(dq\(dq) == 0
+    assert lex(\(dqone two three\(dq) == 3
+    assert lex(\(dqf0ur\(dq) == \-1
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in C pointer
+API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token and up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+import os
+import strings
+
+const bufsize = 4096
+
+struct State {
+    file     os.File
+mut:
+    yyinput  []u8
+    yycursor int
+    yymarker int
+    yylimit  int
+    token    int
+    eof      bool
+}
+
+fn fill(mut st &State) int {
+    if st.eof { return \-1 } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if st.token < 1 { return \-2 }
+
+    // Shift buffer contents (discard everything up to the current token).
+    copy(mut &st.yyinput, st.yyinput[st.token..st.yylimit])
+    st.yycursor \-= st.token
+    st.yymarker \-= st.token
+    st.yylimit \-= st.token
+    st.token = 0
+
+    // Fill free space at the end of buffer with new data from file.
+    pos := st.file.tell() or { 0 }
+    if n := st.file.read_bytes_into(u64(pos), mut st.yyinput[st.yylimit..bufsize]) {
+        st.yylimit += n
+    }
+    st.yyinput[st.yylimit] = 0 // append sentinel symbol
+
+    // If read less than expected, this is the end of input.
+    st.eof = st.yylimit < bufsize
+
+    return 0
+}
+
+fn lex(mut yyrecord &State) int {
+    mut count := 0
+loop:
+    yyrecord.token = yyrecord.yycursor
+    /*!re2c
+        re2c:api = record;
+        re2c:eof = 0;
+        re2c:define:YYFILL = \(dqfill(mut yyrecord) == 0\(dq;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        *    { return \-1 }
+        $    { return count }
+        str  { count += 1; unsafe { goto loop } }
+        [ ]+ { unsafe { goto loop } }
+    */
+}
+
+fn main() {
+    fname := \(dqinput\(dq
+    content := \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    mut fw := os.create(fname)!
+    fw.write_string(strings.repeat_string(content, bufsize))!
+    fw.close()
+    count := 3 * bufsize // number of quoted strings written to file
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    mut fr := os.open(fname)!
+    mut st := &State{
+        file:     fr,
+        // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+        yyinput:  []u8{len: bufsize + 1},
+        yycursor: bufsize,
+        yymarker: bufsize,
+        yylimit:  bufsize,
+        token:    bufsize,
+        eof:      false,
+    }
+
+    // Run the lexer.
+    n := lex(mut st)
+    if n != count { panic(\(dqexpected $count, got $n\(dq) }
+
+    // Cleanup: remove input file.
+    fr.close()
+    os.rm(fname)!
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads the input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+import os
+import strings
+
+/*!max:re2c*/
+const bufsize = 4096
+
+struct State {
+    file     os.File
+mut:
+    yyinput  []u8
+    yycursor int
+    yylimit  int
+    token    int
+    eof      bool
+}
+
+fn fill(mut st &State, need int) int {
+    if st.eof { return \-1 } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if st.token < need { return \-2 }
+
+    // Shift buffer contents (discard everything up to the current token).
+    copy(mut &st.yyinput, st.yyinput[st.token..st.yylimit])
+    st.yycursor \-= st.token
+    st.yylimit \-= st.token
+    st.token = 0
+
+    // Fill free space at the end of buffer with new data from file.
+    pos := st.file.tell() or { 0 }
+    if n := st.file.read_bytes_into(u64(pos), mut st.yyinput[st.yylimit..bufsize]) {
+        st.yylimit += n
+    }
+
+    // If read less than expected, this is the end of input.
+    if st.yylimit < bufsize {
+        st.eof = true
+        for i := 0; i < yymaxfill; i += 1 { st.yyinput[st.yylimit + i] = 0 }
+        st.yylimit += yymaxfill
+    }
+
+    return 0
+}
+
+fn lex(mut yyrecord &State) int {
+    mut count := 0
+loop:
+    yyrecord.token = yyrecord.yycursor
+    /*!re2c
+        re2c:api = record;
+        re2c:define:YYFILL = \(dqr := fill(mut yyrecord, @@); if r != 0 { return r }\(dq;
+
+        str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+        [\ex00] {
+            // Check that it is the sentinel, not some unexpected null.
+            return if yyrecord.token == (yyrecord.yylimit \- yymaxfill) { count } else { \-1 }
+        }
+        str  { count += 1; unsafe { goto loop } }
+        [ ]+ { unsafe { goto loop } }
+        *    { return \-1 }
+    */
+}
+
+fn main() {
+    fname := \(dqinput\(dq
+    content := \(dq\(aqqu\e0tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    mut fw := os.create(fname)!
+    fw.write_string(strings.repeat_string(content, bufsize))!
+    fw.close()
+    count := 3 * bufsize // number of quoted strings written to file
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    // This immediately triggers YYFILL, as the YYLESSTHAN condition is true.
+    mut fr := os.open(fname)!
+    mut st := &State{
+        file:     fr,
+        yyinput:  []u8{len: bufsize + yymaxfill},
+        yycursor: bufsize,
+        yylimit:  bufsize,
+        token:    bufsize,
+        eof:      false,
+    }
+
+    // Run the lexer.
+    n := lex(mut st)
+    if n != count { panic(\(dqexpected $count, got $n\(dq) }
+
+    // Cleanup: remove input file.
+    fr.close()
+    os.rm(fname)!
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-i
+
+const u32_lim = u64(1) << 32
+
+fn parse_u32(yyinput string) ?u32 {
+    mut yycursor, mut yymarker := 0, 0
+    mut n := u64(0)
+    mut yych := 0
+
+    adddgt := fn (num u64, base u64, digit u8) u64 {
+        n := num * base + u64(digit)
+        return if n >= u32_lim { u32_lim } else { n }
+    }
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:variable:yych:emit = 0;
+
+        end = \(dq\ex00\(dq;
+
+        \(aq0b\(aq / [01]        { unsafe{ goto bin } }
+        \(dq0\(dq                { unsafe{ goto oct } }
+        \(dq\(dq   / [1\-9]       { unsafe{ goto dec } }
+        \(aq0x\(aq / [0\-9a\-fA\-F] { unsafe{ goto hex } }
+        *                  { return none }
+    */
+bin:
+    /*!re2c
+        end   { unsafe{ goto end } }
+        [01]  { n = adddgt(n, 2, yyinput[yycursor\-1] \- 48); unsafe{ goto bin } }
+        *     { return none }
+    */
+oct:
+    /*!re2c
+        end   { unsafe{ goto end } }
+        [0\-7] { n = adddgt(n, 8, yyinput[yycursor\-1] \- 48); unsafe{ goto oct } }
+        *     { return none }
+    */
+dec:
+    /*!re2c
+        end   { unsafe{ goto end } }
+        [0\-9] { n = adddgt(n, 10, yyinput[yycursor\-1] \- 48); unsafe{ goto dec } }
+        *     { return none }
+    */
+hex:
+    /*!re2c
+        end   { unsafe{ goto end } }
+        [0\-9] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 48); unsafe{ goto hex } }
+        [a\-f] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 87); unsafe{ goto hex } }
+        [A\-F] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 55); unsafe{ goto hex } }
+        *     { return none }
+    */
+end:
+    if n < u32_lim {
+        return u32(n)
+    }
+    return none
+}
+
+fn main() {
+    test := fn (num ?u32, str string) {
+        if n := parse_u32(str) {
+            if m := num { if n != m { panic(\(dqwrong number\(dq) } }
+        } else {
+            if _ := num { panic(\(dqexpected none\(dq) }
+        }
+    }
+    test(1234567890, \(dq1234567890\e0\(dq)
+    test(13, \(dq0b1101\e0\(dq)
+    test(0x7fe, \(dq0x007Fe\e0\(dq)
+    test(0o644, \(dq0644\e0\(dq)
+    test(none, \(dq9999999999\e0\(dq)
+    test(none, \(dq123??\e0\(dq)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-ci
+
+/*!conditions:re2c*/
+
+const u32_lim = u64(1) << 32
+
+fn parse_u32(yyinput string) ?u32 {
+    mut yycursor, mut yymarker := 0, 0
+    mut n := u64(0)
+    mut yycond := YYCONDTYPE.yycinit
+
+    adddgt := fn (num u64, base u64, digit u8) u64 {
+        n := num * base + u64(digit)
+        return if n >= u32_lim { u32_lim } else { n }
+    }
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        <*> * { return none }
+
+        <init> \(aq0b\(aq / [01]        :=> bin
+        <init> \(dq0\(dq                :=> oct
+        <init> \(dq\(dq   / [1\-9]       :=> dec
+        <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+
+        <bin, oct, dec, hex> \(dq\ex00\(dq { return if n < u32_lim { u32(n) } else { none } }
+
+        <bin> [01]  { n = adddgt(n, 2,  yyinput[yycursor\-1] \- 48); unsafe{ goto yyc_bin } }
+        <oct> [0\-7] { n = adddgt(n, 8,  yyinput[yycursor\-1] \- 48); unsafe{ goto yyc_oct } }
+        <dec> [0\-9] { n = adddgt(n, 10, yyinput[yycursor\-1] \- 48); unsafe{ goto yyc_dec } }
+        <hex> [0\-9] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 48); unsafe{ goto yyc_hex } }
+        <hex> [a\-f] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 87); unsafe{ goto yyc_hex } }
+        <hex> [A\-F] { n = adddgt(n, 16, yyinput[yycursor\-1] \- 55); unsafe{ goto yyc_hex } }
+    */
+}
+
+fn main() {
+    test := fn (num ?u32, str string) {
+        if n := parse_u32(str) {
+            if m := num { if n != m { panic(\(dqwrong number\(dq) } }
+        } else {
+            if _ := num { panic(\(dqexpected none\(dq) }
+        }
+    }
+    test(1234567890, \(dq1234567890\e0\(dq)
+    test(13, \(dq0b1101\e0\(dq)
+    test(0x7fe, \(dq0x007Fe\e0\(dq)
+    test(0o644, \(dq0644\e0\(dq)
+    test(none, \(dq9999999999\e0\(dq)
+    test(none, \(dq123??\e0\(dq)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). The storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v \-f $INPUT \-o $OUTPUT
+
+import log
+import os
+
+// Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+// In real world use a larger buffer.
+const bufsize = 10
+
+struct State {
+mut:
+    file     os.File
+    yyinput  []u8
+    yycursor int
+    yymarker int
+    yylimit  int
+    token    int
+    yystate  int
+}
+
+enum Status {
+    lex_end
+    lex_ready
+    lex_waiting
+    lex_bad_packet
+    lex_big_packet
+}
+
+fn fill(mut st &State) Status {
+    shift := st.token
+    used := st.yylimit \- st.token
+    free := bufsize \- used
+
+    // Error: no space. In real life can reallocate a larger buffer.
+    if free < 1 { return .lex_big_packet }
+
+    // Shift buffer contents (discard already processed data).
+    copy(mut &st.yyinput, st.yyinput[shift..shift+used])
+    st.yycursor \-= shift
+    st.yymarker \-= shift
+    st.yylimit \-= shift
+    st.token \-= shift
+
+    // Fill free space at the end of buffer with new data.
+    pos := st.file.tell() or { 0 }
+    if n := st.file.read_bytes_into(u64(pos), mut st.yyinput[st.yylimit..bufsize]) {
+        st.yylimit += n
+    }
+    st.yyinput[st.yylimit] = 0 // append sentinel symbol
+
+    return .lex_ready
+}
+
+fn lex(mut yyrecord &State, mut recv &int) Status {
+    mut yych := u8(0)
+    /*!getstate:re2c*/
+loop:
+    yyrecord.token = yyrecord.yycursor
+    /*!re2c
+        re2c:api = record;
+        re2c:eof = 0;
+        re2c:define:YYFILL = \(dqreturn .lex_waiting\(dq;
+
+        packet = [a\-z]+[;];
+
+        *      { return .lex_bad_packet }
+        $      { return .lex_end }
+        packet { recv += 1; unsafe{ goto loop } }
+    */
+}
+
+fn test(expect Status, packets []string) {
+    // Create a pipe (open the same file for reading and writing).
+    fname := \(dqpipe\(dq
+    mut fw := os.create(fname) or { panic(\(dqcannot create file\(dq) }
+    mut fr := os.open(fname) or { panic(\(dqcannot open file\(dq) }
+
+    // Initialize lexer state: \(gastate\(ga value is \-1, all offsets are at the end
+    // of buffer.
+    mut st := &State{
+        file:     fr,
+        // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+        yyinput:  []u8{len: bufsize + 1},
+        yycursor: bufsize,
+        yymarker: bufsize,
+        yylimit:  bufsize,
+        token:    bufsize,
+        yystate:  \-1,
+    }
+
+    // Main loop. The buffer contains incomplete data which appears packet by
+    // packet. When the lexer needs more input it saves its internal state and
+    // returns to the caller which should provide more input and resume lexing.
+    mut status := Status.lex_ready
+    mut send := 0
+    mut recv := 0
+    for {
+        status = lex(mut st, mut &recv)
+        if status == .lex_end {
+            break
+        } else if status == .lex_waiting {
+            if send < packets.len {
+                log.debug(\(dqsending packet $send\(dq)
+                fw.write_string(packets[send]) or { panic(\(dqcannot write to file\(dq) }
+                fw.flush()
+                send += 1
+            }
+            status = fill(mut st)
+            log.debug(\(dqfilled buffer $st.yyinput, status $status\(dq)
+            if status != .lex_ready {
+                break
+            }
+        } else if status == .lex_bad_packet {
+            break
+        }
+    }
+
+    // Check results.
+    if status != expect || (status == .lex_end && recv != send) {
+        panic(\(dqexpected $expect with $send packet(s), got $status with $recv packet(s)\(dq)
+    }
+
+    // Cleanup: remove input file.
+    fr.close()
+    fw.close()
+    os.rm(fname) or { panic(\(dqcannot remove file\(dq) }
+}
+
+fn main() {
+    //log.set_level(.debug)
+
+    test(.lex_end, [])
+    test(.lex_end, [\(dqzero;\(dq, \(dqone;\(dq, \(dqtwo;\(dq, \(dqthree;\(dq, \(dqfour;\(dq])
+    test(.lex_bad_packet, [\(dq??;\(dq])
+    test(.lex_big_packet, [\(dqlooooooooooooong;\(dq])
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. The exceptions are the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. The use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+// This example shows how to combine reusable re2c blocks: two blocks
+// (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+// in both blocks; the \(aqfish\(aq block takes priority because it is used
+// earlier. Default rule * occurs in all three blocks; the local (not
+// inherited) definition takes priority.
+
+enum What {
+    color
+    fish
+    dunno
+}
+
+/*!rules:re2c:colors
+    *                            { panic(\(dqeh!\(dq) }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { return .color }
+*/
+
+/*!rules:re2c:fish
+    *                            { panic(\(dqoh!\(dq) }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { return .fish }
+*/
+
+fn lex(yyinput string) What {
+    mut yycursor, mut yymarker := 0, 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        !use:fish;
+        !use:colors;
+        * { return .dunno }  // overrides inherited \(aq*\(aq rules
+    */
+}
+
+fn main() {
+    assert lex(\(dqsalmon\(dq) == .fish
+    assert lex(\(dqwhat?\(dq) == .dunno
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+
+// This example supports multiple input encodings: UTF\-8 and UTF\-32.
+// Both lexers are generated from the same rules block, and the use
+// blocks add only encoding\-specific configurations.
+/*!rules:re2c
+    re2c:yyfill:enable = 0;
+
+    \(dq∀x ∃y\(dq { return 0 }
+    *       { return 1 }
+*/
+
+fn lex_utf8(yyinput []u8) int {
+    mut yycursor, mut yymarker := 0, 0
+    /*!use:re2c
+        re2c:encoding:utf8 = 1;
+        re2c:define:YYCTYPE = u8; // the default
+    */
+}
+
+fn lex_utf32(yyinput []u32) int {
+    mut yycursor, mut yymarker := 0, 0
+    /*!use:re2c
+        re2c:encoding:utf32 = 1;
+        re2c:define:YYCTYPE = u32;
+    */
+}
+
+fn main() {
+    s8 := [u8(0xe2), u8(0x88), u8(0x80), u8(0x78), u8(0x20), u8(0xe2), u8(0x88), u8(0x83), u8(0x79)]
+    s32 := [u32(0x2200), u32(0x78), u32(0x20), u32(0x2203), u32(0x79)]
+    assert lex_utf8(s8) == 0
+    assert lex_utf32(s32) == 0
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars = 1\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+struct SemVer {
+    major int
+    minor int
+    patch int
+}
+
+fn s2n(s string) int { // convert pre\-parsed string to number
+    mut n := 0
+    for c in s { n = n * 10 + int(c \- 48) }
+    return n
+}
+
+fn parse(yyinput string) ?SemVer {
+    mut yycursor, mut yymarker := 0, 0
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqmut @@ := 0\en\(aq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(aqmut @@ := \-1\en\(aq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+            return SemVer{
+                major: s2n(yyinput[t1..t2]),
+                minor: s2n(yyinput[t3..t4]),
+                patch: if t5 == \-1 { 0 } else { s2n(yyinput[t5..yycursor \- 1]) }
+            }
+        }
+        * { return none }
+    */
+}
+
+fn main() {
+    test := fn (result ?SemVer, expect ?SemVer) {
+        if r := result {
+            if e := expect { if r != e { panic(\(dqexpected $e, got $r\(dq) } }
+        } else {
+            if _ := expect { panic(\(dqexpected none\(dq) }
+        }
+    }
+    test(parse(\(dq23.34\e0\(dq), SemVer{23, 34, 0})
+    test(parse(\(dq1.2.9999\e0\(dq), SemVer{1, 2, 9999})
+    test(parse(\(dq1.a\e0\(dq), none)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that this adjustment is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+import arrays
+import os
+import strings
+
+const bufsize = 4096
+const tag_none = \-1
+
+struct State {
+    file     os.File
+mut:
+    yyinput  []u8
+    yycursor int
+    yymarker int
+    yylimit  int
+    token    int
+    // Intermediate tag variables must be part of the lexer state passed to YYFILL.
+    // They don\(aqt correspond to tags and should be autogenerated by re2c.
+    /*!stags:re2c format = \(dq\et@@ int\en\(dq; */
+    eof      bool
+}
+
+struct SemVer {
+    major int
+    minor int
+    patch int
+}
+
+fn s2n(s []u8) int { // convert pre\-parsed string to number
+    mut n := 0
+    for c in s { n = n * 10 + int(c \- 48) }
+    return n
+}
+
+fn fill(mut st &State) int {
+    if st.eof { return \-1 } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if st.token < 1 { return \-2 }
+
+    // Shift buffer contents (discard everything up to the current token).
+    copy(mut &st.yyinput, st.yyinput[st.token..st.yylimit])
+    st.yycursor \-= st.token
+    st.yymarker \-= st.token
+    st.yylimit \-= st.token
+    // Tag variables need to be shifted like other input positions. The check
+    // for \-1 is only needed if some tags are nested inside of alternative or
+    // repetition, so that they can have \-1 value.
+    /*!stags:re2c format = \(dq\etif st.@@ != \-1 { st.@@ \-= st.token }\en\(dq; */
+    st.token = 0
+
+    // Fill free space at the end of buffer with new data from file.
+    pos := st.file.tell() or { 0 }
+    if n := st.file.read_bytes_into(u64(pos), mut st.yyinput[st.yylimit..bufsize]) {
+        st.yylimit += n
+    }
+    st.yyinput[st.yylimit] = 0 // append sentinel symbol
+
+    // If read less than expected, this is the end of input.
+    st.eof = st.yylimit < bufsize
+
+    return 0
+}
+
+fn parse(mut st &State) ?[]SemVer {
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(dqmut @@ := tag_none\en\(dq; */
+
+    mut vers := []SemVer{}
+loop:
+    st.token = st.yycursor
+    /*!re2c
+        re2c:api = record;
+        re2c:variable:yyrecord = st;
+        re2c:define:YYFILL = \(dqfill(mut st) == 0\(dq;
+        re2c:tags = 1;
+        re2c:eof = 0;
+
+        num = [0\-9]+;
+
+        num @t1 \(dq.\(dq @t2 num @t3 (\(dq.\(dq @t4 num)? [\en] {
+            ver := SemVer {
+                major: s2n(st.yyinput[st.token..t1]),
+                minor: s2n(st.yyinput[t2..t3]),
+                patch: if t4 == \-1 { 0 } else { s2n(st.yyinput[t4..st.yycursor \- 1]) }
+            }
+            vers = arrays.concat(vers, ver)
+            unsafe { goto loop }
+        }
+        $ { return vers }
+        * { return none }
+    */
+}
+
+fn main() {
+    fname := \(dqinput\(dq
+    content := \(dq1.22.333\en\(dq;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // newline\-separated semantic versions.
+    mut fw := os.create(fname)!
+    fw.write_string(strings.repeat_string(content, bufsize))!
+    fw.close()
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    mut fr := os.open(fname)!
+    mut st := &State{
+        file:      fr,
+        // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+        yyinput:  []u8{len: bufsize + 1},
+        yycursor: bufsize,
+        yymarker: bufsize,
+        yylimit:  bufsize,
+        token:    bufsize,
+        eof:      false,
+    }
+
+    // Run the lexer.
+    expect := []SemVer{len: bufsize, init: SemVer{1, 22, 333}}
+    result := parse(mut st) or { panic(\(dqparse failed\(dq) }
+    if result != expect { panic(\(dqerror\(dq) }
+
+    // Cleanup: remove input file.
+    fr.close()
+    os.rm(fname)!
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+struct SemVer {
+    major int
+    minor int
+    patch int
+}
+
+fn s2n(s string) int { // convert pre\-parsed string to number
+    mut n := 0
+    for c in s { n = n * 10 + int(c \- 48) }
+    return n
+}
+
+fn parse(yyinput string) ?SemVer {
+    mut yycursor, mut yymarker := 0, 0
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqmut @@ := 0\en\(aq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(aqmut @@ := 0\en\(aq; */
+
+    /*!re2c
+        re2c:yyfill:enable = 0;
+        re2c:captvars = 1;
+
+        num = [0\-9]+;
+
+        (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+            _ := yytl0; _ := yytr0 // some variables are unused
+            return SemVer {
+                major: s2n(yyinput[yytl1..yytr1]),
+                minor: s2n(yyinput[yytl2..yytr2]),
+                patch: if yytl3 == \-1 {0} else {s2n(yyinput[yytl3 + 1..yytr3])}
+            }
+        }
+        * { return none }
+    */
+}
+
+fn main() {
+    test := fn (result ?SemVer, expect ?SemVer) {
+        if r := result {
+            if e := expect { if r != e { panic(\(dqexpected $e, got $r\(dq) } }
+        } else {
+            if _ := result { panic(\(dqexpected none\(dq) }
+        }
+    }
+    test(parse(\(dq23.34\e0\(dq), SemVer{23, 34, 0})
+    test(parse(\(dq1.2.9999\e0\(dq), SemVer{1, 2, 9999})
+    test(parse(\(dq1.a\e0\(dq), none)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT
+
+import arrays
+
+const mtag_root = \-1
+const tag_none = \-1
+
+// An m\-tag tree is a way to store histories with an O(1) copy operation.
+// Histories naturally form a tree, as they have common start and fork at some
+// point. The tree is stored as an array of pairs (tag value, link to parent).
+// An m\-tag is represented with a single link in the tree (array index).
+struct MtagElem {
+    elem int
+    pred int
+}
+type MtagTrie = []MtagElem
+
+// Append a single value to an m\-tag history.
+fn add_mtag(mut trie &MtagTrie, mtag int, value int) int {
+    trie = arrays.concat(trie, MtagElem{value, mtag})
+    return trie.len \- 1
+}
+
+// Recursively unwind tag histories and collect version components.
+fn unwind(trie MtagTrie, x int, y int, str string) []int {
+    // Reached the root of the m\-tag tree, stop recursion.
+    if x == mtag_root && y == mtag_root {
+        return []
+    }
+
+    // Unwind history further.
+    mut result := unwind(trie, trie[x].pred, trie[y].pred, str)
+
+    // Get tag values. Tag histories must have equal length.
+    if x == mtag_root || y == mtag_root {
+        panic(\(dqtag histories have different length\(dq)
+    }
+    ex := trie[x].elem
+    ey := trie[y].elem
+
+    if ex != tag_none && ey != tag_none {
+        // Both tags are valid string indices, extract component.
+        result = arrays.concat(result, s2n(str[ex..ey]))
+    } else if !(ex == tag_none && ey == tag_none) {
+        panic(\(dqboth tags should be tag_none\(dq)
+    }
+    return result
+}
+
+fn s2n(s string) int { // convert pre\-parsed string to number
+    mut n := 0
+    for c in s { n = n * 10 + int(c \- 48) }
+    return n
+}
+
+fn parse(yyinput string) ?[]int {
+    mut yycursor, mut yymarker := 0, 0
+    mut trie := []MtagElem{}
+
+    // Final tag variables available in semantic action.
+    /*!svars:re2c format = \(aqmut @@ := tag_none\en\(aq; */
+    /*!mvars:re2c format = \(dqmut @@ := mtag_root\en\(dq; */
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    /*!stags:re2c format = \(aqmut @@ := tag_none\en\(aq; */
+    /*!mtags:re2c format = \(dqmut @@ := mtag_root\en\(dq; */
+
+    /*!re2c
+        re2c:tags = 1;
+        re2c:yyfill:enable = 0;
+        re2c:define:YYMTAGP = \(dq@@ = add_mtag(mut &trie, @@, yycursor)\(dq;
+        re2c:define:YYMTAGN = \(dq@@ = add_mtag(mut &trie, @@, tag_none)\(dq;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+            mut ver := []int{}
+            ver = arrays.concat(ver, s2n(yyinput[t1..t2]))
+            ver = arrays.append(ver, unwind(trie, t3, t4, yyinput))
+            return ver
+        }
+        * { return none }
+    */
+}
+
+fn main() {
+    test := fn (result ?[]int, expect ?[]int) {
+        if r := result {
+            if e := expect { if r != e { panic(\(dqexpected $e, got $r\(dq) } }
+        } else {
+            if _ := result { panic(\(dqexpected none\(dq) }
+        }
+    }
+    test(parse(\(dq1\e0\(dq), [1])
+    test(parse(\(dq1.2.3.4.5.6.7\e0\(dq), [1, 2, 3, 4, 5, 6, 7])
+    test(parse(\(dq1.\e0\(dq), none)
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-\-utf8 \-si
+
+/*!include:re2c \(dqunicode_categories.re\(dq */
+
+fn lex(yyinput string) int {
+    mut yycursor, mut yymarker := 0, 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+        // (see https://unicode.org/reports/tr31)
+        id_start    = L | Nl | [$_];
+        id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+        identifier  = id_start id_continue*;
+
+        identifier { return 0 }
+        *          { return 1 }
+    */
+}
+
+fn main() {
+    if lex(\(dq_Ыдентификатор\e0\(dq) != 0 {
+        panic(\(dqerror\(dq)
+    }
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using include directive.
+.SS Include file 1 (definitions.v)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+enum Result {
+    ok
+    fail
+}
+
+/*!re2c
+    number = [1\-9][0\-9]*;
+*/
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { return .ok }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-i
+
+/*!include:re2c \(dqdefinitions.v\(dq */
+
+fn lex(yyinput string) Result {
+    mut yycursor, mut yymarker := 0, 0
+    /*!re2c
+        re2c:yyfill:enable = 0;
+
+        *      { return .fail }
+        number { return .ok }
+        !include \(dqextra_rules.re.inc\(dq;
+    */
+}
+
+fn main() {
+    assert lex(\(dq123\e0\(dq) == .ok
+    assert lex(\(dq123.4567\e0\(dq) == .ok
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of the header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). An autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains the definition of
+the lexer state with tag variables (the number of variables depends on the
+regular grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2v $INPUT \-o $OUTPUT \-i \-\-header lexer/state.v
+module main
+
+import lexer // the package is generated by re2c
+
+/*!header:re2c:on*/
+module lexer
+
+pub struct State {
+pub mut:
+    yyinput string
+    yycursor int
+    /*!stags:re2c format=\(dq@@ int\en\(dq; */
+}
+/*!header:re2c:off*/
+
+fn lex(mut yyrecord &lexer.State) int {
+    mut t := 0
+    /*!re2c
+        re2c:header = \(dqlexer/state.v\(dq;
+        re2c:api = record;
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        [a]* @t [b]* { return t }
+    */
+}
+
+fn main() {
+    mut st := &lexer.State{yyinput:\(dqab\e0\(dq,}
+    if lex(mut st) != 1 {
+        panic(\(dqerror\(dq)
+    }
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Code generated by re2c, DO NOT EDIT.
+
+module lexer
+
+pub struct State {
+pub mut:
+    yyinput string
+    yycursor int
+    
+yyt1 int
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years turning the original idea into
+a production\-ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/bootstrap/doc/re2zig.1 b/bootstrap/doc/re2zig.1
new file mode 100644
index 000000000..a725e39cc
--- /dev/null
+++ b/bootstrap/doc/re2zig.1
@@ -0,0 +1,3530 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "RE2C" 1 "" ""
+.SH NAME
+re2c \- generate fast lexical analyzers for C/C++, Go and Rust
+.SH SYNOPSIS
+.sp
+Note: This manual is for Zig, but it refers to re2c as the general program.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+re2c    [ OPTIONS ] [ WARNINGS ] INPUT
+re2go   [ OPTIONS ] [ WARNINGS ] INPUT
+re2rust [ OPTIONS ] [ WARNINGS ] INPUT
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Input can be either a file or \fB\-\fP for stdin.
+.SH INTRODUCTION
+.sp
+re2c works as a preprocessor. It reads the input file (which is usually a
+program in the target language, but can be anything) and looks for blocks of
+code enclosed in special\-form comments. The text outside of these blocks is
+copied verbatim into the output file. The contents of the blocks are processed
+by re2c. It translates them to code in the target language and outputs the
+generated code in place of the block.
+.sp
+Here is an example of a small program that checks if a given string contains a
+decimal number:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+fn lex(yyinput: [:0]const u8) bool {
+    var yycursor: u32 = 0;
+    %{
+        re2c:yyfill:enable = 0;
+
+        number = [1\-9][0\-9]*;
+
+        number { return true; }
+        *      { return false; }
+    %}
+}
+
+test {
+    try std.testing.expect(lex(\(dq1234\(dq));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the output everything between \fB%{\fP and \fB%}\fP has been replaced with
+the generated code:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2zig
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+fn lex(yyinput: [:0]const u8) bool {
+    var yycursor: u32 = 0;
+    
+    var yych: u8 = 0;
+    var yystate: u32 = 0;
+    yyl: while (true) {
+        switch (yystate) {
+            0 => {
+                yych = yyinput[yycursor];
+                yycursor += 1;
+                switch (yych) {
+                    0x31...0x39 => {
+                        yystate = 2;
+                        continue :yyl;
+                    },
+                    else => {
+                        yystate = 1;
+                        continue :yyl;
+                    },
+                }
+            },
+            1 => { return false; },
+            2 => {
+                yych = yyinput[yycursor];
+                switch (yych) {
+                    0x30...0x39 => {
+                        yycursor += 1;
+                        yystate = 2;
+                        continue :yyl;
+                    },
+                    else => {
+                        yystate = 3;
+                        continue :yyl;
+                    },
+                }
+            },
+            3 => { return true; },
+            else => { @panic(\(dqinternal lexer error\(dq); },
+        }
+    }
+
+}
+
+test {
+    try std.testing.expect(lex(\(dq1234\(dq));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SYNTAX
+.sp
+A re2c program consists of a sequence of \fIblocks\fP intermixed with code in the
+target language. There are three main kinds of blocks:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A \fIglobal block\fP contains definitions, configurations, directives and rules.
+re2c compiles regular expressions associated with each rule into a
+deterministic finite automaton, encodes it in the form of conditional jumps
+in the target language and replaces the block with the generated code. Names
+and configurations defined in a global block are added to the global scope
+and become visible to subsequent blocks. At the start of the program the
+global scope is initialized with command\-line \fI\%options\fP\&.
+The \fB:<name>\fP part is optional: if specified, the name can be used to
+refer to the block in another part of the program.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A \fIlocal block\fP is like a global block, but the names and configurations in
+it have local scope (they do not affect other blocks).
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A \fIrules block\fP is like a local block, but it does not generate any code and
+is meant to be reused in other blocks. This is a way of sharing code
+(more details in the \fI\%reusable blocks\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+There are also many auxiliary blocks; see section \fI\%blocks and directives\fP for a
+full list of them. A block may contain the following kinds of statements:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB<name> = <regular expression>;\fP
+A \fIdefinition\fP binds a name to a regular expression. Names may contain
+alphanumeric characters and underscore. The \fI\%regular expressions\fP section
+gives an overview of re2c syntax for regular expressions. Once defined, the
+name can be used in other regular expressions and in rules. Recursion in
+named definitions is not allowed, and each name should be defined before it
+is used. A block inherits named definitions from the global scope.
+Redefining a name that exists in the current scope is an error.
+.TP
+.B \fB<configuration> = <value>;\fP
+A \fIconfiguration\fP allows one to change re2c behavior and customize the
+generated code. For a full list of configurations supported by re2c see the
+\fI\%configurations\fP section. Depending on a particular configuration, the
+value can be a keyword, a nonnegative integer number or a one\-line string
+which should be enclosed in double or single quotes unless it consists of
+alphanumeric characters. A block inherits configurations from the global
+scope and may redefine them or add new ones. Configurations defined inside
+of a block affect the whole block, even if they appear at the end of it.
+.TP
+.B \fB<regular expression> { <code> }\fP
+A \fIrule\fP binds a regular expression to a semantic action (a block of code in
+the target language). If the regular expression matches, the associated
+semantic action is executed. If multiple rules match, the longest match
+takes precedence. If multiple rules match the same string, the earliest one
+takes precedence. There are two special rules: the default rule \fB*\fP and
+the end of input rule \fB$\fP\&. The default rule should always be defined, it
+has the lowest priority regardless of its place in the block, and it matches
+any code unit (not necessarily a valid character, see the
+\fI\%encoding support\fP section). The end of input rule should be defined if the
+corresponding method for \fI\%handling the end of input\fP is used. If
+\fI\%start conditions\fP are used, rules have more complex syntax.
+.TP
+.B \fB!<directive>;\fP
+A \fIdirective\fP is one of the special predefined statements. Each directive
+has a unique purpose. For example, the \fB!use\fP directive merges a rules
+block into the current one (see the \fI\%reusable blocks\fP section), and the
+\fB!include\fP directive allows one to include an outer file (see the
+\fI\%include files\fP section).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH PROGRAM INTERFACE (API)
+.sp
+The generated code interfaces with the outer program with the help of
+\fIprimitives\fP, collectively referred to as the \fIAPI\fP\&.
+Which primitives should be defined for a particular program depends on multiple
+factors, including the complexity of regular expressions, input representation,
+buffering and the use of various features. All the necessary primitives should
+be defined by the user in the form of macros, functions, variables or any other
+suitable form that makes the generated code syntactically and semantically
+correct. re2c does not (and cannot) check the definitions, so if anything is
+missing or defined incorrectly, the generated program may have compile\-time or
+run\-time errors.
+This manual provides examples of API definitions in the most common cases.
+.sp
+re2zig has three API flavors that define the core set of primitives used by a
+program:
+.INDENT 0.0
+.TP
+.B \fBSimple API\fP
+This is the default API for the Zig backend. It consists of the following
+primitives: \fBYYINPUT\fP (which should be defined as a sequence of code
+units, e.g. a string) and \fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP,
+\fBYYLIMIT\fP (which should be defined as indices in \fBYYINPUT\fP).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBRecord API\fP
+Record API is useful in cases when lexer state must be stored in a struct.
+It is enabled with \fB\-\-api record\fP option or \fBre2c:api = record\fP
+configuration. This API consists of a variable \fByyrecord\fP (the
+name can be overridden with \fBre2c:variable:yyrecord\fP) that should be
+defined as a struct with fields \fByyinput\fP, \fByycursor\fP, \fByymarker\fP,
+\fByyctxmarker\fP, \fByylimit\fP (only the fields used by the generated code
+need to be defined, and their names can be configured).
+.nf
+
+.fi
+.sp
+.TP
+.B \fBGeneric API\fP
+This is the most flexible API. It is enabled with \fB\-\-api generic\fP option
+or \fBre2c:api = generic\fP configuration.
+It contains primitives for generic operations:
+\fBYYPEEK\fP,
+\fBYYSKIP\fP,
+\fBYYBACKUP\fP,
+\fBYYBACKUPCTX\fP,
+\fBYYSTAGP\fP,
+\fBYYSTAGN\fP,
+\fBYYMTAGP\fP,
+\fBYYMTAGN\fP,
+\fBYYRESTORE\fP,
+\fBYYRESTORECTX\fP,
+\fBYYRESTORETAG\fP,
+\fBYYSHIFT\fP,
+\fBYYSHIFTSTAG\fP,
+\fBYYSHIFTMTAG\fP,
+\fBYYLESSTHAN\fP\&.
+.UNINDENT
+.sp
+Here is a full list of API primitives that may be used by the generated code in
+order to interface with the outer program.
+.INDENT 0.0
+.TP
+.B \fBYYCTYPE\fP
+The type of the input characters (code units).
+For ASCII, EBCDIC and UTF\-8 encodings it should be 1\-byte unsigned integer.
+For UTF\-16 or UCS\-2 it should be 2\-byte unsigned integer. For UTF\-32 it
+should be 4\-byte unsigned integer.
+.TP
+.B \fBYYCURSOR\fP
+A pointer\-like l\-value that stores the current input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYCURSOR\fP should point to the
+first input character. It is advanced by the generated code.
+When a rule matches, \fBYYCURSOR\fP points to the position after the
+last matched character. It is used only in C pointer API.
+.TP
+.B \fBYYLIMIT\fP
+A pointer\-like r\-value that stores the end of input position (usually a
+pointer of type \fBYYCTYPE*\fP). Initially \fBYYLIMIT\fP should point to the
+position after the last available input character. It is not changed by the
+generated code. The lexer compares \fBYYCURSOR\fP to \fBYYLIMIT\fP
+in order to determine if there are enough input characters left.
+\fBYYLIMIT\fP is used only in C pointer API.
+.TP
+.B \fBYYMARKER\fP
+A pointer\-like l\-value (usually a pointer of type \fBYYCTYPE*\fP)
+that stores the position of the latest matched rule. It is used to
+restore the \fBYYCURSOR\fP position if the longer match fails and
+the lexer needs to rollback. Initialization is not
+needed. \fBYYMARKER\fP is used only in C pointer API.
+.TP
+.B \fBYYCTXMARKER\fP
+A pointer\-like l\-value that stores the position of the trailing context
+(usually a pointer of type \fBYYCTYPE*\fP). No initialization is needed.
+It is used only in C pointer API, and only with the lookahead operator
+\fB/\fP\&.
+.TP
+.B \fBYYFILL\fP
+A generic API primitive with one argument \fBlen\fP\&.
+\fBYYFILL\fP should provide at least \fBlen\fP more input characters or fail.
+If \fBre2c:eof\fP is used, then \fBlen\fP is always \fB1\fP and  \fBYYFILL\fP should
+always return to the calling function; zero return value indicates success.
+If \fBre2c:eof\fP is not used, then \fBYYFILL\fP return value is ignored and it
+should not return on failure. The maximum value of \fBlen\fP is \fBYYMAXFILL\fP\&.
+The definition of \fBYYFILL\fP can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYFILL:naked\fP).
+.TP
+.B \fBYYMAXFILL\fP
+An integral constant equal to the maximum value of the argument to
+\fBYYFILL\fP\&.  It can be generated with \fB/*!max:re2c*/\fP directive.
+.TP
+.B \fBYYLESSTHAN\fP
+A generic API primitive with one argument \fBlen\fP\&.
+It should be defined as an r\-value of boolean type that equals \fBtrue\fP if
+and only if there are fewer than \fBlen\fP input characters left.
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYPEEK\fP
+A generic API primitive with no arguments.
+It should be defined as an r\-value of type \fBYYCTYPE\fP that is equal to the
+character at the current input position. The definition can be either
+function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP).
+.TP
+.B \fBYYSKIP\fP
+A generic API primitive with no arguments.
+\fBYYSKIP\fP should advance the current input position by one
+character. The definition can be either function\-like or free\-form
+depending on the API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUP\fP
+A generic API primitive with no arguments.
+\fBYYBACKUP\fP should save the current input position, which is
+later restored with \fBYYRESTORE\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORE\fP
+A generic API primitive with no arguments.
+\fBYYRESTORE\fP should restore the current input position to the
+value saved by \fBYYBACKUP\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYBACKUPCTX\fP
+A generic API primitive with zero arguments.
+\fBYYBACKUPCTX\fP should save the current input position as the
+position of the trailing context, which is later restored by
+\fBYYRESTORECTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORECTX\fP
+A generic API primitive with no arguments.
+\fBYYRESTORECTX\fP should restore the trailing context position
+saved with \fBYYBACKUPCTX\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYRESTORETAG\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYRESTORETAG\fP should restore the trailing context position
+to the value of \fBtag\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGP\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGP\fP should set \fBtag\fP to the current input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSTAGN\fP
+A generic API primitive with one argument \fBtag\fP, where \fBtag\fP can be a
+pointer or an offset (see submatch extraction section for details).
+\fBYYSTAGN\fP should set \fBtag\fP to a value that represents non\-existent
+input position.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGP\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGP\fP should append the current position to the submatch history of
+\fBtag\fP (see the submatch extraction section for details).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMTAGN\fP
+A generic API primitive with one argument \fBtag\fP\&.
+\fBYYMTAGN\fP should append a value that represents non\-existent input
+position to the submatch history of \fBtag\fP (see the submatch
+extraction section for details).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFT\fP
+A generic API primitive with one argument \fBshift\fP\&.
+\fBYYSHIFT\fP should shift the current input position by
+\fBshift\fP characters (the shift value may be negative). The definition
+can be either function\-like or free\-form depending on the API style
+(see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTSTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTSTAG\fP should shift \fBtag\fP by \fBshift\fP characters
+(the shift value may be negative).
+The definition can be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYSHIFTMTAG\fP
+A generic API primitive with two arguments, \fBtag\fP and \fBshift\fP\&.
+\fBYYSHIFTMTAG\fP should shift the latest value in the history
+of \fBtag\fP by \fBshift\fP characters (the shift value may be negative).
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP).
+.TP
+.B \fBYYMAXNMATCH\fP
+An integral constant equal to the maximal number of POSIX capturing groups
+in a rule. It is generated with \fB/*!maxnmatch:re2c*/\fP directive.
+.TP
+.B \fBYYCONDTYPE\fP
+The type of the condition enum.
+It should be generated either with the \fB/*!types:re2c*/\fP
+directive or the \fB\-t\fP \fB\-\-type\-header\fP option.
+.TP
+.B \fBYYGETCONDITION\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of type \fBYYCONDTYPE\fP that is equal to
+the current condition identifier. The definition can be either function\-like
+or free\-form depending on the API style (see \fBre2c:api:style\fP and
+\fBre2c:define:YYGETCONDITION:naked\fP).
+.TP
+.B \fBYYSETCONDITION\fP
+An API primitive with one argument \fBcond\fP\&.
+The meaning of \fBYYSETCONDITION\fP is to set the current condition
+identifier to \fBcond\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETCONDITION@cond\fP).
+.TP
+.B \fBYYGETSTATE\fP
+An API primitive with zero arguments.
+It should be defined as an r\-value of integer type that is equal to the
+current lexer state. Should be initialized to \fB\-1\fP\&. The definition can be
+either function\-like or free\-form depending on the API style (see
+\fBre2c:api:style\fP and \fBre2c:define:YYGETSTATE:naked\fP).
+.TP
+.B \fBYYSETSTATE\fP
+An API primitive with one argument \fBstate\fP\&.
+The meaning of \fBYYSETSTATE\fP is to set the current lexer state to
+\fBstate\fP\&.
+The definition should be either function\-like or free\-form depending on the
+API style (see \fBre2c:api:style\fP and \fBre2c:define:YYSETSTATE@state\fP).
+.TP
+.B \fBYYDEBUG\fP
+A debug API primitive with two arguments. It can be used to debug the
+generated code (with \fB\-d\fP \fB\-\-debug\-output\fP option). \fBYYDEBUG\fP should
+return no value and accept two arguments: \fBstate\fP (either a DFA state
+index or \fB\-1\fP) and \fBsymbol\fP (the current input symbol).
+.TP
+.B \fByych\fP
+An l\-value of type \fBYYCTYPE\fP that stores the current input character.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByyaccept\fP
+An l\-value of unsigned integral type that stores the number of the latest
+matched rule.
+User definition is necessary only with \fB\-f\fP \fB\-\-storable\-state\fP option.
+.TP
+.B \fByynmatch\fP
+An l\-value of unsigned integral type that stores the number of POSIX
+capturing groups in the matched rule.
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.TP
+.B \fByypmatch\fP
+An array of l\-values that are used to hold the tag values corresponding
+to the capturing parentheses in the matching rule. Array length must be
+at least \fByynmatch * 2\fP (usually \fBYYMAXNMATCH * 2\fP is a good choice).
+Used only with \fB\-P\fP \fB\-\-posix\-captures\fP option.
+.UNINDENT
+.SH OPTIONS
+.sp
+Some of the options have corresponding \fI\%configurations\fP,
+others are global and cannot be changed after re2c starts reading the input file.
+Debug options generally require building re2c in debug configuration.
+Internal options are useful for experimenting with the algorithms used in re2c.
+.INDENT 0.0
+.TP
+.B \fB\-? \-\-help \-h\fP
+Show help message.
+.TP
+.B \fB\-\-api \-\-input <default | custom>\fP
+Specify the API used by the generated code to interface with user\-defined
+code: \fBdefault\fP is the API based on pointer arithmetic (the default for
+C), and \fBcustom\fP is the generic API (the default for Go and Rust).
+.TP
+.B \fB\-\-bit\-vectors \-b\fP
+Optimize conditional jumps using bit masks.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-case\-insensitive\fP
+Treat single\-quoted and double\-quoted strings as case\-insensitive.
+.TP
+.B \fB\-\-case\-inverted\fP
+Invert the meaning of single\-quoted and double\-quoted strings:
+treat single\-quoted strings as case\-sensitive and double\-quoted strings
+as case\-insensitive.
+.TP
+.B \fB\-\-case\-ranges\fP
+Collapse consecutive cases in a switch statement into a range of the form
+\fBlow ... high\fP\&. This syntax is a C/C++ language extension that is
+supported by compilers like GCC, Clang and Tcc. The main advantage over
+using single cases is smaller generated code and faster generation time,
+although for some compilers like Tcc it also results in smaller binary size.
+This option is supported only for C.
+.TP
+.B \fB\-\-computed\-gotos \-g\fP
+Optimize conditional jumps using non\-standard \(dqcomputed goto\(dq extension
+(which must be supported by the compiler). re2c generates jump tables
+only in complex cases with a lot of conditional branches. Complexity
+threshold can be configured with \fBcgoto:threshold\fP configuration. This
+option implies \fB\-\-bit\-vectors\fP\&. It is supported only for C.
+.TP
+.B \fB\-\-conditions \-\-start\-conditions \-c\fP
+Enable support of Flex\-like \(dqconditions\(dq: multiple interrelated lexers
+within one block. This is an alternative to manually specifying different
+re2c blocks connected with \fBgoto\fP or function calls.
+.TP
+.B \fB\-\-depfile FILE\fP
+Write dependency information to \fBFILE\fP in the form of a Makefile rule
+\fB<output\-file> : <input\-file> [include\-file ...]\fP\&. This allows one to
+track build dependencies in the presence of \fBinclude:re2c\fP directives,
+so that updating include files triggers regeneration of the output file.
+This option depends on the \fB\-\-output\fP option.
+.TP
+.B \fB\-\-ebcdic \-\-ecb \-e\fP
+Generate a lexer that reads input in EBCDIC encoding. re2c assumes that the
+character range is 0 \-\- 0xFF and character size is 1 byte.
+.TP
+.B \fB\-\-empty\-class <match\-empty | match\-none | error>\fP
+Define the way re2c treats empty character classes. With \fBmatch\-empty\fP
+(the default) empty class matches empty input (which is illogical, but
+backwards\-compatible). With \fBmatch\-none\fP empty class always fails to match.
+With \fBerror\fP empty class raises a compilation error.
+.TP
+.B \fB\-\-encoding\-policy <fail | substitute | ignore>\fP
+Define the way re2c treats Unicode surrogates.
+With \fBfail\fP re2c aborts with an error when a surrogate is encountered.
+With \fBsubstitute\fP re2c silently replaces surrogates with the error code
+point 0xFFFD. With \fBignore\fP (the default) re2c treats surrogates as
+normal code points. The Unicode standard says that standalone surrogates
+are invalid, but real\-world libraries and programs behave in different ways.
+.TP
+.B \fB\-\-flex\-syntax \-F\fP
+Partial support for Flex syntax: in this mode named definitions don\(aqt need
+the equal sign and the terminating semicolon, and when used they must be
+surrounded with curly braces. Names without curly braces are treated as
+double\-quoted strings.
+.TP
+.B \fB\-\-header \-\-type\-header \-t HEADER\fP
+Generate a \fBHEADER\fP file. The contents of the file can be specified with
+directives \fBheader:re2c:on\fP and \fBheader:re2c:off\fP\&.
+If conditions are used the header will have a condition enum automatically
+appended to it (unless there is an explicit \fBconditions:re2c\fP directive).
+.TP
+.B \fB\-I PATH\fP
+Add \fBPATH\fP to the list of locations which are used when searching for
+include files. This option is useful in combination with \fBinclude:re2c\fP
+directive. re2c looks for \fBFILE\fP in the directory of the parent file and
+in the include locations specified with \fB\-I\fP option.
+.TP
+.B \fB\-\-input\-encoding <ascii | utf8>\fP
+Specify the way re2c parses regular expressions.
+With \fBascii\fP (the default) re2c handles input as ASCII\-encoded: any
+sequence of code units is a sequence of standalone 1\-byte characters.
+With \fButf8\fP re2c handles input as UTF8\-encoded and recognizes multibyte
+characters.
+.TP
+.B \fB\-\-invert\-captures\fP
+Invert the meaning of capturing and non\-capturing groups. By default
+\fB(...)\fP is capturing and \fB(! ...)\fP is non\-capturing. With this option
+\fB(! ...)\fP is capturing and \fB(...)\fP is non\-capturing.
+.TP
+.B \fB\-\-lang <c | go | rust>\fP
+Specify the output language. Supported languages are C, Go and Rust.
+The default is C for re2c, Go for re2go and Rust for re2rust.
+.TP
+.B \fB\-\-leftmost\-captures\fP
+Enable submatch extraction with leftmost greedy capturing groups.
+.TP
+.B \fB\-\-location\-format <gnu | msvc>\fP
+Specify location format in messages.
+With \fBgnu\fP locations are printed as \(aqfilename:line:column: ...\(aq.
+With \fBmsvc\fP locations are printed as \(aqfilename(line,column) ...\(aq.
+The default is \fBgnu\fP\&.
+.TP
+.B \fB\-\-loop\-switch\fP
+Encode DFA in the form of a loop over a switch statement. Individual states
+are switch cases. The current state is stored in a variable \fByystate\fP\&.
+Transitions between states update \fByystate\fP to the case label of the
+destination state and \fBcontinue\fP to the head of the loop. This option is
+always enabled for Rust, as it has no \fBgoto\fP statement and cannot use the
+goto/label approach which is the default for C and Go backends.
+.TP
+.B \fB\-\-nested\-ifs \-s\fP
+Use nested \fBif\fP statements instead of \fBswitch\fP statements in conditional
+jumps. This usually results in more efficient code with non\-optimizing
+compilers.
+.TP
+.B \fB\-\-no\-debug\-info \-i\fP
+Do not output line directives. This may be useful when the generated code is
+stored in a version control system (to avoid huge autogenerated diffs on
+small changes). This option is on by default for Rust, as it does not have
+line directives.
+.TP
+.B \fB\-\-no\-generation\-date\fP
+Suppress date output in the generated file.
+.TP
+.B \fB\-\-no\-version\fP
+Suppress version output in the generated file.
+.TP
+.B \fB\-\-no\-unsafe\fP
+Do not generate \fBunsafe\fP wrapper over \fBYYPEEK\fP (this option is specific
+to Rust). For performance reasons \fBYYPEEK\fP should avoid bounds\-checking,
+as the lexer already performs end\-of\-input checks in a more efficient way.
+The user may choose to provide a safe \fBYYPEEK\fP definition, or a definition
+that is unsafe only in release builds, in which case the \fB\-\-no\-unsafe\fP
+option helps to avoid warnings about redundant \fBunsafe\fP blocks.
+.TP
+.B \fB\-\-output \-o OUTPUT\fP
+Specify the \fBOUTPUT\fP file.
+.TP
+.B \fB\-\-posix\-captures \-P\fP
+Enable submatch extraction with POSIX\-style capturing groups.
+.TP
+.B \fB\-\-reusable \-r\fP
+Deprecated since version 2.2 (reusable blocks are allowed by default now).
+.TP
+.B \fB\-\-skeleton \-S\fP
+Ignore user\-defined interface code and generate a self\-contained \(dqskeleton\(dq
+program. Additionally, generate input files with strings derived from the
+regular grammar and compressed match results that are used to verify
+\(dqskeleton\(dq behavior on all inputs. This option is useful for finding bugs
+in optimizations and code generation. This option is supported only for C.
+.TP
+.B \fB\-\-storable\-state \-f\fP
+Generate a lexer which can store its inner state.
+This is useful in push\-model lexers which are stopped by an outer program
+when there is not enough input, and then resumed when more input becomes
+available. In this mode users should additionally define \fBYYGETSTATE\fP
+and \fBYYSETSTATE\fP primitives, and variables \fByych\fP, \fByyaccept\fP and
+\fBstate\fP should be part of the stored lexer state.
+.TP
+.B \fB\-\-tags \-T\fP
+Enable submatch extraction with tags.
+.TP
+.B \fB\-\-ucs2 \-\-wide\-chars \-w\fP
+Generate a lexer that reads UCS2\-encoded input. re2c assumes that the
+character range is 0 \-\- 0xFFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf8 \-\-utf\-8 \-8\fP
+Generate a lexer that reads input in UTF\-8 encoding. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 1 byte.
+.TP
+.B \fB\-\-utf16 \-\-utf\-16 \-x\fP
+Generate a lexer that reads UTF16\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 2 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-utf32 \-\-unicode \-u\fP
+Generate a lexer that reads UTF32\-encoded input. re2c assumes that the
+character range is 0 \-\- 0x10FFFF and character size is 4 bytes.
+This option implies \fB\-\-nested\-ifs\fP\&.
+.TP
+.B \fB\-\-verbose\fP
+Output a short message in case of success.
+.TP
+.B \fB\-\-vernum \-V\fP
+Show version information in \fBMMmmpp\fP format (major, minor, patch).
+.TP
+.B \fB\-\-version \-v\fP
+Show version information.
+.TP
+.B \fB\-\-single\-pass \-1\fP
+Deprecated. Does nothing (single pass is the default now).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-debug\-output \-d\fP
+Emit \fBYYDEBUG\fP invocations in the generated code. This is useful to trace
+lexer execution.
+.TP
+.B \fB\-\-dump\-adfa\fP
+Debug option: output DFA after tunneling (in .dot format).
+.TP
+.B \fB\-\-dump\-cfg\fP
+Debug option: output control flow graph of tag variables (in .dot format).
+.TP
+.B \fB\-\-dump\-closure\-stats\fP
+Debug option: output statistics on the number of states in closure.
+.TP
+.B \fB\-\-dump\-dfa\-det\fP
+Debug option: output DFA immediately after determinization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-min\fP
+Debug option: output DFA after minimization (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tagopt\fP
+Debug option: output DFA after tag optimizations (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-tree\fP
+Debug option: output DFA under construction with states represented as tag
+history trees (in .dot format).
+.TP
+.B \fB\-\-dump\-dfa\-raw\fP
+Debug option: output DFA under construction with expanded state\-sets
+(in .dot format).
+.TP
+.B \fB\-\-dump\-interf\fP
+Debug option: output interference table produced by liveness analysis of tag
+variables.
+.TP
+.B \fB\-\-dump\-nfa\fP
+Debug option: output NFA (in .dot format).
+.TP
+.B \fB\-\-emit\-dot \-D\fP
+Instead of normal output generate lexer graph in .dot format.
+The output can be converted to an image with the help of Graphviz
+(e.g. something like \fBdot \-Tpng \-odfa.png dfa.dot\fP).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-\-dfa\-minimization <moore | table>\fP
+Internal option: DFA minimization algorithm used by re2c. The \fBmoore\fP
+option is the Moore algorithm (it is the default). The \fBtable\fP option is
+the \(dqtable filling\(dq algorithm. Both algorithms should produce the same DFA
+up to states relabeling; table filling is simpler and much slower and serves
+as a reference implementation.
+.TP
+.B \fB\-\-eager\-skip\fP
+Internal option: make the generated lexer advance the input position
+eagerly \-\- immediately after reading the input symbol. This changes the
+default behavior when the input position is advanced lazily \-\- after
+transition to the next state.
+.TP
+.B \fB\-\-no\-lookahead\fP
+Internal option, deprecated.
+It used to enable TDFA(0) algorithm. Unlike TDFA(1), TDFA(0) algorithm does
+not use one\-symbol lookahead. It applies register operations to the incoming
+transitions rather than the outgoing ones. Benchmarks showed that TDFA(0)
+algorithm is less efficient than TDFA(1).
+.TP
+.B \fB\-\-no\-optimize\-tags\fP
+Internal option: suppress optimization of tag variables (useful for
+debugging).
+.TP
+.B \fB\-\-posix\-closure <gor1 | gtop>\fP
+Internal option: specify shortest\-path algorithm used for the construction of
+epsilon\-closure with POSIX disambiguation semantics: \fBgor1\fP (the default)
+stands for Goldberg\-Radzik algorithm, and \fBgtop\fP stands for \(dqglobal
+topological order\(dq algorithm.
+.TP
+.B \fB\-\-posix\-prectable <complex | naive>\fP
+Internal option: specify the algorithm used to compute POSIX precedence
+table. The \fBcomplex\fP algorithm computes precedence table in one traversal
+of tag history tree and has quadratic complexity in the number of TNFA
+states; it is the default. The \fBnaive\fP algorithm has worst\-case cubic
+complexity in the number of TNFA states, but it is much simpler than
+\fBcomplex\fP and may be slightly faster in non\-pathological cases.
+.TP
+.B \fB\-\-stadfa\fP
+Internal option, deprecated.
+It used to enable staDFA algorithm, which differs from TDFA in that register
+operations are placed in states rather than on transitions. Benchmarks
+showed that staDFA algorithm is less efficient than TDFA.
+.TP
+.B \fB\-\-fixed\-tags <none | toplevel | all>\fP
+Internal option:
+specify whether the fixed\-tag optimization should be applied to all tags
+(\fBall\fP), none of them (\fBnone\fP), or only those in toplevel concatenation
+(\fBtoplevel\fP). The default is \fBall\fP\&.
+\(dqFixed\(dq tags are those that are located within a fixed distance to some
+other tag (called \(dqbase\(dq). In such cases only the base tag needs to be
+tracked, and the value of the fixed tag can be computed as the value of the
+base tag plus a static offset. For tags that are under alternative or
+repetition it is also necessary to check if the base tag has a no\-match
+value (in that case fixed tag should also be set to no\-match, disregarding
+the offset). For tags in top\-level concatenation the check is not needed,
+because they always match.
+.UNINDENT
+.SH WARNINGS
+.sp
+Warnings can be individually enabled, disabled and turned into an error.
+.INDENT 0.0
+.TP
+.B \fB\-W\fP
+Turn on all warnings.
+.TP
+.B \fB\-Werror\fP
+Turn warnings into errors. Note that this option alone
+doesn\(aqt turn on any warnings; it only affects those warnings that have
+been turned on so far or will be turned on later.
+.TP
+.B \fB\-W<warning>\fP
+Turn on \fBwarning\fP\&.
+.TP
+.B \fB\-Wno\-<warning>\fP
+Turn off \fBwarning\fP\&.
+.TP
+.B \fB\-Werror\-<warning>\fP
+Turn on \fBwarning\fP and treat it as an error (this implies \fB\-W<warning>\fP).
+.TP
+.B \fB\-Wno\-error\-<warning>\fP
+Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off
+the warning itself.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \fB\-Wcondition\-order\fP
+Warn if the generated program makes implicit assumptions about condition
+numbering. One should use either the \fB\-\-header\fP option or the
+\fBconditions:re2c\fP directive to generate a mapping of condition names to
+numbers and then use the autogenerated condition names.
+.TP
+.B \fB\-Wempty\-character\-class\fP
+Warn if a regular expression contains an empty character class. Trying to
+match an empty character class makes no sense: it should always fail.
+However, for backwards compatibility reasons re2c permits empty character
+classes and treats them as empty strings. Use the \fB\-\-empty\-class\fP option
+to change the default behavior.
+.TP
+.B \fB\-Wmatch\-empty\-string\fP
+Warn if a rule is nullable (matches an empty string).
+If the lexer runs in a loop and the empty match is unintentional, the lexer
+may unexpectedly hang in an infinite loop.
+.TP
+.B \fB\-Wswapped\-range\fP
+Warn if the lower bound of a range is greater than its upper bound. The
+default behavior is to silently swap the range bounds.
+.TP
+.B \fB\-Wundefined\-control\-flow\fP
+Warn if some input strings cause undefined control flow in the lexer (the
+faulty patterns are reported). This is a dangerous and common mistake. It
+can be easily fixed by adding the default rule \fB*\fP which has the lowest
+priority, matches any code unit, and always consumes a single code unit.
+.TP
+.B \fB\-Wunreachable\-rules\fP
+Warn about rules that are shadowed by other rules and will never match.
+.TP
+.B \fB\-Wuseless\-escape\fP
+Warn if a symbol is escaped when it shouldn\(aqt be.
+By default, re2c silently ignores such escapes, but this may as well
+indicate a typo or an error in the escape sequence.
+.TP
+.B \fB\-Wnondeterministic\-tags\fP
+Warn if a tag has \fBn\fP\-th degree of nondeterminism, where \fBn\fP is greater
+than 1.
+.TP
+.B \fB\-Wsentinel\-in\-midrule\fP
+Warn if the sentinel symbol occurs in the middle of a rule \-\-\- this may
+cause reads past the end of buffer, crashes or memory corruption in the
+generated lexer. This warning is only applicable if the sentinel method of
+checking for the end of input is used.
+It is set to an error if \fBre2c:sentinel\fP configuration is used.
+.UNINDENT
+.SH BLOCKS AND DIRECTIVES
+.sp
+Below is the list of re2c directives (syntactic constructs that mark the
+beginning and end of the code that should be processed by re2c). Named blocks
+were added in re2c version 2.2. They are exactly the same as unnamed blocks,
+except that the name can be used to reference a block in other parts of the
+program. More information on each directive can be found in the related
+sections.
+.INDENT 0.0
+.TP
+.B \fB/*!re2c[:<name>] ... */\fP
+A global re2c block with an optional name. The block may contain named
+definitions, configurations and rules in any order. Named definitions and
+configurations are defined in the global scope, so they are inherited by
+subsequent blocks. The code for a global block is generated at the point
+where the block is specified.
+.TP
+.B \fB/*!local:re2c[:<name>] ... */\fP
+A local re2c block with an optional name. Unlike global blocks, definitions
+and configurations inside of a local block are not added into the global
+scope. In all other respects local blocks are the same as global blocks.
+.TP
+.B \fB/*!rules:re2c[:<name>] ... */\fP
+A reusable block with an optional name. Rules blocks have the same structure
+as local or global blocks, but they do not produce any code and they can be
+reused multiple times in other blocks with the help of a \fB!use:<name>;\fP
+directive or a \fB/*!use:re2c[:<name>] ... */\fP block. A rules block on its
+own does not add any definitions into the global scope. The code for it is
+generated at the point of use. Prior to re2c version 2.2 rules blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB/*!use:re2c[:<name>] ... */\fP
+A use block that references a previously defined rules block. If the name is
+specified, re2c looks for a rules block with this name. Otherwise the most
+recent rules block is used (either a named or an unnamed one). A use block
+can add definitions, configurations and rules of its own, which are added to
+those of the referenced rules block. Prior to re2c version 2.2 use blocks
+required \fB\-r \-\-reusable\fP option.
+.TP
+.B \fB!use:<name>;\fP
+An in\-block use directive that merges a previously defined rules block with
+the specified name into the current block. Named definitions, configurations
+and rules of the referenced block are added to the current ones. Conflicts
+between overlapping rules and configurations are resolved in the usual way:
+the first rule takes priority, and the latest configuration overrides the
+preceding ones. One exception is the special rules \fB*\fP, \fB$\fP and \fB<!>\fP
+for which a block\-local definition always takes priority. A use directive
+can be placed anywhere inside of a block, and multiple use directives are
+allowed.
+.TP
+.B \fB/*!max:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXFILL\fP definition.
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXFILL\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXFILL <n>\fP), or a global variable for Go
+(\fBvar YYMAXFILL int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXFILL\fP\&.
+.TP
+.B \fB/*!maxnmatch:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates \fBYYMAXNMATCH\fP definition (it requires
+\fB\-P \-\-posix\-captures\fP option).
+An optional list of block names specifies which blocks should be included
+when computing \fBYYMAXNMATCH\fP value (if the list is empty, all blocks are
+included).
+By default the generated code is a macro\-definition for C
+(\fB#define YYMAXNMATCH <n>\fP), or a global variable for Go
+(\fBvar YYMAXNMATCH int = <n>\fP). It can be customized with an optional
+configuration \fBformat\fP that specifies a template string where \fB@@{max}\fP
+(or \fB@@\fP for short) is replaced with the numeric value of \fBYYMAXNMATCH\fP\&.
+.TP
+.B \fB/*!stags:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!mtags:re2c[:<name1>[:<name2>...]] ... */\fP
+Directives that specify a template piece of code that is expanded for each
+s\-tag/m\-tag variable generated by re2c.
+An optional list of block names specifies which blocks should be included
+when computing the set of tag variables (if the list is empty, all blocks
+are included).
+There are two optional configurations: \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{tag}\fP (or
+\fB@@\fP for short) is replaced with the name of each tag variable.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different tag variables.
+.TP
+.B \fB/*!getstate:re2c[:<name1>[:<name2>...]] ... */\fP
+A directive that generates conditional dispatch on the lexer state (it
+requires \fB\-\-storable\-state\fP option).
+An optional list of block names specifies which blocks should be included in
+the state dispatch. The default transition goes to the start label of the
+first block on the list. If the list is empty, all blocks are included, and
+the default transition goes to the first block in the file that has a start
+label.
+This directive is incompatible with the \fB\-\-loop\-switch\fP option and Rust,
+as it requires cross\-block transitions that are unsupported without the
+\fBgoto\fP statement.
+.TP
+.B \fB/*!conditions:re2c[:<name1>[:<name2>...]] ... */\fP, \fB/*!types:re2c... */\fP
+A directive that generates condition enumeration (it requires
+\fB\-\-conditions\fP option).
+An optional list of block names specifies which blocks should be included
+when computing the set of conditions (if the list is empty, all blocks are
+included).
+By default the generated code is an enumeration \fBYYCONDTYPE\fP\&. It can be
+customized with optional configurations \fBformat\fP and \fBseparator\fP\&.
+Configuration \fBformat\fP specifies a template string where \fB@@{cond}\fP (or
+\fB@@\fP for short) is replaced with the name of each condition, and
+\fB@@{num}\fP is replaced with a numeric index of that condition.
+Configuration \fBseparator\fP specifies a piece of code used to join the
+generated \fBformat\fP pieces for different conditions.
+.TP
+.B \fB/*!include:re2c <file> */\fP
+This directive allows one to include \fB<file>\fP, which must be a double\-quoted
+file path. The contents of the file are literally substituted in place of
+the directive, in the same way as \fB#include\fP works in C/C++. This
+directive can be used together with the \fB\-\-depfile\fP option to generate
+build system dependencies on the included files.
+.TP
+.B \fB!include <file>;\fP
+This directive is the same as \fB/*!include:re2c <file> */\fP, except that it
+should be used inside of a re2c block.
+.TP
+.B \fB/*!header:re2c:on*/\fP
+This directive marks the start of the header file. Everything after it and up to
+the following \fB/*!header:re2c:off*/\fP directive is processed by re2c and
+written to the header file specified with \fB\-t \-\-type\-header\fP option.
+.TP
+.B \fB/*!header:re2c:off*/\fP
+This directive marks the end of the header file started with
+\fB/*!header:re2c:on*/\fP\&.
+.TP
+.B \fB/*!ignore:re2c ... */\fP
+A block whose contents are ignored and removed from the output file.
+.TP
+.B \fB%{ ... %}\fP
+A global re2c block in the \fB\-\-flex\-support\fP mode. This is deprecated and
+exists for backward compatibility.
+.UNINDENT
+.SH CONFIGURATIONS
+.INDENT 0.0
+.TP
+.B \fBre2c:api\fP, \fBre2c:flags:input\fP
+Same as the \fB\-\-api\fP option.
+.TP
+.B \fBre2c:api:sigil\fP
+Specify the marker (\(dqsigil\(dq) that is used for argument placeholders in the
+API primitives. The default is \fB@@\fP\&. A placeholder starts with sigil
+followed by the argument name in curly braces. For example, if sigil is set
+to \fB$\fP, then placeholders will have the form \fB${name}\fP\&. Single\-argument
+APIs may use shorthand notation without the name in braces. This option can
+be overridden by options for individual API primitives, e.g.
+\fBre2c:define:YYFILL@len\fP for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:api:style\fP
+Specify API style. Possible values are \fBfunctions\fP (the default for C) and
+\fBfree\-form\fP (the default for Go and Rust).
+In \fBfunctions\fP style API primitives are generated with an argument list in
+parentheses following the name of the primitive. The arguments are provided
+only for autogenerated parameters (such as the number of characters passed
+to \fBYYFILL\fP), but not for the general lexer context, so the primitives
+behave more like macros in C/C++ or closures in Go and Rust.
+In free\-form style API primitives do not have a fixed form: they should be
+defined as strings containing free\-form pieces of code with interpolated
+variables of the form \fB@@{var}\fP or \fB@@\fP (they correspond to arguments in
+function\-like style).
+This configuration may be overridden for individual API primitives, see for
+example \fBre2c:define:YYFILL:naked\fP configuration for \fBYYFILL\fP\&.
+.TP
+.B \fBre2c:bit\-vectors\fP, \fBre2c:flags:bit\-vectors\fP, \fBre2c:flags:b\fP
+Same as the \fB\-\-bit\-vectors\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-insensitive\fP, \fBre2c:flags:case\-insensitive\fP
+Same as the \fB\-\-case\-insensitive\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:case\-inverted\fP, \fBre2c:flags:case\-inverted\fP
+Same as the \fB\-\-case\-inverted\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:case\-ranges\fP, \fBre2c:flags:case\-ranges\fP
+Same as the \fB\-\-case\-ranges\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos\fP, \fBre2c:flags:computed\-gotos\fP, \fBre2c:flags:g\fP
+Same as the \fB\-\-computed\-gotos\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:computed\-gotos:threshold\fP, \fBre2c:cgoto:threshold\fP
+If computed \fBgoto\fP is used, this configuration specifies the complexity
+threshold that triggers the generation of jump tables instead of nested
+\fBif\fP statements and bitmaps. The default value is \fB9\fP\&.
+.TP
+.B \fBre2c:cond:goto\fP
+Specifies a piece of code used for the autogenerated shortcut rules \fB:=>\fP
+in conditions. The default is \fBgoto @@;\fP\&.
+The \fB@@\fP placeholder is substituted with condition name (see
+configurations \fBre2c:api:sigil\fP and \fBre2c:cond:goto@cond\fP).
+.TP
+.B \fBre2c:cond:goto@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:goto\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:divider\fP
+Defines the divider for condition blocks.
+The default value is \fB/* *********************************** */\fP\&.
+Placeholders are substituted with condition name (see \fBre2c:api:sigil\fP and
+\fBre2c:cond:divider@cond\fP).
+.TP
+.B \fBre2c:cond:divider@cond\fP
+Specifies the sigil used for argument substitution in \fBre2c:cond:divider\fP
+definition. The default is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:cond:prefix\fP, \fBre2c:condprefix\fP
+Specifies the prefix used for condition labels.
+The default is \fByyc_\fP\&.
+.TP
+.B \fBre2c:cond:enumprefix\fP, \fBre2c:condenumprefix\fP
+Specifies the prefix used for condition identifiers.
+The default is \fByyc\fP\&.
+.TP
+.B \fBre2c:debug\-output\fP, \fBre2c:flags:debug\-output\fP, \fBre2c:flags:d\fP
+Same as the \fB\-\-debug\-output\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:define:YYBACKUP\fP
+Defines generic API primitive \fBYYBACKUP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYBACKUPCTX\fP
+Defines generic API primitive \fBYYBACKUPCTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYCONDTYPE\fP
+Defines \fBYYCONDTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTYPE\fP
+Defines \fBYYCTYPE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCTXMARKER\fP
+Defines API primitive \fBYYCTXMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYCURSOR\fP
+Defines API primitive \fBYYCURSOR\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYDEBUG\fP
+Defines API primitive \fBYYDEBUG\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL\fP
+Defines API primitive \fBYYFILL\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYFILL@len\fP
+Specifies the sigil used for argument substitution in \fBYYFILL\fP
+definition. Defaults to \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYFILL:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for \fBYYFILL\fP\&.
+Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETCONDITION\fP
+Defines API primitive \fBYYGETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYGETSTATE\fP
+Defines API primitive \fBYYGETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYGETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYGETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYLESSTHAN\fP
+Defines generic API primitive \fBYYLESSTHAN\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYLIMIT\fP
+Defines API primitive \fBYYLIMIT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMARKER\fP
+Defines API primitive \fBYYMARKER\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGN\fP
+Defines generic API primitive \fBYYMTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYMTAGP\fP
+Defines generic API primitive \fBYYMTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYPEEK\fP
+Defines generic API primitive \fBYYPEEK\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYRESTORE\fP
+Defines generic API primitive \fBYYRESTORE\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORECTX\fP
+Defines generic API primitive \fBYYRESTORECTX\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYRESTORETAG\fP
+Defines generic API primitive \fBYYRESTORETAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSETCONDITION\fP
+Defines API primitive \fBYYSETCONDITION\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETCONDITION@cond\fP
+Specifies the sigil used for argument substitution in \fBYYSETCONDITION\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETCONDITION:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETCONDITION\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSETSTATE\fP
+Defines API primitive \fBYYSETSTATE\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSETSTATE@state\fP
+Specifies the sigil used for argument substitution in \fBYYSETSTATE\fP
+definition. The default value is \fB@@\fP\&.
+Overrides the more generic \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:define:YYSETSTATE:naked\fP
+Overrides the more generic \fBre2c:api:style\fP configuration for
+\fBYYSETSTATE\fP\&. Zero value corresponds to free\-form API style.
+.TP
+.B \fBre2c:define:YYSKIP\fP
+Defines generic API primitive \fBYYSKIP\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFT\fP
+Defines generic API primitive \fBYYSHIFT\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSHIFTMTAG\fP
+Defines generic API primitive \fBYYSHIFTMTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSHIFTSTAG\fP
+Defines generic API primitive \fBYYSHIFTSTAG\fP (see the API primitives
+section).
+.TP
+.B \fBre2c:define:YYSTAGN\fP
+Defines generic API primitive \fBYYSTAGN\fP (see the API primitives section).
+.TP
+.B \fBre2c:define:YYSTAGP\fP
+Defines generic API primitive \fBYYSTAGP\fP (see the API primitives section).
+.TP
+.B \fBre2c:empty\-class\fP, \fBre2c:flags:empty\-class\fP
+Same as the \fB\-\-empty\-class\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:encoding:ebcdic\fP, \fBre2c:flags:ecb\fP, \fBre2c:flags:e\fP
+Same as the \fB\-\-ebcdic\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:ucs2\fP, \fBre2c:flags:wide\-chars\fP, \fBre2c:flags:w\fP
+Same as the \fB\-\-ucs2\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf8\fP, \fBre2c:flags:utf\-8\fP, \fBre2c:flags:8\fP
+Same as the \fB\-\-utf8\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf16\fP, \fBre2c:flags:utf\-16\fP, \fBre2c:flags:x\fP
+Same as the \fB\-\-utf16\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding:utf32\fP, \fBre2c:flags:unicode\fP, \fBre2c:flags:u\fP
+Same as the \fB\-\-utf32\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:encoding\-policy\fP, \fBre2c:flags:encoding\-policy\fP
+Same as the \fB\-\-encoding\-policy\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:eof\fP
+Specifies the sentinel symbol used with the end\-of\-input rule \fB$\fP\&. The
+default value is \fB\-1\fP (\fB$\fP rule is not used). Other possible values
+include all valid code units. Only decimal numbers are recognized.
+.TP
+.B \fBre2c:header\fP, \fBre2c:flags:type\-header\fP, \fBre2c:flags:t\fP
+Specifies the name of the generated header file relative to the directory of
+the output file. Same as the \fB\-\-header\fP option except that the file path
+is relative.
+.TP
+.B \fBre2c:indent:string\fP
+Specifies the string used for indentation. The default is a single tab
+character \fB\(dq\et\(dq\fP\&. Indent string should contain whitespace characters only.
+To disable indentation entirely, set this configuration to an empty string.
+.TP
+.B \fBre2c:indent:top\fP
+Specifies the minimum amount of indentation to use. The default value is
+zero. The value should be a non\-negative integer number.
+.TP
+.B \fBre2c:invert\-captures\fP
+Same as the \fB\-\-invert\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:label:prefix\fP, \fBre2c:labelprefix\fP
+Specifies the prefix used for DFA state labels. The default is \fByy\fP\&.
+.TP
+.B \fBre2c:label:start\fP, \fBre2c:startlabel\fP
+Controls the generation of a block start label. The default value is zero,
+which means that the start label is generated only if it is used. An integer
+value greater than zero forces the generation of start label even if it is
+unused by the lexer. A string value also forces start label generation and
+sets the label name to the specified string. This configuration applies only
+to the current block (it is reset to default for the next block).
+.TP
+.B \fBre2c:label:yyFillLabel\fP
+Specifies the prefix of \fBYYFILL\fP labels used with \fBre2c:eof\fP and in
+storable state mode.
+.TP
+.B \fBre2c:label:yyloop\fP
+Specifies the name of the label marking the start of the lexer loop with
+\fB\-\-loop\-switch\fP option. The default is \fByyloop\fP\&.
+.TP
+.B \fBre2c:label:yyNext\fP
+Specifies the name of the optional label that follows \fBYYGETSTATE\fP switch
+in storable state mode (enabled with \fBre2c:state:nextlabel\fP). The default
+is \fByyNext\fP\&.
+.TP
+.B \fBre2c:leftmost\-captures\fP
+Same as the \fB\-\-leftmost\-captures\fP option, but can be configured on
+per\-block basis.
+.TP
+.B \fBre2c:lookahead\fP, \fBre2c:flags:lookahead\fP
+Deprecated (see the deprecated \fB\-\-no\-lookahead\fP option).
+.TP
+.B \fBre2c:nested\-ifs\fP, \fBre2c:flags:nested\-ifs\fP, \fBre2c:flags:s\fP
+Same as the \fB\-\-nested\-ifs\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:posix\-captures\fP, \fBre2c:flags:posix\-captures\fP, \fBre2c:flags:P\fP
+Same as the \fB\-\-posix\-captures\fP option, but can be configured on per\-block
+basis.
+.TP
+.B \fBre2c:tags\fP, \fBre2c:flags:tags\fP, \fBre2c:flags:T\fP
+Same as the \fB\-\-tags\fP option, but can be configured on per\-block basis.
+.TP
+.B \fBre2c:tags:expression\fP
+Specifies the expression used for tag variables.
+By default re2c generates expressions of the form \fByyt<N>\fP\&. This might
+be inconvenient, for example if tag variables are defined as fields in a
+struct. All occurrences of \fB@@{tag}\fP or \fB@@\fP are replaced with the
+actual tag name. For example, \fBre2c:tags:expression = \(dqs.@@\(dq;\fP results
+in expressions of the form \fBs.yyt<N>\fP in the generated code.
+See also \fBre2c:api:sigil\fP configuration.
+.TP
+.B \fBre2c:tags:prefix\fP
+Specifies the prefix for tag variable names. The default is \fByyt\fP\&.
+.TP
+.B \fBre2c:sentinel\fP
+Specifies the sentinel symbol used for the end\-of\-input checks (when bounds
+checks are disabled with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP is not
+set). This configuration does not affect code generation: its purpose is to
+verify that the sentinel is not allowed in the middle of a rule, and ensure
+that the lexer won\(aqt read past the end of buffer. The default value is
+\fB\-1\fP (in that case re2c assumes that the sentinel is zero, which is the
+most common case). Only decimal numbers are recognized.
+.TP
+.B \fBre2c:state:abort\fP
+If set to a positive integer value, changes the default case in
+\fBYYGETSTATE\fP switch: by default it aborts the program, and an explicit
+\fB\-1\fP case contains transition to the start of the block.
+.TP
+.B \fBre2c:state:nextlabel\fP
+Controls if the \fBYYGETSTATE\fP switch is followed by an \fByyNext\fP label
+(the default value is zero, which corresponds to no label).
+Alternatively one can use \fBre2c:label:start\fP to generate a specific start
+label, or an explicit \fBgetstate:re2c\fP directive to generate the
+\fBYYGETSTATE\fP switch separately from the lexer block.
+.TP
+.B \fBre2c:unsafe\fP, \fBre2c:flags:unsafe\fP
+Same as the \fB\-\-no\-unsafe\fP option, but can be configured on per\-block
+basis.
+If set to zero, it suppresses the generation of \fBunsafe\fP wrappers around
+\fBYYPEEK\fP\&. The default is non\-zero (wrappers are generated).
+This configuration is specific to Rust.
+.TP
+.B \fBre2c:variable:yyaccept\fP
+Specifies the name of the \fByyaccept\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yybm\fP
+Specifies the name of the \fByybm\fP variable (used for bitmaps).
+.TP
+.B \fBre2c:variable:yybm:hex\fP, \fBre2c:yybm:hex\fP
+If set to nonzero, bitmaps for the \fB\-\-bit\-vectors\fP option are generated
+in hexadecimal format. The default is zero (bitmaps are in decimal format).
+.TP
+.B \fBre2c:variable:yych\fP
+Specifies the name of the \fByych\fP variable (see the API primitives
+section).
+.TP
+.B \fBre2c:variable:yych:emit\fP, \fBre2c:yych:emit\fP
+If set to zero, \fByych\fP definition is not generated.
+The default is non\-zero.
+.TP
+.B \fBre2c:variable:yych:conversion\fP, \fBre2c:yych:conversion\fP
+If set to non\-zero, re2c automatically generates a conversion to \fBYYCTYPE\fP
+every time \fByych\fP is read. The default is zero (no conversion).
+.TP
+.B \fBre2c:variable:yyctable\fP
+Specifies the name of the \fByyctable\fP variable (the jump table generated
+for \fBYYGETCONDITION\fP switch with \fB\-\-computed\-gotos\fP option).
+.TP
+.B \fBre2c:variable:yytarget\fP
+Specifies the name of the \fByytarget\fP variable.
+.TP
+.B \fBre2c:variable:yystable\fP
+Deprecated.
+.TP
+.B \fBre2c:variable:yystate\fP
+Specifies the name of the \fByystate\fP variable (used with the
+\fB\-\-loop\-switch\fP option to store the current DFA state).
+.TP
+.B \fBre2c:yyfill:check\fP
+If set to zero, suppresses the generation of pre\-\fBYYFILL\fP check for the
+number of input characters (the \fBYYLESSTHAN\fP definition in generic API and
+the \fBYYLIMIT\fP\-based comparison in C pointer API). The default is non\-zero
+(generate the check).
+.TP
+.B \fBre2c:yyfill:enable\fP
+If set to zero, suppresses the generation of \fBYYFILL\fP (together
+with the check). This should be used when the whole input fits into one piece
+of memory (there is no need for buffering) and the end\-of\-input checks do not
+rely on the \fBYYFILL\fP checks (e.g. if a sentinel character is used).
+Use warnings (\fB\-W\fP option) and \fBre2c:sentinel\fP configuration to verify
+that the generated lexer cannot read past the end of input.
+The default is non\-zero (\fBYYFILL\fP is enabled).
+.TP
+.B \fBre2c:yyfill:parameter\fP
+If set to zero, suppresses the generation of parameter passed to \fBYYFILL\fP\&.
+The parameter is the minimum number of characters that must be supplied.
+Defaults to non\-zero (the parameter is generated).
+This configuration can be overridden with \fBre2c:define:YYFILL:naked\fP or
+\fBre2c:api:style\fP\&.
+.UNINDENT
+.SH REGULAR EXPRESSIONS
+.sp
+re2c uses the following syntax for regular expressions:
+.INDENT 0.0
+.IP \(bu 2
+\fB\(dqfoo\(dq\fP case\-sensitive string literal
+.IP \(bu 2
+\fB\(aqfoo\(aq\fP case\-insensitive string literal
+.IP \(bu 2
+\fB[a\-xyz]\fP, \fB[^a\-xyz]\fP character class (possibly negated)
+.IP \(bu 2
+\fB\&.\fP any character except newline
+.IP \(bu 2
+\fBR \e S\fP difference of character classes \fBR\fP and \fBS\fP
+.IP \(bu 2
+\fBR*\fP zero or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR+\fP one or more occurrences of \fBR\fP
+.IP \(bu 2
+\fBR?\fP optional \fBR\fP
+.IP \(bu 2
+\fBR{n}\fP repetition of \fBR\fP exactly \fBn\fP times
+.IP \(bu 2
+\fBR{n,}\fP repetition of \fBR\fP at least \fBn\fP times
+.IP \(bu 2
+\fBR{n,m}\fP repetition of \fBR\fP from \fBn\fP to \fBm\fP times
+.IP \(bu 2
+\fB(R)\fP just \fBR\fP; parentheses are used to override precedence.
+If submatch extraction is enabled, \fB(R)\fP is a capturing or a
+non\-capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fB(!R)\fP
+If submatch extraction is enabled, \fB(!R)\fP is a non\-capturing or a
+capturing group depending on \fB\-\-invert\-captures\fP option.
+.IP \(bu 2
+\fBR S\fP concatenation: \fBR\fP followed by \fBS\fP
+.IP \(bu 2
+\fBR | S\fP alternative: \fBR\fP or \fBS\fP
+.IP \(bu 2
+\fBR / S\fP lookahead: \fBR\fP followed by \fBS\fP, but \fBS\fP is not consumed
+.IP \(bu 2
+\fBname\fP the regular expression defined as \fBname\fP (or literal string
+\fB\(dqname\(dq\fP in Flex compatibility mode)
+.IP \(bu 2
+\fB{name}\fP the regular expression defined as \fBname\fP in Flex
+compatibility mode
+.IP \(bu 2
+\fB@stag\fP an \fIs\-tag\fP: saves the last input position at which \fB@stag\fP
+matches in a variable named \fBstag\fP
+.IP \(bu 2
+\fB#mtag\fP an \fIm\-tag\fP: saves all input positions at which \fB#mtag\fP matches
+in a variable named \fBmtag\fP
+.UNINDENT
+.sp
+Character classes and string literals may contain the following escape
+sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP,
+octal escapes \fB\eooo\fP and hexadecimal escapes \fB\exhh\fP, \fB\euhhhh\fP and
+\fB\eUhhhhhhhh\fP\&.
+.SH HANDLING THE END OF INPUT
+.sp
+One of the main problems for the lexer is to know when to stop.
+There are a few terminating conditions:
+.INDENT 0.0
+.IP \(bu 2
+the lexer may match some rule (including default rule \fB*\fP) and come to a
+final state
+.IP \(bu 2
+the lexer may fail to match any rule and come to a default state
+.IP \(bu 2
+the lexer may reach the end of input
+.UNINDENT
+.sp
+The first two conditions terminate the lexer in a \(dqnatural\(dq way: it comes to a
+state with no outgoing transitions, and the matching automatically stops. The
+third condition, end of input, is different: it may happen in any state, and the
+lexer should be able to handle it. Checking for the end of input interrupts the
+normal lexer workflow and adds conditional branches to the generated program,
+therefore it is necessary to minimize the number of such checks. re2c supports a
+few different methods for handling the end of input. Which one to use depends on
+the complexity of regular expressions, the need for buffering, performance
+considerations and other factors. Here is a list of methods:
+.INDENT 0.0
+.IP \(bu 2
+\fBSentinel.\fP
+This method eliminates the need for the end of input checks altogether. It is
+simple and efficient, but limited to the case when there is a natural
+\(dqsentinel\(dq character that can never occur in valid input. This character may
+still occur in invalid input, but it should not be allowed by the regular
+expressions, except perhaps as the last character of a rule. The sentinel is
+appended at the end of input and serves as a stop signal: when the lexer reads
+this character, it is either a syntax error or the end of input. In both
+cases the lexer should stop. This method is used if \fBYYFILL\fP is disabled
+with \fBre2c:yyfill:enable = 0;\fP and \fBre2c:eof\fP has the default value
+\fB\-1\fP\&.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBSentinel with bounds checks.\fP
+This method is generic: it allows handling any input without restrictions on
+the regular expressions. The idea is to reduce the number of end of input
+checks by performing them only on certain characters. Similar to the
+\(dqsentinel\(dq method, one of the characters is chosen as a \(dqsentinel\(dq and
+appended at the end of input. However, there is no restriction on where the
+sentinel may occur (in fact, any character can be chosen for a sentinel).
+When the lexer reads this character, it additionally performs a bounds check.
+If the current position is within bounds, the lexer resumes matching and
+handles the sentinel as a regular character. Otherwise it invokes \fBYYFILL\fP
+(unless it is disabled). If more input is supplied, the lexer will rematch the
+last character and continue as if the sentinel wasn\(aqt there. Otherwise it must
+be the real end of input, and the lexer stops. This method is used when
+\fBre2c:eof\fP has non\-negative value (it should be set to the numeric value of
+the sentinel). \fBYYFILL\fP is optional.
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBBounds checks with padding.\fP
+This method is generic, and it may be faster than the \(dqsentinel with bounds
+checks\(dq method, but it is also more complex. The idea is to partition DFA
+states into strongly connected components (SCCs) and generate a single check
+per SCC for enough characters to cover the longest non\-looping path in this
+SCC. This reduces the number of checks, but there is a problem with short
+lexemes at the end of input, as the check requires enough characters to cover
+the longest lexeme. This can be fixed by padding the input with a few fake
+characters that do not form a valid lexeme suffix (so that the lexer cannot
+match them). The length of padding should be \fBYYMAXFILL\fP, generated with
+\fB/*!max:re2c*/\fP\&. If there is not enough input, the lexer invokes \fBYYFILL\fP
+which should supply at least the required number of characters or not return.
+This method is used if \fBYYFILL\fP is enabled and \fBre2c:eof\fP is \fB\-1\fP
+(this is the default configuration).
+.nf
+
+.fi
+.sp
+.IP \(bu 2
+\fBCustom checks.\fP
+Generic API allows overriding basic operations like reading a character,
+which makes it possible to include the end\-of\-input checks as part of them.
+This approach is error\-prone and should be used with caution. To use a custom
+method, enable generic API with \fB\-\-api custom\fP or \fBre2c:api = custom;\fP and
+disable default bounds checks with \fBre2c:yyfill:enable = 0;\fP or
+\fBre2c:yyfill:check = 0;\fP\&.
+.UNINDENT
+.sp
+The following subsections contain an example of each method.
+.SS Sentinel
+.sp
+This example uses a sentinel character to handle the end of input. The program
+counts space\-separated words in a null\-terminated string. The sentinel is null:
+it is the last character of each input string, and it is not allowed in the
+middle of a lexeme by any of the rules (in particular, it is not included in
+character ranges where it is easy to overlook). If a null occurs in the middle
+of a string, it is a syntax error and the lexer will match default rule \fB*\fP,
+but it won\(aqt read past the end of input or crash (use
+\fI\%\-Wsentinel\-in\-midrule\fP
+warning and \fBre2c:sentinel\fP configuration to verify this). Configuration
+\fBre2c:yyfill:enable = 0;\fP suppresses the generation of bounds checks and
+\fBYYFILL\fP invocations.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+// Expects a null\-terminated string.
+fn lex(yyinput: [:0]const u8) i32 {
+    var yycursor: u32 = 0;
+    var count: i32 = 0;
+
+    loop: while (true) {
+        %{
+            re2c:yyfill:enable = 0;
+
+            *      { return \-1; }
+            [\ex00] { return count; }
+            [a\-z]+ { count += 1; continue :loop; }
+            [ ]+   { continue :loop; }
+        %}
+    }
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dq\(dq), 0);
+    try std.testing.expectEqual(lex(\(dqone two three\(dq), 3);
+    try std.testing.expectEqual(lex(\(dqf0ur\(dq), \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Sentinel with bounds checks
+.sp
+This example uses sentinel with bounds checks to handle the end of input (this
+method was added in version 1.2). The program counts space\-separated
+single\-quoted strings. The sentinel character is null, which is specified with
+\fBre2c:eof = 0;\fP configuration. As in the \fI\%sentinel\fP method, null is the last
+character of each input string, but it is allowed in the middle of a rule (for
+example, \fB\(aqaaa\e0aa\(aq\e0\fP is valid input, but \fB\(aqaaa\e0\fP is a syntax error).
+Bounds checks are generated in each state that matches an input character, but
+they are scoped to the branch that handles null. Bounds checks are of the form
+\fBYYLIMIT <= YYCURSOR\fP or \fBYYLESSTHAN(1)\fP with generic API. If the check
+condition is true, lexer has reached the end of input and should stop
+(\fBYYFILL\fP is disabled with \fBre2c:yyfill:enable = 0;\fP as the input fits into
+one buffer, see the \fI\%YYFILL with sentinel\fP section for an example that uses
+\fBYYFILL\fP). Reaching the end of input opens three possibilities: if the lexer
+is in the initial state it will match the end\-of\-input rule \fB$\fP, otherwise it
+may fall back to a previously matched rule (including default rule \fB*\fP) or go
+to a default state, causing
+\fI\%\-Wundefined\-control\-flow\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+// Expects a null\-terminated string.
+fn lex(yyinput: [:0]const u8) i32 {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    const yylimit: usize = yyinput.len; // points at the terminating null
+    var count: i32 = 0;
+
+    loop: while (true) {
+        %{
+            re2c:yyfill:enable = 0;
+            re2c:eof = 0;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            *    { return \-1; }
+            $    { return count; }
+            str  { count += 1; continue :loop; }
+            [ ]+ { continue :loop; }
+        %}
+    }
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dq\(dq), 0);
+    try std.testing.expectEqual(lex(\(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq), 3);
+    try std.testing.expectEqual(lex(\(dq\(aqunterminated\e\e\(aq\(dq), \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Bounds checks with padding
+.sp
+This example uses bounds checks with padding to handle the end of input (this
+method is enabled by default). The program counts space\-separated single\-quoted
+strings. There is a padding of \fBYYMAXFILL\fP null characters appended at the end
+of input, where \fBYYMAXFILL\fP value is autogenerated with \fB/*!max:re2c*/\fP\&. It
+is not necessary to use null for padding \-\-\- any characters can be used as long
+as they do not form a valid lexeme suffix (in this example padding should not
+contain single quotes, as they may be mistaken for a suffix of a single\-quoted
+string). There is a \(dqstop\(dq rule that matches the first padding character (null)
+and terminates the lexer (note that it checks if null is at the beginning of
+padding, otherwise it is a syntax error). Bounds checks are generated only in
+some states that are determined by the strongly connected components of the
+underlying automaton. Checks have the form \fB(YYLIMIT \- YYCURSOR) < n\fP or
+\fBYYLESSTHAN(n)\fP with generic API, where \fBn\fP is the minimum number of
+characters that are needed for the lexer to proceed (it also means that the next
+bounds check will occur in at most \fBn\fP characters). If the check condition is
+true, the lexer has reached the end of input and will invoke \fBYYFILL(n)\fP that
+should either supply at least \fBn\fP input characters or not return. In this
+example \fBYYFILL\fP always fails and terminates the lexer with an error (which is
+fine because the input fits into one buffer). See the \fI\%YYFILL with padding\fP
+section for an example that refills the input buffer with \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+%{max %}
+
+fn lex(str: []const u8) !i32 {
+    // Create a copy of the input string padded with yymaxfill zeroes at the end.
+    var yyinput = try std.testing.allocator.alloc(u8, str.len + yymaxfill);
+    defer std.testing.allocator.free(yyinput);
+    std.mem.copy(u8, yyinput[0..], str);
+    std.mem.copy(u8, yyinput[str.len..], &[_]u8{0} ** yymaxfill); // zero padding
+
+    var yycursor: usize = 0;
+    var yylimit: usize = yyinput.len;
+    var count: i32 = 0;
+
+    loop: while (true) {
+        %{
+            re2c:define:YYFILL = \(dqreturn \-1;\(dq;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            [\ex00] {
+                // Check that it is the sentinel, not some unexpected null.
+                return if (yycursor \- 1 == str.len) count else \-1;
+            }
+            str  { count += 1; continue :loop; }
+            [ ]+ { continue :loop; }
+            *    { return \-1; }
+        %}
+    }
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dq\(dq), 0);
+    try std.testing.expectEqual(lex(\(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq), 3);
+    try std.testing.expectEqual(lex(\(dq\(aqunterminated\e\e\(aq\(dq), \-1);
+    try std.testing.expectEqual(lex(\(dq\(aqunexpected \ex00 null\e\e\(aq\(dq), \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Custom checks
+.sp
+This example uses a custom end\-of\-input handling method based on generic API.
+The program counts space\-separated single\-quoted strings. It is the same as the
+\fI\%sentinel\fP example, except that the input is not null\-terminated. To compensate
+for the absence of a sentinel character at the end of input, \fBYYPEEK\fP is
+redefined to perform a bounds check before it reads the next input character.
+This is inefficient because checks are done very often. If the cursor is still
+within bounds, \fBYYPEEK\fP returns the real character; otherwise it returns a fake
+sentinel character.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+// Expects a string without terminating null.
+fn lex(str: []const u8) i32 {
+    var cur: usize = 0;
+    var count: i32 = 0;
+
+    loop: while (true) {
+        %{
+            re2c:api = generic;
+            re2c:yyfill:enable = 0;
+            // YYPEEK returns \(dqfake\(dq terminating null if cursor has reached limit.
+            re2c:define:YYPEEK = \(dqif (cur >= str.len) 0 else str[cur]\(dq;
+            re2c:define:YYSKIP = \(dqcur += 1;\(dq;
+
+            *      { return \-1; }
+            [\ex00] { return count; }
+            [a\-z]+ { count += 1; continue :loop; }
+            [ ]+   { continue :loop; }
+        %}
+    }
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dq\(dq), 0);
+    try std.testing.expectEqual(lex(\(dqone two three\(dq), 3);
+    try std.testing.expectEqual(lex(\(dqf0ur\(dq), \-1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH BUFFER REFILLING
+.sp
+The need for buffering arises when the input cannot be mapped in memory all at
+once: either it is too large, or it comes in a streaming fashion (like reading
+from a socket). The usual technique in such cases is to allocate a fixed\-sized
+memory buffer and process input in chunks that fit into the buffer. When the
+current chunk is processed, it is moved out and new data is moved in. In
+practice it is somewhat more complex, because lexer state consists not of a
+single input position, but a set of interrelated positions:
+.INDENT 0.0
+.IP \(bu 2
+cursor: the next input character to be read (\fBYYCURSOR\fP in C pointer API or
+\fBYYSKIP\fP/\fBYYPEEK\fP in generic API)
+.IP \(bu 2
+limit: the position after the last available input character (\fBYYLIMIT\fP in
+C pointer API, implicitly handled by \fBYYLESSTHAN\fP in generic API)
+.IP \(bu 2
+marker: the position of the most recent match, if any (\fBYYMARKER\fP in default
+API or \fBYYBACKUP\fP/\fBYYRESTORE\fP in generic API)
+.IP \(bu 2
+token: the start of the current lexeme (implicit in re2c API, as it is not
+needed for the normal lexer operation and can be defined and updated by the
+user)
+.IP \(bu 2
+context marker: the position of the trailing context (\fBYYCTXMARKER\fP in
+C pointer API or \fBYYBACKUPCTX\fP/\fBYYRESTORECTX\fP in generic API)
+.IP \(bu 2
+tag variables: submatch positions (defined with \fB/*!stags:re2c*/\fP and
+\fB/*!mtags:re2c*/\fP directives and
+\fBYYSTAGP\fP/\fBYYSTAGN\fP/\fBYYMTAGP\fP/\fBYYMTAGN\fP in generic API)
+.UNINDENT
+.sp
+Not all these are used in every case, but if used, they must be updated by
+\fBYYFILL\fP\&. All active positions are contained in the segment between token and
+cursor, therefore everything between buffer start and token can be discarded,
+the segment from token up to limit should be moved to the beginning of
+buffer, and the free space at the end of buffer should be filled with new data.
+In order to avoid frequent \fBYYFILL\fP calls it is best to fill in as many input
+characters as possible (even though fewer characters might suffice to resume the
+lexer). The details of \fBYYFILL\fP implementation are slightly different
+depending on which EOF handling method is used: the case of EOF rule is somewhat
+simpler than the case of bounds\-checking with padding. Also note that if
+\fB\-f \-\-storable\-state\fP option is used, \fBYYFILL\fP has slightly different
+semantics (described in the section about storable state).
+.SS YYFILL with sentinel
+.sp
+If EOF rule is used, \fBYYFILL\fP is a function\-like primitive that accepts
+no arguments and returns a value which is checked against zero. \fBYYFILL\fP
+invocation is triggered by condition \fBYYLIMIT <= YYCURSOR\fP in C pointer API and
+\fBYYLESSTHAN()\fP in generic API. A non\-zero return value means that \fBYYFILL\fP
+has failed. A successful \fBYYFILL\fP call must supply at least one character and
+adjust input positions accordingly. Limit must always be set to one after the
+last input position in buffer, and the character at the limit position must be
+the sentinel symbol specified by \fBre2c:eof\fP configuration. The pictures below
+show the relative locations of input positions in buffer before and after
+\fBYYFILL\fP call (sentinel symbol is marked with \fB#\fP, and the second picture
+shows the case when there is not enough input to fill the whole buffer).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-\-\-\-\-\-\-\-\-\-E\->
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-\-\-\-\-\-\-\-\-\-E#\->
+             buffer,  marker        cursor        limit
+             token
+
+               <\-\- shift \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D#\-\-E (EOF)
+             buffer       token    marker         limit,
+                                                  cursor
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-\-\-\-\-\-\-\-\-D\-\-\-E#........
+             buffer,  marker       cursor limit
+             token
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses EOF rule.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+const bufsize = 4095;
+
+const State = struct {
+    yyinput: [bufsize + 1]u8,
+    yycursor: usize,
+    yymarker: usize,
+    yylimit: usize,
+    token: usize,
+    eof: bool
+};
+
+fn fill(st: *State, file: anytype) i32 {
+    if (st.eof) { return \-1; } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if (st.token < 1) { return \-2; }
+
+    // Shift buffer contents (discard everything up to the current token).
+    std.mem.copyBackwards(
+        u8, st.yyinput[0..st.yylimit \- st.token], st.yyinput[st.token..st.yylimit]);
+    st.yycursor \-= st.token;
+    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Fill free space at the end of buffer with new data from file.
+    st.yylimit += file.read(st.yyinput[st.yylimit..bufsize]) catch 0;
+    st.yyinput[st.yylimit] = 0; // append sentinel symbol
+
+    // If read less than expected, this is the end of input.
+    st.eof = st.yylimit < bufsize;
+
+    return 0;
+}
+
+fn lex(yyrecord: *State, file: anytype) i32 {
+    var count: i32 = 0;
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor;
+        %{
+            re2c:api = record;
+            re2c:eof = 0;
+            re2c:define:YYFILL = \(dqfill(yyrecord, file) == 0\(dq;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            *    { return \-1; }
+            $    { return count; }
+            str  { count += 1; continue :loop; }
+            [ ]+ { continue :loop; }
+        %}
+    }
+}
+
+test {
+    const fname = \(dqinput\(dq;
+    const content = \(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq ** bufsize;
+    const count = 3 * bufsize; // number of quoted strings written to file
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    var fw = try std.fs.cwd().createFile(fname, .{});
+    try fw.writeAll(content);
+    fw.close();
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    var fr = try std.fs.cwd().openFile(fname, .{ .mode = .read_only});
+    // Normally file would be part of the state struct, but BufferedReader type is unclear.
+    var br = std.io.bufferedReader(fr.reader());
+    var st = State{
+        .yyinput = undefined,
+        .yycursor = bufsize,
+        .yymarker = bufsize,
+        .yylimit = bufsize,
+        .token = bufsize,
+        .eof = false,
+    };
+    // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+    st.yyinput[st.yylimit] = 0;
+
+    // Run the lexer.
+    try std.testing.expectEqual(lex(&st, &br), count);
+
+    // Cleanup: remove input file.
+    fr.close();
+    try std.fs.cwd().deleteFile(fname);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS YYFILL with padding
+.sp
+In the default case (when EOF rule is not used) \fBYYFILL\fP is a function\-like
+primitive that accepts a single argument and does not return any value.
+\fBYYFILL\fP invocation is triggered by condition \fB(YYLIMIT \- YYCURSOR) < n\fP in
+C pointer API and \fBYYLESSTHAN(n)\fP in generic API. The argument passed to
+\fBYYFILL\fP is the minimal number of characters that must be supplied. If it
+fails to do so, \fBYYFILL\fP must not return to the lexer (for that reason it is
+best implemented as a macro that returns from the calling function on failure).
+In case of a successful \fBYYFILL\fP invocation the limit position must be set
+either to one after the last input position in buffer, or to the end of
+\fBYYMAXFILL\fP padding (in case \fBYYFILL\fP has successfully read at least \fBn\fP
+characters, but not enough to fill the entire buffer). The pictures below show
+the relative locations of input positions in buffer before and after \fBYYFILL\fP
+invocation (\fBYYMAXFILL\fP padding on the second picture is marked with \fB#\fP
+symbols).
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-\-\-F\-\-\-\-\-\-\-\-G\->
+             buffer,  marker cursor               limit
+             token
+
+               <\-\- shift \-\->                 <\-\- need \-\->
+             >\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F        (EOF)
+             buffer       token    marker cursor  limit
+
+>\-A\-\-\-\-\-\-\-\-\-\-\-\-B\-\-\-\-\-\-\-\-\-C\-\-\-\-\-D\-\-\-\-\-\-\-E\-F###############
+             buffer,  marker cursor                   limit
+             token                        <\- YYMAXFILL \->
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of a program that reads input file \fBinput\fP in chunks of
+4096 bytes and uses bounds\-checking with padding.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+%{max %}
+const bufsize = 4096;
+
+const State = struct {
+    yyinput: [bufsize + yymaxfill]u8,
+    yycursor: usize,
+    yymarker: usize,
+    yylimit: usize,
+    token: usize,
+    eof: bool
+};
+
+fn fill(st: *State, need: usize, file: anytype) i32 {
+    if (st.eof) { return \-1; } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if (st.token < need) { return \-2; }
+
+    // Shift buffer contents (discard everything up to the current token).
+    std.mem.copyBackwards(
+        u8, st.yyinput[0..st.yylimit \- st.token], st.yyinput[st.token..st.yylimit]);
+    st.yycursor \-= st.token;
+    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Fill free space at the end of buffer with new data from file.
+    st.yylimit += file.read(st.yyinput[st.yylimit..bufsize]) catch 0;
+
+    // If read less than expected, this is the end of input.
+    if (st.yylimit < bufsize) {
+        st.eof = true;
+        @memset(st.yyinput[st.yylimit..st.yylimit + yymaxfill], 0);
+        st.yylimit += yymaxfill;
+    }
+
+    return 0;
+}
+
+fn lex(yyrecord: *State, file: anytype) i32 {
+    var count: i32 = 0;
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor;
+        %{
+            re2c:api = record;
+            re2c:define:YYFILL = \(dq{ if (fill(yyrecord, @@, file) != 0) return \-2; }\(dq;
+
+            str = [\(aq] ([^\(aq\e\e] | [\e\e][^])* [\(aq];
+
+            [\ex00] {
+                // Check that it is the sentinel, not some unexpected null.
+                return if (yyrecord.token == yyrecord.yylimit \- yymaxfill) count else \-1;
+            }
+            str  { count += 1; continue :loop; }
+            [ ]+ { continue :loop; }
+            *    { return \-1; }
+        %}
+    }
+}
+
+test {
+    const fname = \(dqinput\(dq;
+    const content = \(dq\(aqqu\ex00tes\(aq \(aqare\(aq \(aqfine: \e\e\(aq\(aq \(dq ** bufsize;
+    const count = 3 * bufsize; // number of quoted strings written to file
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // strings with zeroes and escaped quotes.
+    var fw = try std.fs.cwd().createFile(fname, .{});
+    try fw.writeAll(content);
+    fw.close();
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    // This immediately triggers YYFILL, as the YYLESSTHAN condition is true.
+    var fr = try std.fs.cwd().openFile(fname, .{ .mode = .read_only});
+    // Normally file would be part of the state struct, but BufferedReader type is unclear.
+    var br = std.io.bufferedReader(fr.reader());
+    var st = State{
+        .yyinput = undefined,
+        .yycursor = bufsize,
+        .yymarker = bufsize,
+        .yylimit = bufsize,
+        .token = bufsize,
+        .eof = false,
+    };
+    @memset(st.yyinput[st.yylimit..st.yylimit + yymaxfill], 0); // zero\-padding at the end
+
+    // Run the lexer.
+    try std.testing.expectEqual(lex(&st, &br), count);
+
+    // Cleanup: remove input file.
+    fr.close();
+    try std.fs.cwd().deleteFile(fname);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH MULTIPLE BLOCKS
+.sp
+Sometimes it is necessary to have multiple interrelated lexers (for example, if
+there is a high\-level state machine that transitions between lexer modes). This
+can be implemented using multiple connected re2c blocks. Another option is to
+use \fI\%start conditions\fP\&.
+.sp
+The implementation of connections between blocks depends on the target language.
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) one can have
+all blocks in one function, each of them prefixed with a label. Transition from
+one block to another is a simple \fBgoto\fP\&.
+In languages that do not have \fBgoto\fP (such as Rust) it is necessary to use a
+loop with a switch on a state variable, similar to the \fByystate\fP loop/switch
+generated by re2c, or else wrap each block in a function and use function calls.
+.sp
+The example below uses multiple blocks to parse binary, octal, decimal and
+hexadecimal numbers. Each base has its own block. The initial block determines
+base and dispatches to other blocks. Common configurations are defined in a
+separate block at the beginning of the program; they are inherited by the other
+blocks.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+// Store u32 number in u64 during parsing to simplify overflow handling.
+const State = struct {
+    yyinput: [:0]const u8,
+    yycursor: usize,
+    yymarker: usize,
+    num: u64,
+};
+
+// Common re2c definitions shared between all functions.
+%{
+    re2c:api = record;
+    re2c:variable:yyrecord = st;
+    re2c:yyfill:enable = 0;
+%}
+
+const ERROR: u64 = @as(u64, std.math.maxInt(u32)) + 1; // overflow
+
+// Add digit with the given base, checking for overflow.
+fn add(st: *State, offs: u8, base: u64) void {
+    const digit = st.yyinput[st.yycursor \- 1] \- offs;
+    st.num = @min(st.num * base + digit, ERROR);
+}
+
+// Convert u64 to optional u32 (null meaning overflow or parse error).
+fn maybeU32(num: u64) ?u32 {
+    return if (num < ERROR) @intCast(num) else null;
+}
+
+fn parse_u32(s: [:0]const u8) ?u32 {
+    var st = State {.yyinput = s, .yycursor = 0, .yymarker = 0, .num = 0};
+    %{
+        \(aq0b\(aq / [01]        { return parse_bin(&st); }
+        \(dq0\(dq                { return parse_oct(&st); }
+        \(dq\(dq / [1\-9]         { return parse_dec(&st); }
+        \(aq0x\(aq / [0\-9a\-fA\-F] { return parse_hex(&st); }
+        *                  { return null; }
+    %}
+}
+
+fn parse_bin(st: *State) ?u32 {
+    bin: while (true) {%{
+        [01] { add(st, 48, 2); continue :bin; }
+        *    { return maybeU32(st.num); }
+    %}}
+}
+
+fn parse_oct(st: *State) ?u32 {
+    oct: while (true) {%{
+        [0\-7] { add(st, 48, 8); continue :oct; }
+        *     { return maybeU32(st.num); }
+    %}}
+}
+
+fn parse_dec(st: *State) ?u32 {
+    dec: while (true) {%{
+        [0\-9] { add(st, 48, 10); continue :dec; }
+        *     { return maybeU32(st.num); }
+    %}}
+}
+
+fn parse_hex(st: *State) ?u32 {
+    hex: while (true) {%{
+        [0\-9] { add(st, 48, 16); continue :hex; }
+        [a\-f] { add(st, 87, 16); continue :hex; }
+        [A\-F] { add(st, 55, 16); continue :hex; }
+        *     { return maybeU32(st.num); }
+    %}}
+}
+
+test {
+    try std.testing.expectEqual(parse_u32(\(dq\(dq), null);
+    try std.testing.expectEqual(parse_u32(\(dq1234567890\(dq), 1234567890);
+    try std.testing.expectEqual(parse_u32(\(dq0b1101\(dq), 13);
+    try std.testing.expectEqual(parse_u32(\(dq0x7Fe\(dq), 2046);
+    try std.testing.expectEqual(parse_u32(\(dq0644\(dq), 420);
+    try std.testing.expectEqual(parse_u32(\(dq9999999999\(dq), null);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH START CONDITIONS
+.sp
+Start conditions are enabled with \fB\-\-start\-conditions\fP option. They provide a
+way to encode multiple interrelated automata within the same re2c block.
+.sp
+Each condition corresponds to a single automaton and has a unique name specified
+by the user and a unique internal number defined by re2c. The numbers are used
+to switch between conditions: the generated code uses \fBYYGETCONDITION\fP and
+\fBYYSETCONDITION\fP primitives to get the current condition or set it to the
+given number. Use \fB/*!conditions:re2c*/\fP directive or the \fB\-\-header\fP option
+to generate numeric condition identifiers. Configuration
+\fBre2c:cond:enumprefix\fP specifies the generated identifier prefix.
+.sp
+In condition mode every rule must be prefixed with a list of comma\-separated
+condition names in angle brackets, or a wildcard \fB<*>\fP to denote all
+conditions. The rule syntax is extended as follows:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \fB< cond\-list > regexp action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp => cond action\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP, sets the current condition to \fBcond\fP and
+executes the associated \fBaction\fP\&.
+.TP
+.B \fB< cond\-list > regexp :=> cond\fP
+A rule that is merged to every condition on the \fBcond\-list\fP\&.
+It matches \fBregexp\fP and immediately transitions to \fBcond\fP (there is
+no semantic action).
+.TP
+.B \fB<! cond\-list > action\fP
+The \fBaction\fP is prepended to semantic actions of all rules for every
+condition on the \fBcond\-list\fP\&. This may be used to deduplicate common
+code.
+.TP
+.B \fB< > action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and executes the \fBaction\fP\&.
+.TP
+.B \fB< > => cond action\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string, sets the current condition to
+\fBcond\fP and executes the \fBaction\fP\&.
+.TP
+.B \fB< > :=> cond\fP
+A rule that is merged to a special entry condition with number zero
+and name \fB\(dq0\(dq\fP\&. It matches empty string and immediately transitions to
+\fBcond\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The code re2c generates for conditions depends on whether re2c uses goto/label
+approach or loop/switch approach to encode the automata.
+.sp
+In languages that have \fBgoto\fP statement (such as C/C++ and Go) conditions are
+naturally implemented as blocks of code prefixed with labels of the form
+\fByyc_<cond>\fP, where \fBcond\fP is a condition name (label prefix can be changed
+with \fBre2c:cond:prefix\fP). Transitions between conditions are implemented using
+\fBgoto\fP and condition labels. Before all conditions re2c generates an initial
+switch on \fBYYGETCONDITION\fP that jumps to the start state of the current condition.
+The shortcut rules \fB:=>\fP bypass the initial switch and jump directly to the
+specified condition (\fBre2c:cond:goto\fP can be used to change the default
+behavior). The rules with semantic actions do not automatically jump to the next
+condition; this should be done by the user\-defined action code.
+.sp
+In languages that do not have \fBgoto\fP (such as Rust) re2c reuses the
+\fByystate\fP variable to store condition numbers. Each condition gets a numeric
+identifier equal to the number of its start state, and a switch between
+conditions is no different than a switch between DFA states of a single
+condition. There is no need for a separate initial condition switch.
+(Since the same approach is used to implement storable states,
+\fBYYGETCONDITION\fP/\fBYYSETCONDITION\fP are redundant if both storable states and
+conditions are used).
+.sp
+The program below uses start conditions to parse binary, octal, decimal and
+hexadecimal numbers. There is a single block where each base has its own
+condition, and the initial condition is connected to all of them. User\-defined
+variable \fBcond\fP stores the current condition number; it is initialized to the
+number of the initial condition generated with \fB/*!conditions:re2c*/\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT \-c
+
+const std = @import(\(dqstd\(dq);
+
+%{conditions %}
+
+const ERROR: u64 = @as(u64, std.math.maxInt(u32)) + 1; // overflow
+
+// Add digit with the given base, checking for overflow.
+fn add(num: *u64, str: [:0]const u8, cur: usize, offs: u8, base: u64) void {
+    num.* = @min(num.* * base + (str[cur \- 1] \- offs), ERROR);
+}
+
+fn parse_u32(yyinput: [:0]const u8) ?u32 {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    var yycond = yycinit;
+    var num: u64 = 0; // Store number in u64 to simplify overflow checks.
+
+    loop: while(true) {
+    %{
+        re2c:yyfill:enable = 0;
+
+        <init> \(aq0b\(aq / [01]        :=> bin
+        <init> \(dq0\(dq                :=> oct
+        <init> \(dq\(dq / [1\-9]         :=> dec
+        <init> \(aq0x\(aq / [0\-9a\-fA\-F] :=> hex
+        <init> * { return null; }
+
+        <bin> [01]  { add(&num, yyinput, yycursor, 48, 2);  continue :loop; }
+        <oct> [0\-7] { add(&num, yyinput, yycursor, 48, 8);  continue :loop; }
+        <dec> [0\-9] { add(&num, yyinput, yycursor, 48, 10); continue :loop; }
+        <hex> [0\-9] { add(&num, yyinput, yycursor, 48, 16); continue :loop; }
+        <hex> [a\-f] { add(&num, yyinput, yycursor, 87, 16); continue :loop; }
+        <hex> [A\-F] { add(&num, yyinput, yycursor, 55, 16); continue :loop; }
+
+        <bin, oct, dec, hex> * {
+            return if (num < ERROR) @intCast(num) else null;
+        }
+    %}}
+}
+
+test {
+    try std.testing.expectEqual(parse_u32(\(dq\(dq), null);
+    try std.testing.expectEqual(parse_u32(\(dq1234567890\(dq), 1234567890);
+    try std.testing.expectEqual(parse_u32(\(dq0b1101\(dq), 13);
+    try std.testing.expectEqual(parse_u32(\(dq0x7Fe\(dq), 2046);
+    try std.testing.expectEqual(parse_u32(\(dq0644\(dq), 420);
+    try std.testing.expectEqual(parse_u32(\(dq9999999999\(dq), null);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH STORABLE STATE
+.sp
+With \fB\-\-storable\-state\fP option re2c generates a lexer that can store
+its current state, return to the caller, and later resume operations exactly
+where it left off. The default mode of operation in re2c is a \(dqpull\(dq model,
+in which the lexer \(dqpulls\(dq more input whenever it needs it. This may be
+unacceptable in cases when the input becomes available piece by piece (for
+example, if the lexer is invoked by the parser, or if the lexer program
+communicates via a socket protocol with some other program that must wait for a
+reply from the lexer before it transmits the next message). Storable state
+feature is intended exactly for such cases: it allows one to generate lexers that
+work in a \(dqpush\(dq model. When the lexer needs more input, it stores its state and
+returns to the caller. Later, when more input becomes available, the caller
+resumes the lexer exactly where it stopped. There are a few changes necessary
+compared to the \(dqpull\(dq model:
+.INDENT 0.0
+.IP \(bu 2
+Define \fBYYGETSTATE()\fP and \fBYYSETSTATE(state)\fP primitives.
+.IP \(bu 2
+Define \fByych\fP, \fByyaccept\fP (if used) and \fBstate\fP variables as a part of
+persistent lexer state. The \fBstate\fP variable should be initialized to \fB\-1\fP\&.
+.IP \(bu 2
+\fBYYFILL\fP should return to the outer program instead of trying to supply more
+input. Return code should indicate that lexer needs more input.
+.IP \(bu 2
+The outer program should recognize situations when lexer needs more input and
+respond appropriately.
+.IP \(bu 2
+Optionally use \fBgetstate:re2c\fP to generate \fBYYGETSTATE\fP switch detached
+from the main lexer. This only works for languages that have \fBgoto\fP (not in
+\fB\-\-loop\-switch\fP mode).
+.IP \(bu 2
+Use \fBre2c:eof\fP and the \fI\%sentinel with bounds checks\fP method to handle the
+end of input. Padding\-based method may not work because it is unclear when to
+append padding: the current end of input may not be the ultimate end of input,
+and appending padding too early may cut off a partially read greedy lexeme.
+Furthermore, due to high\-level program logic getting more input may depend on
+processing the lexeme at the end of buffer (which already is blocked due to
+the end\-of\-input condition).
+.UNINDENT
+.sp
+Here is an example of a \(dqpush\(dq model lexer that simulates reading packets from a
+socket. The lexer loops until it encounters the end of input and returns to the
+calling function. The calling function provides more input by \(dqsending\(dq the next
+packet and resumes lexing. This process stops when all the packets have been
+sent, or when there is an error.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT \-f
+
+const std = @import(\(dqstd\(dq);
+
+const Status = enum {
+    end,
+    ready,
+    waiting,
+    bad_packet,
+    big_packet
+};
+
+// Use a small buffer to cover the case when a lexeme doesn\(aqt fit.
+// In real world use a larger buffer.
+const bufsize = 10;
+
+const State = struct {
+    yyinput: [bufsize + 1]u8,
+    yycursor: usize,
+    yymarker: usize,
+    yylimit: usize,
+    token: usize,
+    yystate: i32,
+    received: usize,
+};
+
+fn fill(st: *State, file: anytype) Status {
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if (st.token < 1) { return Status.big_packet; }
+
+    // Shift buffer contents (discard everything up to the current token).
+    std.mem.copyBackwards(
+        u8, st.yyinput[0..st.yylimit \- st.token], st.yyinput[st.token..st.yylimit]);
+    st.yycursor \-= st.token;
+    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
+    st.yylimit \-= st.token;
+    st.token = 0;
+
+    // Fill free space at the end of buffer with new data from file.
+    st.yylimit += file.read(st.yyinput[st.yylimit..bufsize]) catch 0;
+    st.yyinput[st.yylimit] = 0; // append sentinel symbol
+
+    return Status.ready;
+}
+
+fn lex(yyrecord: *State) Status {
+    var yych: u8 = 0;
+    loop: while (true) {
+        yyrecord.token = yyrecord.yycursor;
+        %{
+            re2c:api = record;
+            re2c:eof = 0;
+            re2c:define:YYFILL = \(dqreturn Status.waiting;\(dq;
+
+            packet = [a\-z]+[;];
+
+            *      { return Status.bad_packet; }
+            $      { return Status.end; }
+            packet { yyrecord.received += 1; continue :loop; }
+        %}
+    }
+}
+
+fn run(expect: Status, packets: []const []const u8) !void {
+    // Create a \(dqpipe\(dq (open the same file for reading and writing).
+    const fname = \(dqinput\(dq;
+    var fw = try std.fs.cwd().createFile(fname, .{});
+    var fr = try std.fs.cwd().openFile(fname, .{ .mode = .read_only});
+
+    // Initialize lexer state: \(gayystate\(ga value is \-1, all offsets are at the end
+    // of buffer. Normally file would be part of the state, but BufferedReader
+    // type is unclear.
+    var br = std.io.bufferedReader(fr.reader());
+    var st = State{
+        .yyinput = undefined,
+        .yycursor = bufsize,
+        .yymarker = bufsize,
+        .yylimit = bufsize,
+        .token = bufsize,
+        .yystate = \-1,
+        .received = 0,
+    };
+    // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+    st.yyinput[st.yylimit] = 0;
+
+    // Main loop. The buffer contains incomplete data which appears packet by
+    // packet. When the lexer needs more input it saves its internal state and
+    // returns to the caller which should provide more input and resume lexing.
+    var status = Status.ready;
+    var send: usize = 0;
+    while (true) {
+        status = lex(&st);
+        if (status == Status.end) {
+            break;
+        } else if (status == Status.waiting) {
+            if (send < packets.len) {
+                std.log.debug(\(dqsending packet {}\(dq, .{send});
+                try fw.writeAll(packets[send]);
+                send += 1;
+            }
+            status = fill(&st, &br);
+            std.log.debug(\(dqfilled buffer [{s}], status {}\(dq, .{st.yyinput, status});
+            if (status != Status.ready) {
+                break;
+            }
+        } else if (status == Status.bad_packet) {
+            break;
+        }
+    }
+
+    // Check results.
+    try std.testing.expectEqual(status, expect);
+    if (status == Status.end) { try std.testing.expectEqual(st.received, send); }
+
+    // Cleanup: remove input file.
+    fw.close();
+    fr.close();
+    try std.fs.cwd().deleteFile(fname);
+}
+
+test {
+    try run(Status.end, &[_][]const u8{});
+    try run(Status.end, &[_][]const u8{\(dqzero;\(dq, \(dqone;\(dq, \(dqtwo;\(dq, \(dqthree;\(dq, \(dqfour;\(dq});
+    try run(Status.bad_packet, &[_][]const u8{\(dq??;\(dq});
+    try run(Status.big_packet, &[_][]const u8{\(dqlooooooooooooong;\(dq});
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH REUSABLE BLOCKS
+.sp
+Reusable blocks are re2c blocks that can be reused any number of times and
+combined with other re2c blocks. They are defined with
+\fB/*!rules:re2c[:<name>] ... */\fP (the \fB<name>\fP is optional). A rules block
+can be used in two contexts: either in a use block, or in a use directive inside
+of another block. The code for a rules block is generated at every point of use.
+.sp
+Use blocks are defined with \fB/*!use:re2c[:<name>] ... */\fP\&. The \fB<name>\fP
+is optional; if not specified, the associated rules block is the most recent one
+(whether named or unnamed). A use block can add named definitions,
+configurations and rules of its own.
+An important use case for use blocks is a lexer that supports multiple input
+encodings: the same rules block is reused multiple times with encoding\-specific
+configurations (see the example below).
+.sp
+In\-block use directive \fB!use:<name>;\fP can be used from inside of a re2c
+block. It merges the referenced block \fB<name>\fP into the current one. If some
+of the merged rules and configurations overlap with the previously defined ones,
+conflicts are resolved in the usual way: the earliest rule takes priority, and
+the latest configuration overrides preceding ones. The exception is the special
+rules \fB*\fP, \fB$\fP and (in condition mode) \fB<!>\fP, for which a block\-local
+definition overrides any inherited ones. Use directive allows one to combine
+different re2c blocks together in one block (see the example below).
+.sp
+Named blocks and in\-block use directive were added in re2c version 2.2.
+Since that version reusable blocks are allowed by default (no special option
+is needed). Before version 2.2 reuse mode was enabled with \fB\-r \-\-reusable\fP
+option. Before version 1.2 reusable blocks could not be mixed with normal
+blocks.
+.SS Example of a \fB!use\fP directive
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+// This example shows how to combine reusable re2c blocks: two blocks
+// (\(aqcolors\(aq and \(aqfish\(aq) are merged into one. The \(aqsalmon\(aq rule occurs
+// in both blocks; the \(aqfish\(aq block takes priority because it is used
+// earlier. Default rule * occurs in all three blocks; the local (not
+// inherited) definition takes priority.
+
+const std = @import(\(dqstd\(dq);
+
+const Ans = enum {color, fish, dunno};
+
+%{rules:colors
+    *                            { @panic(\(dqah\(dq); }
+    \(dqred\(dq | \(dqsalmon\(dq | \(dqmagenta\(dq { return Ans.color; }
+%}
+
+%{rules:fish
+    *                            { @panic(\(dqoh\(dq); }
+    \(dqhaddock\(dq | \(dqsalmon\(dq | \(dqeel\(dq { return Ans.fish; }
+%}
+
+fn lex(yyinput: [:0]const u8) Ans {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    %{
+        re2c:yyfill:enable = 0;
+
+        !use:fish;
+        !use:colors;
+        * { return Ans.dunno; } // overrides inherited \(aq*\(aq rules
+    %}
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dqsalmon\(dq), Ans.fish);
+    try std.testing.expectEqual(lex(\(dqwhat?\(dq), Ans.dunno);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Example of a \fB/*!use:re2c ... */\fP block
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT \-\-input\-encoding utf8
+
+// This example supports multiple input encodings: UTF\-8 and UTF\-32.
+// Both lexers are generated from the same rules block, and the use
+// blocks add only encoding\-specific configurations.
+
+const std = @import(\(dqstd\(dq);
+
+%{rules
+    re2c:yyfill:enable = 0;
+
+    \(dq∀x ∃y\(dq { return yycursor; }
+    *       { return null; }
+%}
+
+fn lex_utf8(yyinput: []const u8) ?usize {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    %{use
+        re2c:encoding:utf8 = 1;
+        re2c:define:YYCTYPE = u8; // the default
+    %}
+}
+
+fn lex_utf32(yyinput: []const u32) ?usize {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    %{use
+        re2c:encoding:utf32 = 1;
+        re2c:define:YYCTYPE = u32;
+    %}
+}
+
+test {
+    const s8 = [_]u8{0xe2, 0x88, 0x80, 0x78, 0x20, 0xe2, 0x88, 0x83, 0x79};
+    try std.testing.expectEqual(lex_utf8(&s8), s8.len);
+
+    const s32 = [_]u32{0x2200, 0x78, 0x20, 0x2203, 0x79};
+    try std.testing.expectEqual(lex_utf32(&s32), s32.len);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SUBMATCH EXTRACTION
+.sp
+re2c has two options for submatch extraction.
+.INDENT 0.0
+.TP
+.B \fBTags\fP
+The first option is to use standalone \fItags\fP of the form \fB@stag\fP or
+\fB#mtag\fP, where \fBstag\fP and \fBmtag\fP are arbitrary user\-defined names.
+Tags are enabled with \fB\-T \-\-tags\fP option or \fBre2c:tags = 1\fP
+configuration. Semantically tags are position markers: they can be
+inserted anywhere in a regular expression, and they bind to the
+corresponding position (or multiple positions) in the input string.
+\fIS\-tags\fP bind to the last matching position, and \fIm\-tags\fP bind to a list of
+positions (they may be used in repetition subexpressions, where a single
+position in a regular expression corresponds to multiple positions in the
+input string). All tags should be defined by the user, either manually or
+with the help of \fBsvars:re2c\fP and \fBmvars:re2c\fP directives.
+If there is more than one way tags can be matched against the input,
+ambiguity is resolved using leftmost greedy disambiguation strategy.
+.TP
+.B \fBCaptures\fP
+The second option is to use \fIcapturing groups\fP\&. They are enabled with
+\fB\-\-captures\fP option or \fBre2c:captures = 1\fP configuration. There are two
+flavours for different disambiguation policies: \fB\-\-leftmost\-captures\fP
+(the default) is for leftmost greedy policy, and \fB\-\-posix\-captures\fP is
+for POSIX longest\-match policy. In this mode all parenthesized
+subexpressions are considered capturing groups, and a bang can be used to
+mark non\-capturing groups: \fB(! ... )\fP\&. With \fB\-\-invert\-captures\fP option or
+\fBre2c:invert\-captures = 1\fP configuration the meaning of bang is inverted.
+The number of groups for the matching rule is stored in a variable
+\fByynmatch\fP (the whole regular expression is group number zero), and
+submatch results are stored in \fByypmatch\fP array. Both \fByynmatch\fP and
+\fByypmatch\fP should be defined by the user, and \fByypmatch\fP size must be at
+least \fB[yynmatch * 2]\fP\&. re2c provides a directive \fBmaxnmatch:re2c\fP
+that defines \fBYYMAXNMATCH\fP, a constant that is equal to the maximum value of
+\fByynmatch\fP among all rules.
+.TP
+.B \fBCaptvars\fP
+Another way to use capturing groups is the \fB\-\-captvars\fP option or
+\fBre2c:captvars = 1\fP configuration. The only difference with \fB\-\-captures\fP
+is in the way the generated code stores submatch results: instead of
+\fByynmatch\fP and \fByypmatch\fP re2c generates variables \fByytl<k>\fP and
+\fByytr<k>\fP for \fIk\fP\-th capturing group (the user should declare these with
+\fBsvars:re2c\fP directive). Captures with variables support two disambiguation
+policies: \fB\-\-leftmost\-captvars\fP or \fBre2c:leftmost\-captvars = 1\fP for
+leftmost greedy policy (the default one) and \fB\-\-posix\-captvars\fP or
+\fBre2c:posix\-captvars = 1\fP for POSIX longest\-match policy.
+.UNINDENT
+.sp
+Under the hood all these options translate into tags and
+\fI\%Tagged Deterministic Finite Automata with Lookahead\fP\&.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of \fItag variables\fP which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using \fBstags:re2c\fP or \fBmtags:re2c\fP directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by \fBYYFILL\fP\&.
+.sp
+S\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+save input position to an s\-tag: \fBt = YYCURSOR\fP with C pointer API or a
+user\-defined operation \fBYYSTAGP(t)\fP with generic API
+.IP \(bu 2
+save default value to an s\-tag: \fBt = NULL\fP with C pointer API or a
+user\-defined operation \fBYYSTAGN(t)\fP with generic API
+.IP \(bu 2
+copy one s\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+M\-tags support the following operations:
+.INDENT 0.0
+.IP \(bu 2
+append input position to an m\-tag: a user\-defined operation \fBYYMTAGP(t)\fP
+with both default and generic API
+.IP \(bu 2
+append default value to an m\-tag: a user\-defined operation \fBYYMTAGN(t)\fP
+with both default and generic API
+.IP \(bu 2
+copy one m\-tag to another: \fBt1 = t2\fP
+.UNINDENT
+.sp
+S\-tags can be implemented as scalar values (pointers or offsets). M\-tags need a
+more complex representation, as they need to store a sequence of tag values. The
+most naive and inefficient representation of an m\-tag is a list (array, vector)
+of tag values; a more efficient representation is to store all m\-tags in a
+prefix\-tree represented as array of nodes \fB(v, p)\fP, where \fBv\fP is tag value
+and \fBp\fP is a pointer to parent node.
+.sp
+Here is a simple example of using s\-tags to parse semantic versions consisting
+of three numeric components: major, minor, patch (the latter is optional).
+See below for a more complex example that uses \fBYYFILL\fP\&.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+const SemVer = struct {
+    major: u32,
+    minor: u32,
+    patch: u32,
+};
+
+const none = std.math.maxInt(usize);
+
+fn s2n(str: []const u8) u32 { // convert a pre\-parsed string to a number
+    var n: u32 = 0;
+    for (str) |c| { n = n * 10 + (c \- 48); }
+    return n;
+}
+
+fn parse(yyinput: [:0]const u8) ?SemVer {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+
+    // Final tag variables available in semantic action.
+    %{svars format = \(dqvar @@: usize = none;\(dq; %}
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    %{stags format = \(dqvar @@: usize = none;\(dq; %}
+
+    %{
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 \(dq.\(dq @t3 num @t4 (\(dq.\(dq @t5 num)? [\ex00] {
+            return SemVer {
+                .major = s2n(yyinput[t1..t2]),
+                .minor = s2n(yyinput[t3..t4]),
+                .patch = if (t5 == none) 0 else s2n(yyinput[t5..yycursor \- 1]),
+            };
+        }
+        * { return null; }
+    %}
+}
+
+test {
+    try std.testing.expectEqual(parse(\(dq23.34\(dq), SemVer{.major = 23, .minor = 34, .patch = 0});
+    try std.testing.expectEqual(parse(\(dq1.2.99999\(dq), SemVer{.major = 1, .minor = 2, .patch = 99999});
+    try std.testing.expectEqual(parse(\(dq1.a\(dq), null);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is a more complex example of using s\-tags with \fBYYFILL\fP to parse a file
+with newline\-separated semantic versions. Tag variables are part of the lexer
+state, and they are adjusted in \fBYYFILL\fP like other input positions.
+Note that it is necessary for s\-tags because their values are invalidated after
+shifting buffer contents. It may not be necessary in a custom implementation
+where tag variables store offsets relative to the start of the input string
+rather than the buffer, which may be the case with m\-tags.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+const bufsize = 4095;
+const none = std.math.maxInt(usize);
+
+const err = error.SyntaxError;
+
+const SemVer = struct {
+    major: u32,
+    minor: u32,
+    patch: u32,
+};
+
+fn s2n(str: []const u8) u32 { // convert a pre\-parsed string to a number
+    var n: u32 = 0;
+    for (str) |c| { n = n * 10 + (c \- 48); }
+    return n;
+}
+
+const State = struct {
+    yyinput: [bufsize + 1]u8,
+    yycursor: usize,
+    yymarker: usize,
+    yylimit: usize,
+    token: usize,
+    // Intermediate tag variables must be part of the lexer state passed to YYFILL.
+    // They don\(aqt correspond to tags and should be autogenerated by re2c.
+    %{stags format = \(dq@@: usize,\en\(dq; %}
+    eof: bool
+};
+
+fn fill(st: *State, file: anytype) i32 {
+    if (st.eof) { return \-1; } // unexpected EOF
+
+    // Error: lexeme too long. In real life can reallocate a larger buffer.
+    if (st.token < 1) { return \-2; }
+
+    // Shift buffer contents (discard everything up to the current token).
+    std.mem.copyBackwards(
+        u8, st.yyinput[0..st.yylimit \- st.token], st.yyinput[st.token..st.yylimit]);
+    st.yycursor \-= st.token;
+    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
+    st.yylimit \-= st.token;
+    // Tag variables need to be shifted like other input positions. The check
+    // for NONE is only needed if some tags are nested inside of alternative or
+    // repetition, so that they can have NONE value.
+    %{stags format = \(dqif (st.@@ != none) st.@@ = @subWithOverflow(st.@@, st.token)[0];\en\(dq; %}
+    st.token = 0;
+
+    // Fill free space at the end of buffer with new data from file.
+    st.yylimit += file.read(st.yyinput[st.yylimit..bufsize]) catch 0;
+    st.yyinput[st.yylimit] = 0; // append sentinel symbol
+
+    // If read less than expected, this is the end of input.
+    st.eof = st.yylimit < bufsize;
+
+    return 0;
+}
+
+fn parse(st: *State, file: anytype) !std.ArrayList(SemVer) { // caller must deinit the result
+    var vers = std.ArrayList(SemVer).init(std.testing.allocator);
+    errdefer vers.deinit(); // free already collected versions if parsing fails
+    // Final tag variables available in semantic action.
+    %{svars format = \(dqvar @@: usize = 0;\en\(dq; %}
+
+    loop: while (true) {
+        st.token = st.yycursor;
+        %{
+            re2c:api = record;
+            re2c:eof = 0;
+            re2c:tags = 1;
+            re2c:variable:yyrecord = st;
+            re2c:define:YYFILL = \(dqfill(st, file) == 0\(dq;
+
+            num = [0\-9]+;
+
+            num @t1 \(dq.\(dq @t2 num @t3 (\(dq.\(dq @t4 num)? [\en] {
+                try vers.append(SemVer {
+                    .major = s2n(st.yyinput[st.token..t1]),
+                    .minor = s2n(st.yyinput[t2..t3]),
+                    .patch = if (t4 == none) 0 else s2n(st.yyinput[t4..st.yycursor \- 1]),
+                });
+                continue :loop;
+            }
+            $ { return vers; }
+            * { return error.SyntaxError; }
+        %}
+    }
+}
+
+test {
+    const fname = \(dqinput\(dq;
+    const content = \(dq1.22.333\en\(dq ** bufsize;
+
+    // Prepare input file: a few times the size of the buffer, containing
+    // newline\-terminated semantic versions.
+    const fw = try std.fs.cwd().createFile(fname, .{});
+    try fw.writeAll(content);
+    fw.close();
+
+    // Prepare lexer state: all offsets are at the end of buffer.
+    const fr = try std.fs.cwd().openFile(fname, .{ .mode = .read_only});
+    // Normally file would be part of the state struct, but BufferedReader type is unclear.
+    var br = std.io.bufferedReader(fr.reader());
+    var st = State{
+        .yyinput = undefined,
+        .yycursor = bufsize,
+        .yymarker = bufsize,
+        .yylimit = bufsize,
+        .token = bufsize,
+        %{stags format = \(dq.@@ = none,\en\(dq; %}
+        .eof = false,
+    };
+    // Sentinel at \(gayylimit\(ga offset is set to zero, which triggers YYFILL.
+    st.yyinput[st.yylimit] = 0;
+
+    // Manually construct expected result.
+    var expect = std.ArrayList(SemVer).init(std.testing.allocator);
+    for (0..bufsize) |_| try expect.append(SemVer{.major = 1, .minor = 22, .patch = 333});
+
+    // Run the lexer.
+    const result = try parse(&st, &br);
+    try std.testing.expectEqualDeep(result, expect);
+
+    // Cleanup: free memory and remove input file.
+    expect.deinit();
+    result.deinit();
+    fr.close();
+    try std.fs.cwd().deleteFile(fname);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using capturing groups to parse semantic versions.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+const none = std.math.maxInt(usize);
+
+const SemVer = struct {
+    major: u32,
+    minor: u32,
+    patch: u32,
+};
+
+fn s2n(str: []const u8) u32 { // convert pre\-parsed string to a number
+    var n: u32 = 0;
+    for (str) |c| { n = n * 10 + (c \- 48); }
+    return n;
+}
+
+fn parse(yyinput: [:0]const u8) ?SemVer {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+
+    // Final tag variables available in semantic action.
+    %{svars format = \(dqvar @@: usize = none;\(dq; %}
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    %{stags format = \(dqvar @@: usize = none;\(dq; %}
+
+    %{
+        re2c:yyfill:enable = 0;
+        re2c:captvars = 1;
+
+        num = [0\-9]+;
+
+        (num) \(dq.\(dq (num) (\(dq.\(dq num)? [\ex00] {
+            return SemVer {
+                .major = s2n(yyinput[yytl1..yytr1]),
+                .minor = s2n(yyinput[yytl2..yytr2]),
+                .patch = if (yytl3 == none) 0 else s2n(yyinput[yytl3 + 1..yytr3])
+            };
+        }
+        * { return null; }
+    %}
+}
+
+test {
+    try std.testing.expectEqual(parse(\(dq23.34\(dq), SemVer{.major = 23, .minor = 34, .patch = 0});
+    try std.testing.expectEqual(parse(\(dq1.2.99999\(dq), SemVer{.major = 1, .minor = 2, .patch = 99999});
+    try std.testing.expectEqual(parse(\(dq1.a\(dq), null);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of using m\-tags to parse a version with a variable number of
+components. Tag variables are stored in a trie.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+const none = std.math.maxInt(usize);
+const mtag_root = none \- 1;
+
+const err = error.SyntaxError;
+
+// An m\-tag tree is a way to store histories with an O(1) copy operation.
+// Histories naturally form a tree, as they have common start and fork at some
+// point. The tree is stored as an array of pairs (tag value, link to parent).
+// An m\-tag is represented with a single link in the tree (array index).
+const MtagElem = struct {
+    elem: usize, // tag value
+    pred: usize, // index of the predecessor node or root
+};
+
+// Append a single value to an m\-tag history.
+fn add_mtag(trie: *std.ArrayList(MtagElem), mtag: usize, value: usize) !usize {
+    try trie.append(MtagElem{.elem = value, .pred = mtag});
+    return trie.items.len \- 1;
+}
+
+// Recursively unwind tag histories x and y, appending version components to ver.
+fn unwind(trie: *std.ArrayList(MtagElem),
+          x: usize, // trie index for the history of component start positions, or mtag_root
+          y: usize, // trie index for the history of component end positions, or mtag_root
+          str: []const u8,
+          ver: *std.ArrayList(u32)) !void {
+    // Reached the root of the m\-tag tree, stop recursion.
+    if (x == mtag_root and y == mtag_root) return;
+
+    // Tag histories must have equal length: check this before indexing the trie.
+    std.debug.assert(x != mtag_root and y != mtag_root);
+
+    // Unwind history further first, so that older components are appended first.
+    try unwind(trie, trie.items[x].pred, trie.items[y].pred, str, ver);
+    const ex = trie.items[x].elem;
+    const ey = trie.items[y].elem;
+
+    if (ex != none and ey != none) {
+        // Both tags are valid string indices, extract component.
+        try ver.append(s2n(str[ex..ey]));
+    } else {
+        // Both tags are none (this corresponds to zero repetitions).
+        std.debug.assert(ex == none and ey == none);
+    }
+}
+
+fn s2n(str: []const u8) u32 { // convert a pre\-parsed string to a number
+    var n: u32 = 0;
+    for (str) |c| { n = n * 10 + (c \- 48); }
+    return n;
+}
+
+fn parse(yyinput: [:0]const u8) !std.ArrayList(u32) {
+    var yycursor: usize = 0;
+    var yymarker: usize = 0;
+    var mt = std.ArrayList(MtagElem).init(std.testing.allocator);
+    defer mt.deinit();
+
+    // Final tag variables available in semantic action.
+    %{svars format = \(dqvar @@: usize = none;\(dq; %}
+    %{mvars format = \(dqvar @@: usize = mtag_root;\(dq; %}
+
+    // Intermediate tag variables used by the lexer (must be autogenerated).
+    %{stags format = \(dqvar @@: usize = none;\(dq; %}
+    %{mtags format = \(dqvar @@: usize = mtag_root;\(dq; %}
+
+    %{
+        re2c:define:YYMTAGP = \(dq@@ = add_mtag(&mt, @@, yycursor) catch none;\(dq;
+        re2c:define:YYMTAGN = \(dq@@ = add_mtag(&mt, @@, none) catch none;\(dq;
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        num = [0\-9]+;
+
+        @t1 num @t2 (\(dq.\(dq #t3 num #t4)* [\ex00] {
+            var ver = std.ArrayList(u32).init(std.testing.allocator);
+            try ver.append(s2n(yyinput[t1..t2]));
+            try unwind(&mt, t3, t4, yyinput, &ver);
+            return ver;
+        }
+        * { return error.SyntaxError; }
+    %}
+}
+
+test {
+    const result = try parse(\(dq1\(dq);
+    defer result.deinit(); // released even if an expectation below fails
+    var expect = std.ArrayList(u32).init(std.testing.allocator);
+    defer expect.deinit();
+    try expect.appendSlice(&[_]u32{1});
+    try std.testing.expectEqualDeep(result, expect);
+}
+
+test {
+    const result = try parse(\(dq1.2.3.4.5.6.7\(dq);
+    defer result.deinit(); // released even if an expectation below fails
+    var expect = std.ArrayList(u32).init(std.testing.allocator);
+    defer expect.deinit();
+    try expect.appendSlice(&[_]u32{1, 2, 3, 4, 5, 6, 7});
+    try std.testing.expectEqualDeep(result, expect);
+}
+
+test {
+    const result = parse(\(dq1.2.\(dq) catch null; // trailing dot: the lexer must reject it
+    try std.testing.expectEqualDeep(result, null);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH ENCODING SUPPORT
+.sp
+It is necessary to understand the difference between \fBcode points\fP and
+\fBcode units\fP\&. A code point is a numeric identifier of a symbol. A code unit is
+the smallest unit of storage in the encoded text. A single code point may be
+represented with one or more code units. In a fixed\-length encoding all code
+points are represented with the same number of code units. In a variable\-length
+encoding code points may be represented with a different number of code units.
+Note that the \(dqany\(dq rule \fB[^]\fP matches any code point, but not necessarily
+any code unit (the only way to match any code unit regardless of the encoding
+is the default rule \fB*\fP).
+The generated lexer works with a stream of code units: \fByych\fP stores a code
+unit, and \fBYYCTYPE\fP is the code unit type. Regular expressions, on the other
+hand, are specified in terms of code points. When re2c compiles regular
+expressions to automata it translates code points to code units. This is
+generally not a simple mapping: in variable\-length encodings a single code point
+range may get translated to a complex code unit graph.
+The following encodings are supported:
+.INDENT 0.0
+.IP \(bu 2
+\fBASCII\fP (enabled by default). It is a fixed\-length encoding with code space
+\fB[0\-255]\fP and 1\-byte code points and code units.
+.IP \(bu 2
+\fBEBCDIC\fP (enabled with \fB\-\-ebcdic\fP or \fBre2c:encoding:ebcdic\fP). It is a
+fixed\-length encoding with code space \fB[0\-255]\fP and 1\-byte code points and
+code units.
+.IP \(bu 2
+\fBUCS2\fP (enabled with \fB\-\-ucs2\fP or \fBre2c:encoding:ucs2\fP). It is a
+fixed\-length encoding with code space \fB[0\-0xFFFF]\fP and 2\-byte code points
+and code units.
+.IP \(bu 2
+\fBUTF8\fP (enabled with \fB\-\-utf8\fP or \fBre2c:encoding:utf8\fP). It is a
+variable\-length Unicode encoding. Code unit size is 1 byte. Code points are
+represented with 1 \-\- 4 code units.
+.IP \(bu 2
+\fBUTF16\fP (enabled with \fB\-\-utf16\fP or \fBre2c:encoding:utf16\fP). It is a
+variable\-length Unicode encoding. Code unit size is 2 bytes. Code points are
+represented with 1 \-\- 2 code units.
+.IP \(bu 2
+\fBUTF32\fP (enabled with \fB\-\-utf32\fP or \fBre2c:encoding:utf32\fP). It is a
+fixed\-length Unicode encoding with code space \fB[0\-0x10FFFF]\fP and 4\-byte code
+points and code units.
+.UNINDENT
+.sp
+Include file \fBinclude/unicode_categories.re\fP provides re2c definitions for the
+standard Unicode categories.
+.sp
+Option \fB\-\-input\-encoding\fP specifies source file encoding, which can be used to
+enable Unicode literals in regular expressions. For example
+\fB\-\-input\-encoding utf8\fP tells re2c that the source file is in UTF8 (it differs
+from \fB\-\-utf8\fP which sets input text encoding). Option \fB\-\-encoding\-policy\fP
+specifies the way re2c handles Unicode surrogates (code points in range
+\fB[0xD800\-0xDFFF]\fP).
+.sp
+Below is an example of a lexer for UTF8 encoded Unicode identifiers.
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT \-\-utf8
+
+const std = @import(\(dqstd\(dq);
+
+%{include \(dqunicode_categories.re\(dq %}
+
+fn lex(yyinput: [:0]const u8) bool {
+    var yycursor: u32 = 0;
+    var yymarker: u32 = 0;
+
+    %{
+        re2c:yyfill:enable = 0;
+
+        // Simplified \(dqUnicode Identifier and Pattern Syntax\(dq
+        // (see https://unicode.org/reports/tr31)
+        id_start    = L | Nl | [$_];
+        id_continue = id_start | Mn | Mc | Nd | Pc | [\eu200D\eu05F3];
+        identifier  = id_start id_continue*;
+
+        identifier { return true; }
+        *          { return false; }
+    %}
+}
+
+test {
+    try std.testing.expect(lex(\(dq_Ыдентификатор\(dq));
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH INCLUDE FILES
+.sp
+re2c allows one to include other files using directive \fB/*!include:re2c FILE */\fP
+or \fB!include FILE ;\fP, where \fBFILE\fP is a path to the file to be included.
+The first form should be used outside of re2c blocks, and the second form allows
+one to include a file in the middle of a re2c block. re2c looks for included
+files in the directory of the including file and in include locations, which
+can be specified with \fB\-I\fP option.
+Include directives in re2c work in the same way as C/C++ \fB#include\fP: the contents
+of \fBFILE\fP are copy\-pasted verbatim in place of the directive. Include files
+may have further includes of their own. Use \fB\-\-depfile\fP option to track build
+dependencies of the output file on include files.
+re2c provides some predefined include files that can be found in the
+\fBinclude/\fP subdirectory of the project. These files contain definitions that
+can be useful to other projects (such as Unicode categories) and form something
+like a standard library for re2c.
+Below is an example of using include directive.
+.SS Include file 1 (definitions.zig)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+const Num = enum { integer, floating, nan };
+
+%{
+    number = [1\-9][0\-9]*;
+%}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Include file 2 (extra_rules.re.inc)
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// floating\-point numbers
+frac  = [0\-9]* \(dq.\(dq [0\-9]+ | [0\-9]+ \(dq.\(dq;
+exp   = \(aqe\(aq [+\-]? [0\-9]+;
+float = frac exp? | [0\-9]+ exp;
+
+float { return Num.floating; }
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT
+
+const std = @import(\(dqstd\(dq);
+
+%{include \(dqdefinitions.zig\(dq %}
+
+fn lex(yyinput: [:0]const u8) Num {
+    var yycursor: u32 = 0;
+    var yymarker: u32 = 0;
+    %{
+        re2c:yyfill:enable = 0;
+
+        *      { return Num.nan; }
+        number { return Num.integer; }
+        !include \(dqextra_rules.re.inc\(dq;
+    %}
+}
+
+test {
+    try std.testing.expectEqual(lex(\(dq123\(dq), Num.integer);
+    try std.testing.expectEqual(lex(\(dq123.4567\(dq), Num.floating);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH HEADER FILES
+.sp
+re2c allows one to generate a header file from the input \fB\&.re\fP file using option
+\fB\-t\fP, \fB\-\-type\-header\fP or configuration \fBre2c:flags:type\-header\fP and
+directives \fB/*!header:re2c:on*/\fP and \fB/*!header:re2c:off*/\fP\&. The first directive
+marks the beginning of header file, and the second directive marks the end of
+it. Everything between these directives is processed by re2c, and the generated
+code is written to the file specified by the \fB\-t \-\-type\-header\fP option (or
+\fBstdout\fP if this option was not used). Autogenerated header file may be needed
+in cases when re2c is used to generate definitions of constants, variables and
+structs that must be visible from other translation units.
+.sp
+Here is an example of generating a header file that contains definition of the
+lexer state with tag variables (the number of variables depends on the regular
+grammar and is unknown to the programmer).
+.SS Input file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// re2zig $INPUT \-o $OUTPUT \-\-header lexer/state.zig
+
+const std = @import(\(dqstd\(dq);
+const state = @import(\(dqlexer/state.zig\(dq); // the module is generated by re2c
+
+%{header:on %}
+pub const State = struct {
+    yyinput: [:0]const u8,
+    yycursor: usize,
+    %{stags format = \(dq@@: usize,\(dq; %}
+};
+%{header:off %}
+
+fn lex(yyrecord: *state.State) usize {
+    var t: usize = 0;
+    %{
+        re2c:header = \(dqlexer/state.zig\(dq;
+        re2c:api = record;
+        re2c:yyfill:enable = 0;
+        re2c:tags = 1;
+
+        [a]* @t [b]* { return t; }
+    %}
+}
+
+test {
+    var st = state.State {
+        .yyinput = \(dqab\(dq,
+        .yycursor = 0,
+        %{stags format = \(dq.@@ = 0,\(dq; %}
+    };
+    try std.testing.expectEqual(lex(&st), 1);
+}
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS Header file
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+// Generated by re2c
+
+pub const State = struct {
+    yyinput: [:0]const u8,
+    yycursor: usize,
+    yyt1: usize,
+};
+
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH SKELETON PROGRAMS
+.sp
+With the \fB\-S, \-\-skeleton\fP option, re2c ignores all non\-re2c code and generates
+a self\-contained C program that can be further compiled and executed. The
+program consists of lexer code and input data. For each constructed DFA (block
+or condition) re2c generates a standalone lexer and two files: an \fB\&.input\fP
+file with strings derived from the DFA and a \fB\&.keys\fP file with expected match
+results. The program runs each lexer on the corresponding \fB\&.input\fP file and
+compares results with the expectations.
+Skeleton programs are very useful for a number of reasons:
+.INDENT 0.0
+.IP \(bu 2
+They can check correctness of various re2c optimizations (the data is
+generated early in the process, before any DFA transformations have taken
+place).
+.IP \(bu 2
+Generating a set of input data with good coverage may be useful for both
+testing and benchmarking.
+.IP \(bu 2
+Generating self\-contained executable programs allows one to get minimized test
+cases (the original code may be large or have a lot of dependencies).
+.UNINDENT
+.sp
+The difficulty with generating input data is that for all but the most trivial
+cases the number of possible input strings is too large (even if the string
+length is limited). re2c solves this difficulty by generating sufficiently
+many strings to cover almost all DFA transitions. It uses the following
+algorithm. First, it constructs a skeleton of the DFA. For encodings with 1\-byte
+code unit size (such as ASCII, UTF\-8 and EBCDIC) skeleton is just an exact copy
+of the original DFA. For encodings with multibyte code units skeleton is a copy
+of DFA with certain transitions omitted: namely, re2c takes at most 256 code
+units for each disjoint continuous range that corresponds to a DFA transition.
+The chosen values are evenly distributed and include range bounds. Instead of
+trying to cover all possible paths in the skeleton (which is infeasible) re2c
+generates sufficiently many paths to cover all skeleton transitions, and thus
+trigger the corresponding conditional jumps in the lexer.
+The algorithm implementation is limited by ~1Gb of transitions and consumes
+a constant amount of memory (re2c writes data to file as soon as it is generated).
+.SH VISUALIZATION AND DEBUG
+.sp
+With the \fB\-D, \-\-emit\-dot\fP option, re2c does not generate code. Instead,
+it dumps the generated DFA in DOT format.
+One can convert this dump to an image of the DFA using Graphviz or another library.
+Note that this option shows the final DFA after it has gone through a number of
+optimizations and transformations. Earlier stages can be dumped with various debug
+options, such as \fB\-\-dump\-nfa\fP, \fB\-\-dump\-dfa\-raw\fP etc. (see the full list of options).
+.SH SEE ALSO
+.sp
+You can find more information about re2c at the official website: \fI\%http://re2c.org\fP\&.
+Similar programs are flex(1), lex(1), quex(\fI\%http://quex.sourceforge.net\fP).
+.SH AUTHORS
+.sp
+re2c was originally written by Peter Bumbulis (\fI\%peter@csg.uwaterloo.ca\fP) in 1993.
+Marcus Boerger and Dan Nuffer spent several years turning the original idea into
+a production ready code generator. Since then it has been maintained and
+developed by multiple volunteers, most notably,
+Brian Young (\fI\%bayoung@acm.org\fP),
+\fI\%Marcus Boerger\fP,
+Dan Nuffer (\fI\%nuffer@users.sourceforge.net\fP),
+\fI\%Ulya Trofimovich\fP (\fI\%skvadrik@gmail.com\fP),
+\fI\%Serghei Iakovlev\fP,
+\fI\%Sergei Trofimovich\fP,
+\fI\%Petr Skocik\fP,
+\fI\%ligfx\fP
+and \fI\%raekye\fP\&.
+.\" Generated by docutils manpage writer.
+.
diff --git a/build/split_man.py b/build/split_man.py
index 45d4a160b..c0e94cbbf 100644
--- a/build/split_man.py
+++ b/build/split_man.py
@@ -5,15 +5,18 @@
 input manpage.
 """
 
+import re
 import sys
 
-if len(sys.argv) != 4:
-    print('usage:', sys.argv[0], '<input> <output> <lang>')
+if len(sys.argv) != 3:
+    print('usage:', sys.argv[0], '<input> <output>')
     exit(1)
 
 input = sys.argv[1]
 output = sys.argv[2]
-lang = sys.argv[3].lower().encode('utf-8')
+
+# Extract language name from the output filename.
+lang = re.search('re2([a-z]*)\.1', output).group(1).encode('utf-8')
 
 hdr_ext = None
 disclaimer = None
@@ -28,7 +31,8 @@
 elif lang == b'go':
     src_ext = b'go'
     lang_name = b'Go'
-elif lang == b'haskell':
+elif lang == b'hs':
+    lang = b'haskell'
     src_ext = b'hs'
     lang_name = b'Haskell'
 elif lang == b'java':
@@ -40,7 +44,8 @@
 elif lang == b'ocaml':
     src_ext = b'ml'
     lang_name = b'OCaml'
-elif lang == b'python':
+elif lang == b'py':
+    lang = b'python'
     src_ext = b'py'
     lang_name = b'Python'
 elif lang == b'rust':
diff --git a/cmake/Re2cGenDocs.cmake b/cmake/Re2cGenDocs.cmake
index e046b55b7..b6e7cb3cd 100644
--- a/cmake/Re2cGenDocs.cmake
+++ b/cmake/Re2cGenDocs.cmake
@@ -1,19 +1,21 @@
-function(re2c_gen_manpage source target bootstrap lang)
+function(re2c_gen_manpage source target)
     if(RE2C_REBUILD_DOCS)
         get_filename_component(targetdir "${target}" DIRECTORY)
-        set(source_l "${source}.${lang}")
+        set(split_source "${target}.rst")
+        file(RELATIVE_PATH relative_target "${CMAKE_CURRENT_BINARY_DIR}" "${target}")
+        set(bootstrap "${CMAKE_CURRENT_SOURCE_DIR}/bootstrap/${relative_target}")
         add_custom_command(
             OUTPUT "${target}"
             COMMAND "${CMAKE_COMMAND}" -E make_directory ${targetdir}
-            COMMAND "${PYTHON}" "${re2c_splitman}" "${source}" "${source_l}" "${lang}"
-            COMMAND "${PYTHON}" "${re2c_rst2man}" --tab-width=4 "${source_l}" "${target}"
+            COMMAND "${PYTHON}" "${re2c_splitman}" "${source}" "${split_source}"
+            COMMAND "${PYTHON}" "${re2c_rst2man}" --tab-width=4 "${split_source}" "${target}"
             COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${target}" "${bootstrap}"
             DEPENDS
                 "${source}"
                 "${re2c_splitman}"
                 "${re2c_rst2man}"
                 "${re2c_docs_sources}"
-            BYPRODUCTS "${source_l}"
+            BYPRODUCTS "${split_source}"
         )
     else()
         add_custom_command(
diff --git a/doc/manpage.rst.in b/doc/manpage.rst.in
index 329edd220..cf2c577ee 100644
--- a/doc/manpage.rst.in
+++ b/doc/manpage.rst.in
@@ -159,7 +159,7 @@ Submatch extraction
     :literal:
     :code: RE2C_LANG
 .. include:: @top_srcdir@/doc/manual/submatch/submatch_example_captures.rst_
-.. include:: @top_srcdir@/examples/RE2C_LANG/submatch/04_posix_captures.re
+.. include:: @top_srcdir@/examples/RE2C_LANG/submatch/03_captures.re
     :literal:
     :code: RE2C_LANG
 .. include:: @top_srcdir@/doc/manual/submatch/submatch_example_mtags.rst_
diff --git a/doc/manual/api/api2_c.rst_ b/doc/manual/api/api2_c.rst_
index 6e772dd16..4dc14eb31 100644
--- a/doc/manual/api/api2_c.rst_
+++ b/doc/manual/api/api2_c.rst_
@@ -12,8 +12,8 @@ program:
 **Record API**
     (*added in version 4.0*)
     Record API is useful in cases when lexer state must be stored in a struct.
-    It is enabled with option ``--api record`` or configuration
-    ``re2c:api = record``. This API consists of a variable ``yyrecord`` (the
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
     name can be overridden with ``re2c:variable:yyrecord``) that should be
     defined as a struct with fields ``yycursor``, ``yymarker``, ``yyctxmarker``,
     ``yylimit`` (only the fields used by the generated code need to be defined,
@@ -23,8 +23,8 @@ program:
 
 **Generic API**
     (*added in version 0.14*)
-    This is the most flexible API provided by re2c. It is enabled with
-    ``--api generic`` option or ``re2c:api = generic`` configuration.
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
     This API contains primitives for generic operations:
     ``YYPEEK``,
     ``YYSKIP``,
diff --git a/doc/manual/api/api2_d.rst_ b/doc/manual/api/api2_d.rst_
new file mode 100644
index 000000000..c32622908
--- /dev/null
+++ b/doc/manual/api/api2_d.rst_
@@ -0,0 +1,41 @@
+
+re2d has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the D backend. It consists of primitives
+    ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER`` and ``YYLIMIT``, which
+    should be defined as pointers of type ``YYCTYPE*``.
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in a struct.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as a struct with fields ``yycursor``, ``yymarker``, ``yyctxmarker``,
+    ``yylimit`` (only the fields used by the generated code need to be defined,
+    and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_go.rst_ b/doc/manual/api/api2_go.rst_
index 108d3c780..60a820130 100644
--- a/doc/manual/api/api2_go.rst_
+++ b/doc/manual/api/api2_go.rst_
@@ -4,8 +4,8 @@ program:
 
 **Simple API**
     (*added in version 4.0*)
-    This is a basic API that can be enabled with option ``--api simple`` or
-    configuration ``re2c:api = simple``. It consists of the following
+    This is a basic API that can be enabled with ``--api simple`` option or
+    ``re2c:api = simple`` configuration. It consists of the following
     primitives: ``YYINPUT`` (which should be defined as a sequence of code
     units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
     ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
@@ -15,8 +15,8 @@ program:
 **Record API**
     (*added in version 4.0*)
     Record API is useful in cases when lexer state must be stored in a struct.
-    It is enabled with option ``--api record`` or configuration
-    ``re2c:api = record``. This API consists of a variable ``yyrecord`` (the
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
     name can be overridden with ``re2c:variable:yyrecord``) that should be
     defined as a struct with fields ``yyinput``, ``yycursor``, ``yymarker``,
     ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
@@ -25,10 +25,8 @@ program:
     |
 
 **Generic API**
-    (*added in version 0.14*)
-    This is the default API for the Go backend. It is enabled with
-    ``--api generic`` option or ``re2c:api = generic`` configuration.
-    This API contains primitives for generic operations:
+    This is the most flexible API and the default API for the Go backend.
+    It contains primitives for generic operations:
     ``YYPEEK``,
     ``YYSKIP``,
     ``YYBACKUP``,
diff --git a/doc/manual/api/api2_haskell.rst_ b/doc/manual/api/api2_haskell.rst_
new file mode 100644
index 000000000..35d3a1581
--- /dev/null
+++ b/doc/manual/api/api2_haskell.rst_
@@ -0,0 +1,35 @@
+
+re2hs has two API flavors that define the core set of primitives used by a
+program:
+
+**Record API**
+    Record API is the default API for the Haskell backend.
+    This API consists of a binding ``yyrecord`` (the name can be overridden with
+    ``re2c:variable:yyrecord``) that should be defined as a record with fields
+    ``_yyinput``, ``_yycursor``, ``_yymarker``, ``_yyctxmarker``, ``_yylimit``.
+    Only the fields used by the generated code need to be defined, and their
+    names can be configured.
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYCOPYSTAG``,
+    ``YYCOPYMTAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_java.rst_ b/doc/manual/api/api2_java.rst_
new file mode 100644
index 000000000..77da3d606
--- /dev/null
+++ b/doc/manual/api/api2_java.rst_
@@ -0,0 +1,42 @@
+
+re2java has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the Java backend. It consists of the following
+    primitives: ``YYINPUT`` (which should be defined as a sequence of code
+    units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
+    ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in a class.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as a class with fields ``yyinput``, ``yycursor``, ``yymarker``,
+    ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
+    need to be defined, and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_js.rst_ b/doc/manual/api/api2_js.rst_
new file mode 100644
index 000000000..48bf0cbfe
--- /dev/null
+++ b/doc/manual/api/api2_js.rst_
@@ -0,0 +1,42 @@
+
+re2js has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the JavaScript backend. It consists of the
+    following primitives: ``YYINPUT`` (which should be defined as a sequence of
+    code units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
+    ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in an object.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as an object with properties ``yyinput``, ``yycursor``,
+    ``yymarker``, ``yyctxmarker``, ``yylimit`` (only the fields used by the
+    generated code need to be defined, and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_ocaml.rst_ b/doc/manual/api/api2_ocaml.rst_
new file mode 100644
index 000000000..07dbd7f92
--- /dev/null
+++ b/doc/manual/api/api2_ocaml.rst_
@@ -0,0 +1,33 @@
+
+re2ocaml has two API flavors that define the core set of primitives used by a
+program:
+
+**Record API**
+    Record API is the default API for the OCaml backend.
+    This API consists of a variable ``yyrecord`` (the name can be overridden with
+    ``re2c:variable:yyrecord``) that should be defined as a record with fields
+    ``_yyinput``, ``_yycursor``, ``_yymarker``, ``_yyctxmarker``, ``_yylimit``.
+    Only the fields used by the generated code need to be defined, and their
+    names can be configured.
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_python.rst_ b/doc/manual/api/api2_python.rst_
new file mode 100644
index 000000000..cd848f01c
--- /dev/null
+++ b/doc/manual/api/api2_python.rst_
@@ -0,0 +1,42 @@
+
+re2py has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the Python backend. It consists of the
+    following primitives: ``YYINPUT`` (which should be defined as a sequence of
+    code units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
+    ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in a class.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as a class with attributes ``yyinput``, ``yycursor``, ``yymarker``,
+    ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
+    need to be defined, and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_rust.rst_ b/doc/manual/api/api2_rust.rst_
index a8a44efcf..6ad697aa6 100644
--- a/doc/manual/api/api2_rust.rst_
+++ b/doc/manual/api/api2_rust.rst_
@@ -4,8 +4,8 @@ program:
 
 **Simple API**
     (*added in version 4.0*)
-    This is a basic API that can be enabled with option ``--api simple`` or
-    configuration ``re2c:api = simple``. It consists of the following
+    This is a basic API that can be enabled with ``--api simple`` option or
+    ``re2c:api = simple`` configuration. It consists of the following
     primitives: ``YYINPUT`` (which should be defined as a sequence of code
     units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
     ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
@@ -15,8 +15,8 @@ program:
 **Record API**
     (*added in version 4.0*)
     Record API is useful in cases when lexer state must be stored in a struct.
-    It is enabled with option ``--api record`` or configuration
-    ``re2c:api = record``. This API consists of a variable ``yyrecord`` (the
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
     name can be overridden with ``re2c:variable:yyrecord``) that should be
     defined as a struct with fields ``yyinput``, ``yycursor``, ``yymarker``,
     ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
@@ -25,9 +25,7 @@ program:
     |
 
 **Generic API**
-    (*added in version 0.14*)
-    This is the default API for the Rust backend. It is enabled with
-    ``--api generic`` option or ``re2c:api = generic`` configuration.
+    This is the most flexible API and the default API for the Rust backend.
     This API contains primitives for generic operations:
     ``YYPEEK``,
     ``YYSKIP``,
diff --git a/doc/manual/api/api2_v.rst_ b/doc/manual/api/api2_v.rst_
new file mode 100644
index 000000000..81e167e09
--- /dev/null
+++ b/doc/manual/api/api2_v.rst_
@@ -0,0 +1,42 @@
+
+re2v has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the V backend. It consists of the following
+    primitives: ``YYINPUT`` (which should be defined as a sequence of code
+    units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
+    ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in a struct.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as a struct with fields ``yyinput``, ``yycursor``, ``yymarker``,
+    ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
+    need to be defined, and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/api/api2_zig.rst_ b/doc/manual/api/api2_zig.rst_
new file mode 100644
index 000000000..83f3d2b87
--- /dev/null
+++ b/doc/manual/api/api2_zig.rst_
@@ -0,0 +1,42 @@
+
+re2zig has three API flavors that define the core set of primitives used by a
+program:
+
+**Simple API**
+    This is the default API for the Zig backend. It consists of the following
+    primitives: ``YYINPUT`` (which should be defined as a sequence of code
+    units, e.g. a string) and ``YYCURSOR``, ``YYMARKER``, ``YYCTXMARKER``,
+    ``YYLIMIT`` (which should be defined as indices in ``YYINPUT``).
+
+    |
+
+**Record API**
+    Record API is useful in cases when lexer state must be stored in a struct.
+    It is enabled with ``--api record`` option or ``re2c:api = record``
+    configuration. This API consists of a variable ``yyrecord`` (the
+    name can be overridden with ``re2c:variable:yyrecord``) that should be
+    defined as a struct with fields ``yyinput``, ``yycursor``, ``yymarker``,
+    ``yyctxmarker``, ``yylimit`` (only the fields used by the generated code
+    need to be defined, and their names can be configured).
+
+    |
+
+**Generic API**
+    This is the most flexible API. It is enabled with ``--api generic`` option
+    or ``re2c:api = generic`` configuration.
+    It contains primitives for generic operations:
+    ``YYPEEK``,
+    ``YYSKIP``,
+    ``YYBACKUP``,
+    ``YYBACKUPCTX``,
+    ``YYSTAGP``,
+    ``YYSTAGN``,
+    ``YYMTAGP``,
+    ``YYMTAGN``,
+    ``YYRESTORE``,
+    ``YYRESTORECTX``,
+    ``YYRESTORETAG``,
+    ``YYSHIFT``,
+    ``YYSHIFTSTAG``,
+    ``YYSHIFTMTAG``,
+    ``YYLESSTHAN``.
diff --git a/doc/manual/submatch/submatch.rst_ b/doc/manual/submatch/submatch.rst_
index 14f181ee1..a886ac655 100644
--- a/doc/manual/submatch/submatch.rst_
+++ b/doc/manual/submatch/submatch.rst_
@@ -1,51 +1,60 @@
 re2c has two options for submatch extraction.
 
-The first option is ``-T --tags``. With this option one can use standalone tags
-of the form ``@stag`` and ``#mtag``, where ``stag`` and ``mtag`` are arbitrary
-used-defined names. Tags can be used anywhere inside of a regular expression;
-semantically they are just position markers. Tags of the form ``@stag`` are
-called s-tags: they denote a single submatch value (the last input position
-where this tag matched). Tags of the form ``#mtag`` are called m-tags: they
-denote multiple submatch values (the whole history of repetitions of this tag).
-All tags should be defined by the user as variables with the corresponding
-names. With standalone tags re2c uses leftmost greedy disambiguation: submatch
-positions correspond to the leftmost matching path through the regular
-expression.
+**Tags**
+    The first option is to use standalone *tags* of the form ``@stag`` or
+    ``#mtag``, where ``stag`` and ``mtag`` are arbitrary user-defined names.
+    Tags are enabled with ``-T --tags`` option or ``re2c:tags = 1``
+    configuration. Semantically tags are position markers: they can be
+    inserted anywhere in a regular expression, and they bind to the
+    corresponding position (or multiple positions) in the input string.
+    *S-tags* bind to the last matching position, and *m-tags* bind to a list of
+    positions (they may be used in repetition subexpressions, where a single
+    position in a regular expression corresponds to multiple positions in the
+    input string). All tags should be defined by the user, either manually or
+    with the help of ``svars:re2c`` and ``mvars:re2c`` directives.
+    If there is more than one way tags can be matched against the input,
+    ambiguity is resolved using leftmost greedy disambiguation strategy.
 
-The second option is ``-P --posix-captures``: it enables POSIX-compliant
-capturing groups. In this mode parentheses in regular expressions denote the
-beginning and the end of capturing groups; the whole regular expression is group
-number zero. The number of groups for the matching rule is stored in a variable
-``yynmatch``, and submatch results are stored in ``yypmatch`` array. Both
-``yynmatch`` and ``yypmatch`` should be defined by the user, and ``yypmatch``
-size must be at least ``[yynmatch * 2]``. re2c provides a directive
-``/*!maxnmatch:re2c*/`` that defines ``YYMAXNMATCH``: a constant  equal to the
-maximal value of ``yynmatch`` among all rules. Note that re2c implements
-POSIX-compliant disambiguation: each subexpression matches as long as possible,
-and subexpressions that start earlier in regular expression have priority over
-those starting later. Capturing groups are translated into s-tags under the
-hood, therefore we use the word "tag" to describe them as well.
+**Captures**
+   The second option is to use *capturing groups*. They are enabled with
+   ``--captures`` option or ``re2c:captures = 1`` configuration. There are two
+   flavours for different disambiguation policies: ``--leftmost-captures``
+   (the default) is for leftmost greedy policy, and ``--posix-captures`` is
+   for POSIX longest-match policy. In this mode all parenthesized
+   subexpressions are considered capturing groups, and a bang can be used to
+   mark non-capturing groups: ``(! ... )``. With ``--invert-captures`` option or
+   ``re2c:invert-captures = 1`` configuration the meaning of bang is inverted.
+   The number of groups for the matching rule is stored in a variable
+   ``yynmatch`` (the whole regular expression is group number zero), and
+   submatch results are stored in ``yypmatch`` array. Both ``yynmatch`` and
+   ``yypmatch`` should be defined by the user, and ``yypmatch`` size must be at
+   least ``[yynmatch * 2]``. re2c provides a directive ``maxnmatch:re2c``
+   that defines ``YYMAXNMATCH``, a constant equal to the maximum value of
+   ``yynmatch`` among all rules.
 
-With both ``-P --posix-captures`` and ``T --tags`` options re2c uses efficient
-submatch extraction algorithm described in the
-`Tagged Deterministic Finite Automata with Lookahead <https://arxiv.org/abs/1907.08837>`_
-paper. The overhead on submatch extraction in the generated lexer grows with the
-number of tags --- if this number is moderate, the overhead is barely
-noticeable. In the lexer tags are implemented using a number of tag variables
-generated by re2c. There is no one-to-one correspondence between tag variables
-and tags: a single variable may be reused for different tags, and one tag may
-require multiple variables to hold all its ambiguous values. Eventually
-ambiguity is resolved, and only one final variable per tag survives. When a rule
-matches, all its tags are set to the values of the corresponding tag variables.
-The exact number of tag variables is unknown to the user; this number is
-determined by re2c. However, tag variables should be defined by the user as a
-part of the lexer state and updated by ``YYFILL``, therefore re2c provides
-directives ``/*!stags:re2c*/`` and ``/*!mtags:re2c*/`` that can be used to
-declare, initialize and manipulate tag variables. These directives have two
-optional configurations: ``format = "@@";`` (specifies the template where ``@@``
-is substituted with the name of each tag variable), and ``separator = "";``
-(specifies the piece of code used to join the generated pieces for different
-tag variables).
+**Captvars**
+   Another way to use capturing groups is the ``--captvars`` option or
+   ``re2c:captvars = 1`` configuration. The only difference with ``--captures``
+   is in the way the generated code stores submatch results: instead of
+   ``yynmatch`` and ``yypmatch`` re2c generates variables ``yytl<k>`` and
+   ``yytr<k>`` for the *k*-th capturing group (the user should declare these with
+   ``svars:re2c`` directive). Captures with variables support two disambiguation
+   policies: ``--leftmost-captvars`` or ``re2c:leftmost-captvars = 1`` for
+   leftmost greedy policy (the default one) and ``--posix-captvars`` or
+   ``re2c:posix-captvars = 1`` for POSIX longest-match policy.
+
+Under the hood all these options translate into tags and
+`Tagged Deterministic Finite Automata with Lookahead <https://arxiv.org/abs/1907.08837>`_.
+The core idea of TDFA is to minimize the overhead on submatch extraction.
+In the extreme, if there are no tags or captures in a regular expression, TDFA is
+just an ordinary DFA. If the number of tags is moderate, the overhead is barely
+noticeable. The generated TDFA uses a number of *tag variables* which do not map
+directly to tags: a single variable may be used for different tags, and a tag
+may require multiple variables to hold all its possible values. Eventually
+ambiguity is resolved, and only one final variable per tag survives. Tag
+variables should be defined using ``stags:re2c`` or ``mtags:re2c`` directives.
+If the lexer state is stored, tag variables should be part of it. They also
+need to be updated by ``YYFILL``.
 
 S-tags support the following operations:
 
diff --git a/doc/manual/submatch/submatch_example_captures.rst_ b/doc/manual/submatch/submatch_example_captures.rst_
index 09b24f5e5..3bb3f7e8b 100644
--- a/doc/manual/submatch/submatch_example_captures.rst_
+++ b/doc/manual/submatch/submatch_example_captures.rst_
@@ -1,2 +1,2 @@
-Here is an example of using POSIX capturing groups to parse semantic versions.
+Here is an example of using capturing groups to parse semantic versions.