From ddfe6b3e16192c87f5680efb84eccdf499f174b2 Mon Sep 17 00:00:00 2001 From: Kurtis Rader Date: Sun, 13 Oct 2019 20:49:16 -0700 Subject: [PATCH] Implement an optget() like alternative Implement an alternative to the legacy AST optget() function. This looks and behaves much like the getopt_long() function provided by GNU and BSD based platforms but with two extensions supported by AST optget(). First, integers represented as short flags; e.g., `-123`. Second, short flags that are prefixed by `+` rather than `-`. It also deviates from getopt_long() by not supporting some legacy behaviors of that API which we don't need. This is related to Github issue #507 because having such a function is required for parsing the `set`, `typeset`, and `ksh` command args. That is because those commands/programs require supporting short flags like `+o abc` to mean the opposite of `-o abc`. Something that is effectively impossible using the borg standard getopt_long() function. Handling numeric args that otherwise look like an invalid short flag can be done with getopt_long_only() but this implementation makes it much easier. Too, without the risk of recognizing `-abc` as equivalent to `--abc` since that wasn't supported by the legacy AST optget() function. 
Related #507 --- src/lib/libast/include/optget_long.h | 42 +++ src/lib/libast/misc/meson.build | 1 + src/lib/libast/misc/optget_long.c | 336 +++++++++++++++++ src/lib/libast/tests/misc/meson.build | 2 +- src/lib/libast/tests/misc/optget_long.c | 472 ++++++++++++++++++++++++ 5 files changed, 852 insertions(+), 1 deletion(-) create mode 100644 src/lib/libast/include/optget_long.h create mode 100644 src/lib/libast/misc/optget_long.c create mode 100644 src/lib/libast/tests/misc/optget_long.c diff --git a/src/lib/libast/include/optget_long.h b/src/lib/libast/include/optget_long.h new file mode 100644 index 000000000000..7da58ba20cf8 --- /dev/null +++ b/src/lib/libast/include/optget_long.h @@ -0,0 +1,42 @@ +// Symbols for use with the optget_long() function. +#ifndef __OPTGET_LONG_H +#define __OPTGET_LONG_H + +#include +#include + +// The first three vars mirror the getopt() optind, optopt, and optarg vars. The fourth is present +// to make distinguishing options prefixed by a `+` rather than `-` sign easier. +extern int optget_ind; // how many args have been scanned +extern int optget_opt; // set to the option character which was unrecognized +extern char *optget_arg; // points to the value associated with an option +extern bool optget_plus; // true if option prefix was `+`, false if `-` +extern int64_t optget_num; // set to the number found + +// In getopt_long() implementations these are named `no_argument`, `required_argument`, and +// `optional_argument`. They're also likely to be preprocessor symbols; i.e., `#define` symbols. The +// `no_argument` symbol is zero in those implementations. We employ the same convention even though +// it would be safer to use a non-zero value. +enum long_arg { optget_no_arg = 0, optget_required_arg, optget_optional_arg }; + +// We don't name this `option` so that we can also `#include <getopt.h>` +// without causing a name clash. 
+struct optget_option { + const char *name; + enum long_arg has_arg; + int *flag; + int val; +}; + +extern int optget_long(int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts); + +// This is meant for those rare situations where a magic word is prefixed by `-` and thus looks like +// a short flag. For example, `kill -HUP`. Unless you absolutely have to use this do not use it. +// This function has some unexpected behaviors such as letting you concatenate short and long +// options in the same argument. If you simply need to support numbers that look like bundled short +// options (e.g., `-123`) simply preface the `short_opts` string with `#`. +extern int optget_long_only(int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts); + +#endif // !__OPTGET_LONG_H diff --git a/src/lib/libast/misc/meson.build b/src/lib/libast/misc/meson.build index fcbbe83369a0..e6e4c3e951bd 100644 --- a/src/lib/libast/misc/meson.build +++ b/src/lib/libast/misc/meson.build @@ -9,6 +9,7 @@ libast_files += [ 'misc/fgetcwd.c', 'misc/glob.c', 'misc/optget.c', + 'misc/optget_long.c', 'misc/procclose.c', 'misc/procfree.c', 'misc/procopen.c', diff --git a/src/lib/libast/misc/optget_long.c b/src/lib/libast/misc/optget_long.c new file mode 100644 index 000000000000..7b09b7dddabd --- /dev/null +++ b/src/lib/libast/misc/optget_long.c @@ -0,0 +1,336 @@ +// This is derived from the GNU `getopt_long()` implementation but has been drastically altered. +// Both to simplify that code by eliminating features we don't need and also to implement behaviors +// of the legacy AST `optget()` function that `optget_long()` replaces. +// +// The `optget_long()` function in this module is similar to `getopt_long()` but incorporates +// features from the old AST `optget()` function that it replaces. In particular it has these +// characteristics: +// +// a) Only supports POSIXLY_CORRECT mode. 
+// +// b) Optionally allows flags to have a "+" prefix as well as "-" by putting a "+" at the front of +// the short option string. +// +// c) Optionally allows a numeric flag; e.g., -123 by putting a `#` at the front of +// the short option string. In this case the special value -2 is returned as the short option +// that was found and optget_num is set to the number masquerading as a flag (but with the "-" +// prefix ignored). You cannot prefix the number with "+" even if the short_opts string contains +// "+". Nor can you specify negative numbers via this mechanism (e.g., `--123`) because the +// legacy AST `optget()` doesn't allow that. We might want to relax this restriction. +// +// d) The magic prefix characters recognized in the short option string are different from those of +// the borg standard getopt() and getopt_long() APIs. +// +// e) Allows abbreviated long options; e.g., `--he` as equivalent to `--help` assuming "help" is a +// valid long option and "he" is not a valid long option. +// +// If you need to specify both (b) and (c) behaviors you must use "+#", not "#+". +// +// Note that using "#" as a short flag is legal even though its presence as the first char of the +// short options string is special -- enabling integers as a flag behavior; e.g., `-123`. If you +// want to support "-#" but do not want to allow `-123` as a magic number option you have to have at +// least one other short option letter before the "#" character in the short option string. +// +// A short flag that looks like a numeric value, if '#' is in the short_opts, returns -2 when such a +// magic number is scanned. And `optget_num` is set to the value. +// +// Similarly this implementation does not implement some `getopt_long()` behaviors (GNU or BSD). For +// example, it always returns '?' for an unknown option and ':' for an option missing a value. You +// can't select the legacy behavior where it would return '?' for both conditions. 
It does implement +// the GNU `getopt_long()` behavior of specifying a short option that takes an optional argument +// when two consecutive colons follow the short option character. Like GNU `getopt()` the value +// must be bundled with the short option. Otherwise it is ignored and terminates the scan. +// +// Also, because the AST `optget()` function did not support it, this implementation does not map +// "-W;foo" to "--foo" like GNU `getopt()` does. If the caller needs this they have to put `W:` in +// the short_opts string and handle the mapping themselves. +// +#include "config_ast.h" // IWYU pragma: keep + +#include +#include +#include + +#include "ast.h" // for strton64() +#include "ast_assert.h" +#include "optget_long.h" + +// The first three vars mirror the getopt() optind, optopt, and optarg vars. The fourth is present +// to make distinguishing options prefixed by a `+` rather than `-` sign easier. +int optget_ind = 0; // how many args have been scanned +int optget_opt = 0; // set to the option character which was unrecognized +char *optget_arg = NULL; // points to the value associated with an option +bool optget_plus = false; // true if option prefix was `+`, false if `-` +int64_t optget_num = 0; // set to the number found + +static int _first_nonopt = 0; +static int _last_nonopt = 0; +static const char *_next_char = NULL; +static bool _plus_prefix_allowed = false; +static bool _numeric_flag_allowed = false; + +// Initialize our scanning state. 
+static const char *optget_init(const char *short_opts) { + optget_ind = 1; // skip argv[0], the command/program name + _first_nonopt = _last_nonopt = optget_ind; + _next_char = NULL; + if (short_opts[0] == '+') { + _plus_prefix_allowed = true; + short_opts++; + } else { + _plus_prefix_allowed = false; + } + if (short_opts[0] == '#') { + _numeric_flag_allowed = true; + short_opts++; + } else { + _numeric_flag_allowed = false; + } + return short_opts; +} + +// +// Scan elements of argv, whose length is argc, for option characters given in `short_opts`. +// +// If an element of argv starts with `-`, and is not exactly "-" or "--", then it is an option +// element. The characters of this element (aside from the initial `-`) are option characters. If +// `optget_long()` is called repeatedly, it returns successively each of the option characters from +// each of the option elements. +// +// If `optget_long()` finds another option character, it returns that character, updating +// `optget_ind` and `_next_char` so that the next call to `optget_long()` can resume the scan with +// the following option character or argv element. +// +// If there are no more option characters, `optget_long()` returns -1. Then `optget_ind` is the +// index in argv of the first argv element that is not an option. +// +// `short_opts` is a string containing the legitimate option characters. If an option character is +// seen that is not listed in short_opts return `?`. +// +// If a char in short_opts is followed by a colon, that means it wants an arg, so the following text +// in the same argv element, or the text of the following argv element, is returned in `optget_arg`. +// Two colons mean an option that wants an optional arg; if there is text in the current +// argv element, it is returned in `optget_arg`, otherwise `optget_arg` is set to zero. +// +// Long options begin with "--" instead of "-". 
Their names may be abbreviated as long as the +// abbreviation is unique or is an exact match for some defined option. If they have an argument, +// it follows the option name in the same argv element, separated from the option name by a `=`, or +// else in the next argv element. When `optget_long()` finds a long-named option, it returns 0 if +// that option's `flag` field is nonzero, the value of the option's `val` field if the `flag` field +// is zero. +// +// `long_opts` is a vector of `struct optget_option` terminated by an element containing a name +// which is zero. +// +static int _optget_long(int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts, bool long_only) { + if (argc < 1) return -1; + if (optget_ind == 0) optget_init(short_opts); // initialize our scanning state. NOTE(review): the pointer returned by optget_init() (short_opts advanced past a leading `+` and/or `#`) is discarded, so the strchr() lookups below still see those prefix chars -- confirm this is intended. + + // Make sure some results aren't carried forward from the previous call. This is slightly + // different from most `getopt_long()` implementations but is safer and saner. + optget_opt = 0; + optget_arg = NULL; + // We don't do `optget_plus = false;` at this point because bundled short options need to + // remember which prefix was used when the first short option was scanned. + + if (!_next_char || *_next_char == '\0') { // advance to the next argv element + // Give _first_nonopt & _last_nonopt rational values if optget_ind has been moved back by + // the user. Who may also have changed the arguments. + if (_last_nonopt > optget_ind) _last_nonopt = optget_ind; + if (_first_nonopt > optget_ind) _first_nonopt = optget_ind; + + // The special argv element `--` means stop scanning for options. Skip it as if it were a + // null option, then skip everything else like a non-option. + if (optget_ind != argc && !strcmp(argv[optget_ind], "--")) { + optget_ind++; + // This condition should never be true since we always use POSIXLY_CORRECT behavior and + // thus do not permute the args. The GNU version calls exchange() if the assertion + // failed. 
Presumably due to its support for non-POSIXLY_CORRECT behavior. But I'm + // paranoid; hence the assertion. + assert(!(_first_nonopt != _last_nonopt && _last_nonopt != optget_ind)); + + if (_first_nonopt == _last_nonopt) _first_nonopt = optget_ind; + _last_nonopt = argc; + optget_ind = argc; + } + + // If we have done all the argv elements, stop the scan and back over any non-options that + // we skipped. + if (optget_ind == argc) { + // Set the next-arg-index to point at the non-options that we previously skipped, so the + // caller will digest them. + if (_first_nonopt != _last_nonopt) optget_ind = _first_nonopt; + return -1; + } + + // If we have come to a non-option stop the scan. NOTE(review): index [1] is read before index [0] is examined, so for an empty string argument this looks like a read one past the terminator -- confirm. + if (argv[optget_ind][1] == '\0') return -1; + char c = argv[optget_ind][0]; + optget_plus = (c == '+'); + if (!(c == '-' || (_plus_prefix_allowed && optget_plus))) return -1; + + // We have found another option argv element. Skip the initial punctuation. + _next_char = argv[optget_ind] + 1; + if (!optget_plus && long_opts && argv[optget_ind][1] == '-') _next_char++; + } + + // Decode the current option argv element. + + // Check whether the argv element is a long option. + // + // If long_only and the argv element has the form "-f", where f is a valid short option, don't + // consider it an abbreviated form of a long option that starts with f. Otherwise there would + // be no way to give the "-f" short option. + // + // On the other hand, if there's a long option "fubar" and the argv element is "-fu", do + // consider that an abbreviation of the long option, just like "--fu", and not "-f" with + // arg "u". + // + // This distinction seems to be the most useful approach. 
+ if (long_opts && !optget_plus && + (argv[optget_ind][1] == '-' || + (long_only && (argv[optget_ind][2] || !strchr(short_opts, argv[optget_ind][1]))))) { + const char *name_end; + const struct optget_option *p; + const struct optget_option *pfound = NULL; + int exact = 0; + int ambig = 0; + int option_index; + + for (name_end = _next_char; *name_end && *name_end != '='; name_end++) { + ; // empty loop + } + + // Test all long options for either exact match or abbreviated matches. + for (p = long_opts, option_index = 0; p->name; p++, option_index++) { + if (strncmp(p->name, _next_char, name_end - _next_char)) continue; + if ((unsigned int)(name_end - _next_char) == (unsigned int)strlen(p->name)) { + // Exact match found. + pfound = p; + exact = 1; + break; + } else if (!pfound) { + // First nonexact match found. + pfound = p; + } else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || + pfound->val != p->val) { + // Second or later nonexact match found. + ambig = 1; + } + } + + if (ambig && !exact) { + _next_char += strlen(_next_char); + optget_ind++; + optget_opt = 0; + return '?'; + } + + if (pfound) { + optget_ind++; + if (*name_end) { + if (pfound->has_arg != optget_no_arg) { + optget_arg = (char *)name_end + 1; // discard `const` qualifier + } else { + _next_char += strlen(_next_char); + optget_opt = pfound->val; + return '?'; + } + } else if (pfound->has_arg == optget_required_arg) { + if (optget_ind < argc) { + optget_arg = (char *)argv[optget_ind++]; // discard `const` qualifier + } else { + _next_char += strlen(_next_char); + optget_opt = pfound->val; + return ':'; + } + } + _next_char += strlen(_next_char); + if (pfound->flag) { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + // Can't find it as a long option. If the option starts with `--` or is not a valid short + // option, then it's an error. Otherwise interpret it as a short option. 
+ if (!long_only || argv[optget_ind][1] == '-' || !strchr(short_opts, *_next_char)) { + optget_ind++; + _next_char = (char *)""; + return '?'; + } + } + + // Look at and handle the next short option-character. + char c = *_next_char++; + const char *temp = strchr(short_opts, c); + + // If it isn't a recognized short option, and we allow numeric flags, see if it is a recognized + // number. If it is return the number. + if (!temp && _numeric_flag_allowed) { + char *cp; + optget_num = strton64(_next_char - 1, &cp, NULL, 0); + if (!*cp) { // looks like a number so return it + _next_char = NULL; // force advancing to the next argv element on the next call + ++optget_ind; + return -2; // tell the caller a number masquerading as a flag was found + } + } + + // Increment `optget_ind` when we start to process its last character. + if (*_next_char == '\0') ++optget_ind; + + if (!temp || c == ':' || c == ';') { + optget_opt = c; + return '?'; + } + + if (temp[1] != ':') return c; + if (temp[2] == ':') { + // This is an option that accepts an argument optionally. + if (*_next_char != '\0') { + optget_arg = (char *)_next_char; // discard `const` qualifier + optget_ind++; + } else { + optget_arg = NULL; + } + _next_char = NULL; + return c; + } + + // This is an option that requires an argument. + if (*_next_char != '\0') { + optget_arg = (char *)_next_char; // discard `const` qualifier + // If we end this argv element by taking the rest as an arg, we must advance to the + // next element now. + optget_ind++; + } else if (optget_ind == argc) { + optget_opt = c; + c = ':'; + } else { + // We already incremented `optget_ind` once; increment it again when taking next + // argv element as argument. + optget_arg = (char *)argv[optget_ind++]; // discard `const` qualifier + } + _next_char = NULL; + + return c; +} + +// This should be the usual way to parse CLI args. 
+int optget_long(int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts) { + return _optget_long(argc, argv, short_opts, long_opts, false); +} + +// This is meant for those rare situations where a magic word is prefixed by `-` and thus looks like +// a short flag. For example, `kill -HUP`. Unless you absolutely have to use this do not use it. +// This function has some unexpected behaviors such as letting you concatenate short and long +// options in the same argument. If you simply need to support numbers that look like bundled short +// options (e.g., `-123`) simply preface the `short_opts` string with `#`. +int optget_long_only(int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts) { + return _optget_long(argc, argv, short_opts, long_opts, true); +} diff --git a/src/lib/libast/tests/misc/meson.build b/src/lib/libast/tests/misc/meson.build index a4c864428d89..8904824fe7c9 100644 --- a/src/lib/libast/tests/misc/meson.build +++ b/src/lib/libast/tests/misc/meson.build @@ -1,6 +1,6 @@ test_dir = meson.current_source_dir() # TODO: Enable 'opt' tests -tests = [ 'debug', 'glob', 'stk', 'environ' ] +tests = [ 'debug', 'glob', 'optget_long', 'stk', 'environ' ] incdir = include_directories('..', '../../include/') # # Some tests fail for inexplicable reasons on some platforms. In some cases, such as Cygwin, things diff --git a/src/lib/libast/tests/misc/optget_long.c b/src/lib/libast/tests/misc/optget_long.c new file mode 100644 index 000000000000..5f507a2a4e50 --- /dev/null +++ b/src/lib/libast/tests/misc/optget_long.c @@ -0,0 +1,472 @@ +// Verify basic functionality of the `optget_long()` function and related code. When possible we +// also execute each test using the platform `getopt_long()` function since the two should behave +// the same modulo the expected differences. 
+#include "config_ast.h" // IWYU pragma: keep + +#include +#include +#include +#include +#include +#include + +#include "optget_long.h" +#include "terror.h" + +struct optget_test { + int rv; + int ind; + int opt; + char *arg; + int64_t num; + bool plus; +}; + +// Old GNU `getopt_long_only()` implementations have a bug. See `check_for_getopt_long_only_bug()`. +static bool getopt_long_only_works = true; + +#define TERROR(line, ...) \ + do { \ + (Tstline = line), tsterror(__VA_ARGS__); \ + } while (0) + +// Calculate length of a NULL terminated list of strings. +static __inline__ int argv_len(char *const *argv) { + int n = 0; + while (*argv++) ++n; + return n; +} + +// Convert a `struct optget_option*` to a `struct option*`. Caller must free the returned pointer. +struct option *construct_struct_option(const struct optget_option *long_opts) { + int n; + for (n = 0; long_opts[n].name; ++n) { + ; // empty loop + } + ++n; + struct option *lo = malloc(n * sizeof(struct option)); // NOTE(review): result is not checked for NULL before the loop below writes through it + + for (int i = 0; i < n; ++i) { + lo[i].name = long_opts[i].name; + lo[i].flag = long_opts[i].flag; + lo[i].val = long_opts[i].val; + if (long_opts[i].has_arg == optget_no_arg) { + lo[i].has_arg = no_argument; + } else if (long_opts[i].has_arg == optget_required_arg) { + lo[i].has_arg = required_argument; + } else { + lo[i].has_arg = optional_argument; + } + } + + return lo; +} + +// Execute a sequence of `optget_long()` or `optget_long_only()` calls; verifying each result +// against successive elements in a list of expected values. +static void _test_optget_long(int line, bool long_only, int argc, char *const *argv, + const char *short_opts, const struct optget_option *long_opts, + const struct optget_test *results) { + int (*funcp)(int, char *const *, const char *, const struct optget_option *) = + long_only ? optget_long_only : optget_long; + const char *funcname = long_only ? 
"optget_long_only" : "optget_long"; + + optget_ind = 0; + for (int step = 0;; ++step) { + int rv = funcp(argc, argv, short_opts, long_opts); + if (rv != results[step].rv) { + TERROR(line, "%s() step #%d rv wrong: expected %d, got %d", funcname, step, + results[step].rv, rv); + } + + if (optget_ind != results[step].ind) { + TERROR(line, "%s() step #%d ind wrong: expected %d, got %d", funcname, step, + results[step].ind, optget_ind); + } + + if (results[step].rv == -1) break; // the test is done, none of the other vars matter + + if (rv == '?' && optget_opt != results[step].opt) { + TERROR(line, "%s() step #%d opt wrong: expected %d, got %d", funcname, step, + results[step].opt, optget_opt); + } + + if (optget_arg || results[step].arg) { + bool okay = optget_arg && results[step].arg && !strcmp(optget_arg, results[step].arg); + if (!okay) { + TERROR(line, "%s() step #%d arg wrong: expected |%s|, got |%s|", funcname, step, + results[step].arg, optget_arg); + } + } + + if (optget_plus != results[step].plus) { + TERROR(line, "%s() step #%d plus wrong: expected %d, got %d", funcname, step, + results[step].plus, optget_plus); + } + if (rv == -2 && optget_num != results[step].num) { // NOTE(review): the message below says "opt wrong" although `num` is what is compared + TERROR(line, "%s() step #%d opt wrong: expected %" PRId64 ", got %" PRId64, funcname, + step, results[step].num, optget_num); + } + } +} + +// Execute a sequence of `getopt_long()` calls. Verify each result against successive elements in a +// list of expected values. +// +// This verifies that our test produces the expected sequence of results when calling +// `getopt_long()` (or `getopt_long_only()`). Our `optget_long()` implementation is meant to be +// compatible with `getopt_long()`; modulo the two `optget_long()` extensions. So we want to verify +// that our test definition is correct when those two extensions are not used. +// +// This can't be called if the test uses the numeric flag, `#`, or the `+` option prefix supported +// by `optget_long()`. 
Those features are not supported by `getopt_long()` or emulated by this +// function. +static void _test_getopt_long(int line, bool long_only, int argc, char *const *argv, + const char *short_opts, const struct optget_option *long_opts, + const struct optget_test *results) { + if (long_only && !getopt_long_only_works) return; + + int (*funcp)(int, char *const *, const char *, const struct option *, int *) = + long_only ? getopt_long_only : getopt_long; + const char *funcname = long_only ? "getopt_long_only" : "getopt_long"; + char so[100] = "+:"; // we require POSIXLY_CORRECT and '?' vs ':' behavior + (void)strlcpy(so + 2, short_opts, sizeof(so) - 2); + struct option *lo = construct_struct_option(long_opts); + + optind = opterr = 0; + for (int step = 0;; ++step) { + int rv = funcp(argc, argv, so, lo, NULL); + if (rv != results[step].rv) { + TERROR(line, "%s() step #%d rv wrong: expected %d, got %d", funcname, step, + results[step].rv, rv); + } + + if (optind != results[step].ind) { + TERROR(line, "%s() step #%d ind wrong: expected %d, got %d", funcname, step, + results[step].ind, optind); + } + + if (results[step].rv == -1) break; + + if (rv == '?' && optopt != results[step].opt) { + // Some getopt_long_only() implementations have a bug. Bundled short options like `-ab`, + // where `a` is recognized and `b` is not, does not correctly set `optopt` to `b`; it + // sets `optopt` to zero. Our optget_long_only() behaves correctly. This works around + // that bug to avoid false positive failures. 
+ if (!(long_only && optopt == 0 && results[step].opt != 0)) { + TERROR(line, "%s() step #%d opt wrong: expected %d, got %d", funcname, step, + results[step].opt, optopt); + } + } + + if (optarg || results[step].arg) { + bool okay = optarg && results[step].arg && !strcmp(optarg, results[step].arg); + if (!okay) { + TERROR(line, "%s() step #%d arg wrong: expected |%s|, got |%s|", funcname, step, + results[step].arg, optarg); + } + } + } + + free(lo); +} + +static void test_optget_long(int line, int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts, + const struct optget_test *results) { + // If the test includes behavior unique to `optget_long()` skip the verification of the test + // definition against the standard `getopt_long()`. + if (*short_opts != '#' && *short_opts != '+') { + _test_getopt_long(line, false, argc, argv, short_opts, long_opts, results); + } + + _test_optget_long(line, false, argc, argv, short_opts, long_opts, results); +} + +static void test_optget_long_only(int line, int argc, char *const *argv, const char *short_opts, + const struct optget_option *long_opts, + const struct optget_test *results) { + // If the test includes behavior unique to `optget_long()` skip the verification of the test + // definition against the standard `getopt_long()`. + if (*short_opts != '#' && *short_opts != '+') { + _test_getopt_long(line, true, argc, argv, short_opts, long_opts, results); + } + + _test_optget_long(line, true, argc, argv, short_opts, long_opts, results); +} + +// Verify the behavior when there are no short or long options defined. Even when neither, either, +// or both, of the behavior modifier symbols are present in the short options string. Note that the +// presence of those behavior modifier symbols when no other chars are present also means there are +// no options defined. 
+static void test_no_options() { + const char *short_opts1 = ""; + const char *short_opts2 = "+"; + const char *short_opts3 = "#"; + const char *short_opts4 = "+#"; + const char *short_opts[] = {short_opts1, short_opts2, short_opts3, short_opts4, NULL}; + struct optget_option long_opts[] = { + {NULL, optget_no_arg, NULL, 0}, + }; + char *const argv1[] = {"cmd", "-x", "arg1", NULL}; + char *const argv2[] = {"cmd", "--x", "arg1", NULL}; + struct optget_test *results; + + for (const char **so = short_opts; *so; ++so) { + // No options and no argv entries other than the command name should always return -1 on + // the first call. + results = (struct optget_test[]){{.rv = -1, .ind = 1}}; + test_optget_long(__LINE__, 1, argv1, *so, long_opts, results); + + // No options and an argv entry that looks like a short option means an unrecognized + // option is reported. + results = (struct optget_test[]){{.rv = '?', .opt = 'x', .ind = 2}, // + {.rv = -1, .ind = 2}}; + test_optget_long(__LINE__, 3, argv1, *so, long_opts, results); + + // No options and an argv entry that looks like a long option means an unrecognized + // option is reported. + results = (struct optget_test[]){{.rv = '?', .ind = 2}, // + {.rv = -1, .ind = 2}}; + test_optget_long(__LINE__, 3, argv2, *so, long_opts, results); + + // No options and one argv entry that does not look like an option. + results = (struct optget_test[]){{.rv = -1, .ind = 1}}; + test_optget_long(__LINE__, 3, argv1 + 1, *so, long_opts, results); + } +} + +// Verify the handling of numeric flags; e.g., `-123`. 
+static void test_numeric_options() { + const char *short_opts1 = ""; + const char *short_opts2 = "+"; + const char *short_opts3 = "#"; + const char *short_opts4 = "+#"; + struct optget_option long_opts1[] = { + {"123", optget_no_arg, NULL, -7}, + {NULL, optget_no_arg, NULL, 0}, + }; + char *const argv[] = {"cmd", "-123", "--123", "arg1", NULL}; + int argc = argv_len(argv); + + // We haven't requested recognition of numbers that look like short flags. + struct optget_test results1[] = {{.rv = '?', .opt = '1', .ind = 1}, // + {.rv = '?', .opt = '2', .ind = 1}, + {.rv = '?', .opt = '3', .ind = 2}, + {.rv = -7, .ind = 3}, + {.rv = -1, .ind = 3}}; + test_optget_long(__LINE__, argc, argv, short_opts1, long_opts1, results1); + test_optget_long(__LINE__, argc, argv, short_opts2, long_opts1, results1); + + // We have requested recognition of numbers that look like short flags. + struct optget_test results2[] = {{.rv = -2, .ind = 2, .num = 123}, // + {.rv = -7, .ind = 3}, + {.rv = -1, .ind = 3}}; + test_optget_long(__LINE__, argc, argv, short_opts3, long_opts1, results2); + test_optget_long(__LINE__, argc, argv, short_opts4, long_opts1, results2); +} + +// Verify complex combinations of short and long options, some that take an optional argument and +// some that take a required argument, work correctly. +static void test_complex_options() { + // Note that using "#" as a short flag is legal even though its presence as the first char of + // the short options string is special -- enabling integer as a flag behavior; e.g., `-123`. I + // hope no one ever uses the two behaviors at the same time but it is allowed. 
+ const char *short_opts = "a#x::"; + struct optget_option long_opts[] = { + {"help", optget_no_arg, NULL, -5}, + {"all", optget_no_arg, NULL, 'a'}, + {"level", optget_required_arg, NULL, 'l'}, + {"user", optget_optional_arg, NULL, -3}, + {NULL, optget_no_arg, NULL, 0}, + }; + char **argv; + int argc; + struct optget_test *results; + + argv = (char *[]){"cmd", "-ax", "-#", "--help", "--all", "-xX", + "--level=lvl1", "--level", "lvl2", "--user", NULL}; + argc = argv_len(argv); + results = (struct optget_test[]){{.rv = 'a', .ind = 1}, // + {.rv = 'x', .ind = 2}, + {.rv = '#', .ind = 3}, + {.rv = -5, .ind = 4}, + {.rv = 'a', .ind = 5}, + {.rv = 'x', .ind = 6, .arg = "X"}, + {.rv = 'l', .ind = 7, .arg = "lvl1"}, + {.rv = 'l', .ind = 9, .arg = "lvl2"}, + {.rv = -3, .ind = 10, .arg = NULL}, + {.rv = -1, .ind = 10}}; + test_optget_long(__LINE__, argc, argv, short_opts, long_opts, results); + + // A short option that has an optional value has to bundle the value with the option. Otherwise, + // if the value is a separate arg it is ignored and terminates the option scan (at least in + // POSIXLY_CORRECT mode which we require). + argv = (char *[]){"cmd", "-x", "X", "--help", NULL}; + argc = argv_len(argv); + results = (struct optget_test[]){{.rv = 'x', .ind = 2}, // + {.rv = -1, .ind = 2}}; + test_optget_long(__LINE__, argc, argv, short_opts, long_opts, results); + + // The special "--" arg terminates the scan. If an arg that looks like a valid option follows + // that special arg it is treated as a normal, non-option, arg. + argv = (char *[]){"cmd", "-a", "--", "--help", NULL}; + argc = argv_len(argv); + results = (struct optget_test[]){{.rv = 'a', .ind = 2}, // + {.rv = -1, .ind = 3}}; + test_optget_long(__LINE__, argc, argv, short_opts, long_opts, results); +} + +// Verify the handling of options that are prefixed by `+`. Something unique to ksh; not a POSIX +// mandated behavior. 
+static void test_plus_options() {
+    // The two specs differ only in the leading `+`, which is what requests recognition of short
+    // options prefixed by `+`.
+    const char *short_opts1 = "ny";
+    const char *short_opts2 = "+ny";
+    struct optget_option long_opts1[] = {
+        {"help", optget_no_arg, NULL, -5},
+        {NULL, optget_no_arg, NULL, 0},
+    };
+    char **argv;
+    int argc;
+    struct optget_test *results;
+
+    // We haven't requested recognition of options prefixed by `+` so should recognize "-n" but not
+    // "+n". The scan is expected to end at "+n", leaving "--help" unscanned.
+    argv = (char *[]){"cmd", "-n", "+n", "--help", NULL};
+    argc = argv_len(argv);
+    results = (struct optget_test[]){{.rv = 'n', .ind = 2},  //
+                                     {.rv = -1, .ind = 2}};
+    test_optget_long(__LINE__, argc, argv, short_opts1, long_opts1, results);
+
+    // We have requested recognition of options prefixed by `+` so should recognize "-n" and "+n".
+    // The `.plus` field records which prefix each option is expected to report.
+    argv = (char *[]){"cmd", "-n", "-y", "+y", "+n", "-n", "--help", NULL};
+    argc = argv_len(argv);
+    results = (struct optget_test[]){{.rv = 'n', .ind = 2, .plus = false},  //
+                                     {.rv = 'y', .ind = 3, .plus = false},
+                                     {.rv = 'y', .ind = 4, .plus = true},
+                                     {.rv = 'n', .ind = 5, .plus = true},
+                                     {.rv = 'n', .ind = 6, .plus = false},
+                                     {.rv = -5, .ind = 7},
+                                     {.rv = -1, .ind = 7}};
+    test_optget_long(__LINE__, argc, argv, short_opts2, long_opts1, results);
+
+    // We have requested recognition of options prefixed by `+` so should recognize "-n" and "+n"
+    // plus bundling behind either prefix. The "--" ends the scan so "--help" is not reported.
+    argv = (char *[]){"cmd", "-ny", "+yn", "--", "--help", NULL};
+    argc = argv_len(argv);
+    results = (struct optget_test[]){{.rv = 'n', .ind = 1, .plus = false},  //
+                                     {.rv = 'y', .ind = 2, .plus = false},
+                                     {.rv = 'y', .ind = 2, .plus = true},
+                                     {.rv = 'n', .ind = 3, .plus = true},
+                                     {.rv = -1, .ind = 4}};
+    test_optget_long(__LINE__, argc, argv, short_opts2, long_opts1, results);
+
+    // We have NOT requested recognition of short options prefixed by `+` (short_opts1 has no
+    // leading `+`), so the scan is expected to stop at the first `+y` argument; nothing after it,
+    // including "--help", is reported.
+    argv = (char *[]){"cmd", "-n", "-y", "+y", "+n", "-n", "--help", NULL};
+    argc = argv_len(argv);
+    results = (struct optget_test[]){{.rv = 'n', .ind = 2},  //
+                                     {.rv = 'y', .ind = 3},
+                                     {.rv = -1, .ind = 3}};
+    test_optget_long(__LINE__, argc, argv, short_opts1, long_opts1, results);
+
+    // We have requested recognition of short options prefixed by `+` and all options are
+    // recognized. This uses the same arguments and expectations as the second case above; it is
+    // the positive counterpart of the preceding case.
+    argv = (char *[]){"cmd", "-n", "-y", "+y", "+n", "-n", "--help", NULL};
+    argc = argv_len(argv);
+    results = (struct optget_test[]){{.rv = 'n', .ind = 2},  //
+                                     {.rv = 'y', .ind = 3},
+                                     {.rv = 'y', .ind = 4, .plus = true},
+                                     {.rv = 'n', .ind = 5, .plus = true},
+                                     {.rv = 'n', .ind = 6},
+                                     {.rv = -5, .ind = 7},
+                                     {.rv = -1, .ind = 7}};
+    test_optget_long(__LINE__, argc, argv, short_opts2, long_opts1, results);
+}
+
+// Verify the handling of long flags with a short flag prefix when we explicitly recognize that
+// situation; e.g., `kill -HUP` being equivalent to `kill --HUP` rather than `kill -H -U -P`.
+//
+// Note that `getopt_long_only()` and our `optget_long_only()` are slightly weird. For example, you
+// can bundle a short option followed by a long option. I think this is slightly weird because I
+// would expect that after seeing a valid short option only short options would be recognized until
+// the next argument is scanned. But that is not how modern `getopt_long_only()` implementations
+// behave and we intend to be compatible with those implementations.
+static void test_long_only() {
+    char *const argv[] = {"cmd", "-vHUP", "-123",  "-v",  "-456", "-vall", "-x",   "-al",
+                          "-vx", "--123", "--all", "--a", "--",   "-v",    "arg1", NULL};
+    int argc = argv_len(argv);
+    struct optget_option long_opts[] = {
+        {"123", optget_no_arg, NULL, -7},
+        {"all", optget_no_arg, NULL, 'a'},
+        {NULL, optget_no_arg, NULL, 0},
+    };
+    // One expected result per scan step; the trailing comment names the argument being scanned.
+    struct optget_test expected[] = {
+        {.rv = 'v', .ind = 1},   // -vHUP
+        {.rv = '?', .ind = 2},   // -vHUP
+        {.rv = -7, .ind = 3},    // -123
+        {.rv = 'v', .ind = 4},   // -v
+        {.rv = '?', .ind = 5},   // -456
+        {.rv = 'v', .ind = 5},   // -vall
+        {.rv = 'a', .ind = 6},   // -vall
+        {.rv = '?', .ind = 7},   // -x    notice .opt isn't set to 'x'
+        {.rv = 'a', .ind = 8},   // -al
+        {.rv = 'v', .ind = 8},   // -vx
+        {.rv = '?', .ind = 9},   // -vx   notice .opt isn't set to 'x'
+        {.rv = -7, .ind = 10},   // --123
+        {.rv = 'a', .ind = 11},  // --all
+        {.rv = 'a', .ind = 12},  // --a
+        {.rv = -1, .ind = 13},   // --
+    };
+
+    // Numbers that look like short options are not requested here (no `#` in the spec); only the
+    // "long only" treatment of unrecognized short option bundles is in play.
+    const char *spec_plain = "v";
+    test_optget_long_only(__LINE__, argc, argv, spec_plain, long_opts, expected);
+
+    // Here we request recognition of numbers that look like short options in addition to treating
+    // unrecognized bundles of short options as if they were long options.
+    //
+    // This should produce the same sequence of results as the "v" case because "long_only" takes
+    // precedence over short flags that otherwise look like numbers. In other words, you can't
+    // meaningfully use the two behaviors together to distinguish the two cases.
+    const char *spec_numeric = "#v";
+    test_optget_long_only(__LINE__, argc, argv, spec_numeric, long_opts, expected);
+}
+
+// Old GNU `getopt_long_only()` implementations, such as found in Fedora 28, have a bug which makes
+// them unsuitable for verifying the correctness of our tests.
+void check_for_getopt_long_only_bug() {
+    char *const argv[] = {"cmd", "-vHUP", "arg1", NULL};
+    int argc = argv_len(argv);
+    // This table is handed to the platform getopt_long_only(), so the sentinel uses getopt's own
+    // `no_argument` constant rather than the optget enum value (both are zero).
+    struct option long_opts[] = {
+        {NULL, no_argument, NULL, 0},
+    };
+    int rv;
+    bool usable;
+
+    // Restart the getopt scan and silence getopt's own diagnostics. An optind of zero is how
+    // glibc is asked to reinitialize; see getopt(3). In the "+:v" spec the leading `+` stops the
+    // scan at the first non-option and the `:` selects silent error reporting.
+    optind = opterr = 0;
+
+    // First step: `v` is recognized and optind must still be 1 because `-vHUP` isn't exhausted.
+    rv = getopt_long_only(argc, argv, "+:v", long_opts, NULL);
+    usable = (rv == 'v' && optind == 1);
+
+    // Second step: the remainder is not a known option so '?' is expected and optind advances.
+    if (usable) {
+        rv = getopt_long_only(argc, argv, "+:v", long_opts, NULL);
+        usable = (rv == '?' && optind == 2);
+    }
+
+    if (usable) return;  // looks okay to use
+
+    tinfo("getopt_long_only() is broken -- skipping those tests");
+    getopt_long_only_works = false;
+}
+
+tmain() {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    // Decide first whether the platform getopt_long_only() is usable; on failure this clears
+    // `getopt_long_only_works`.
+    check_for_getopt_long_only_bug();
+
+    // Test capabilities of `optget_long()` where we should be compatible with `getopt_long()`.
+    test_no_options();
+    test_complex_options();
+
+    // Test capabilities of `optget_long()` not supported by `getopt_long()`. Note that some of the
+    // sub-tests will still verify compatibility with `getopt_long()`.
+    test_numeric_options();
+    test_plus_options();
+    test_long_only();
+
+    texit(0);
+}