From b5435360d29f9202bd786b6059d5cc9e31e79992 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Thu, 13 Jul 2023 01:41:20 -0500 Subject: [PATCH] Add goto-level loop-contract annotation This commit allows us to parse and annotate loop contracts from JSON files to GOTO models. We first parse the JSON file to get the configuration. Function configurations consist of function id (regex) and a list of loop-contract configurations. Loop-contract configurations consist of: Loop number of the loop we are annotating to. Loop invariants as strings. (optional) Loop assigns as strings. (optional) Loop decreases as strings. (optional) A symbol map that map symbol names in the loop contracts strings to their name in the symbol table of the goto model. Loop contracts mangling consists of four steps. Construct a fake function with a fake loop that contains the loop contracts for parsing the contracts. Parse the fake function and extract the contracts as exprt. Substitute symbols in the extracted exprt with the symbols in the symbol table of the goto model according to the symbol map provided by the user. Typecheck all contract exprt. Annotate all contract exprt to the corresponding loop. --- doc/man/goto-instrument.1 | 3 + regression/contracts-dfcc/CMakeLists.txt | 4 +- .../test.json | 15 + .../test_contracts_file.desc | 14 + .../test.json | 18 + .../test_contracts_file.desc | 15 + .../contracts-dfcc/history-index/test.json | 13 + .../history-index/test_contracts_file.desc | 11 + .../loop_contracts_file_fail/main.c | 42 ++ .../test_matching_more_than_one_function.desc | 10 + .../test_matching_more_than_one_function.json | 13 + .../test_matching_no_function.desc | 10 + .../test_matching_no_function.json | 13 + .../loop_contracts_memcmp/main.c | 34 ++ .../loop_contracts_memcmp/test.desc | 13 + .../loop_contracts_memcmp/test.json | 19 + .../test_contracts_file.desc | 13 + .../test.json | 14 + .../test_contracts_file.desc | 17 + src/ansi-c/c_typecheck_base.h | 2 +- src/goto-instrument/Makefile | 1 + src/goto-instrument/contracts/contracts.h | 5 + .../contracts/contracts_wrangler.cpp | 380 ++++++++++++++++++ .../contracts/contracts_wrangler.h | 96 +++++ .../contracts/module_dependencies.txt | 1 + src/goto-instrument/contracts/utils.cpp | 41 ++ src/goto-instrument/contracts/utils.h | 10 + .../goto_instrument_parse_options.cpp | 8 + .../goto_instrument_parse_options.h | 2 + 29 files changed, 834 insertions(+), 3 deletions(-) create mode 100644 regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test.json create mode 100644 regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test_contracts_file.desc create mode 100644 regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test.json create mode 100644 regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test_contracts_file.desc create mode 100644 regression/contracts-dfcc/history-index/test.json create mode 100644 regression/contracts-dfcc/history-index/test_contracts_file.desc create mode 100644 regression/contracts-dfcc/loop_contracts_file_fail/main.c create mode 100644 regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.desc create mode 100644 regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.json create mode 100644 regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.desc create mode 100644 regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.json create mode 100644 regression/contracts-dfcc/loop_contracts_memcmp/main.c create mode 100644 regression/contracts-dfcc/loop_contracts_memcmp/test.desc create mode 100644 regression/contracts-dfcc/loop_contracts_memcmp/test.json create mode 100644 regression/contracts-dfcc/loop_contracts_memcmp/test_contracts_file.desc create mode 100644 regression/contracts-dfcc/variant_multi_instruction_loop_head/test.json create mode 100644 regression/contracts-dfcc/variant_multi_instruction_loop_head/test_contracts_file.desc create mode 100644 src/goto-instrument/contracts/contracts_wrangler.cpp create mode 100644 src/goto-instrument/contracts/contracts_wrangler.h diff --git a/doc/man/goto-instrument.1 b/doc/man/goto-instrument.1 index 98ea94c5349..6ecdb5efc37 100644 --- a/doc/man/goto-instrument.1 +++ b/doc/man/goto-instrument.1 @@ -1010,6 +1010,9 @@ check and use loop contracts when provided \fB\-loop\-contracts\-no\-unwind\fR do not unwind transformed loops .TP +\fB\-loop\-contracts\-file\fR \fIfile\fR +annotate loop contracts from the file to the goto program +.TP \fB\-\-replace\-call\-with\-contract\fR \fIfun\fR replace calls to \fIfun\fR with \fIfun\fR's contract .TP diff --git a/regression/contracts-dfcc/CMakeLists.txt b/regression/contracts-dfcc/CMakeLists.txt index b03adb34412..872fdc9f247 100644 --- a/regression/contracts-dfcc/CMakeLists.txt +++ b/regression/contracts-dfcc/CMakeLists.txt @@ -14,12 +14,12 @@ endif() add_test_pl_tests( - "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ ${is_windows} true" + "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ ${is_windows} true" ${gcc_only} ) add_test_pl_profile( "contracts-non-dfcc" - "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ ${is_windows} false" + "${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $ $ $ ${is_windows} false" ${gcc_only} "-C;-X;dfcc-only;-s;non-dfcc" "CORE" ) diff --git a/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test.json b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test.json new file mode 100644 index 00000000000..24ddbdb183a --- /dev/null +++ b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test.json @@ -0,0 +1,15 @@ +{ + "functions": [ + { + "foo": [ + { + "loop_id": "0", + "assigns": "i", + "invariants": "0 <= i && i <= 10", + "decreases": "i", + "symbol_map": "i,foo::1::1::i" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test_contracts_file.desc b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test_contracts_file.desc new file mode 100644 index 00000000000..08178b64213 --- /dev/null +++ b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_fail/test_contracts_file.desc @@ -0,0 +1,14 @@ +CORE +main.c +--loop-contracts-file test.json --dfcc main --apply-loop-contracts +^\[foo.assigns.\d+\] .* Check that nondet_var is assignable: FAILURE$ +^\[foo.assigns.\d+\] .* Check that __VERIFIER_var is assignable: FAILURE$ +^\[foo.assigns.\d+\] .* Check that __CPROVER_var is assignable: FAILURE$ +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- +-- +This test checks that program variables with special name prefixes +__CPROVER_, __VERIFIER, or nondet do not escape assigns clause checking. +Using loop contracts from the contracts file. diff --git a/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test.json b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test.json new file mode 100644 index 00000000000..572287066b8 --- /dev/null +++ b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test.json @@ -0,0 +1,18 @@ +{ + "functions": [ + { + "foo": [ + { + "loop_id": "0", + "assigns": "i,nondet_var, __VERIFIER_var, __CPROVER_var", + "invariants": "0 <= i && i <= 10", + "decreases": "i", + "symbol_map": "i,foo::1::1::i; + nondet_var,foo::1::nondet_var; + __VERIFIER_var, foo::1::__VERIFIER_var; + __CPROVER_var,foo::1::__CPROVER_var" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test_contracts_file.desc b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test_contracts_file.desc new file mode 100644 index 00000000000..4e33cec0137 --- /dev/null +++ b/regression/contracts-dfcc/dont_skip_cprover_prefixed_vars_pass/test_contracts_file.desc @@ -0,0 +1,15 @@ +CORE +main.c +--loop-contracts-file test.json --dfcc main --apply-loop-contracts +^\[foo.assigns.\d+\] .* Check that nondet_var is assignable: SUCCESS$ +^\[foo.assigns.\d+\] .* Check that __VERIFIER_var is assignable: SUCCESS$ +^\[foo.assigns.\d+\] .* Check that __CPROVER_var is assignable: SUCCESS$ +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test checks that when program variables names have special prefixes +__CPROVER_, __VERIFIER, or nondet, adding them to the write set makes them +assignable. +Using loop contracts from the contracts file. diff --git a/regression/contracts-dfcc/history-index/test.json b/regression/contracts-dfcc/history-index/test.json new file mode 100644 index 00000000000..534c1b99c57 --- /dev/null +++ b/regression/contracts-dfcc/history-index/test.json @@ -0,0 +1,13 @@ +{ + "functions": [ + { + "main": [ + { + "loop_id": "0", + "invariants": "x[0] == __CPROVER_loop_entry(x[0])", + "symbol_map": "x,main::1::x" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/history-index/test_contracts_file.desc b/regression/contracts-dfcc/history-index/test_contracts_file.desc new file mode 100644 index 00000000000..7d257c86e11 --- /dev/null +++ b/regression/contracts-dfcc/history-index/test_contracts_file.desc @@ -0,0 +1,11 @@ +CORE dfcc-only +main.c +--loop-contracts-file test.json --dfcc main --apply-loop-contracts +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^Tracking history of index expressions is not supported yet\. +-- +This test checks that `ID_index` expressions are allowed within history variables. +Using loop contracts from the contracts file. diff --git a/regression/contracts-dfcc/loop_contracts_file_fail/main.c b/regression/contracts-dfcc/loop_contracts_file_fail/main.c new file mode 100644 index 00000000000..c9727414143 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_file_fail/main.c @@ -0,0 +1,42 @@ +#include +#include + +#define SIZE 8 + +struct blob +{ + char *data; +}; + +void main() +{ + foo(); +} + +void foo() +{ + struct blob *b = malloc(sizeof(struct blob)); + b->data = malloc(SIZE); + + b->data[5] = 0; + for(unsigned i = 0; i < SIZE; i++) + // clang-format off + // clang-format on + { + b->data[i] = 1; + } +} + +void foo1() +{ + struct blob *b = malloc(sizeof(struct blob)); + b->data = malloc(SIZE); + + b->data[5] = 0; + for(unsigned i = 0; i < SIZE; i++) + // clang-format off + // clang-format on + { + b->data[i] = 1; + } +} diff --git a/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.desc b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.desc new file mode 100644 index 00000000000..24b07616575 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.desc @@ -0,0 +1,10 @@ +CORE +main.c +--loop-contracts-file test_matching_more_than_one_function.json --dfcc main --apply-loop-contracts +^function regex .* matches more than one function +^EXIT=6$ +^SIGNAL=0$ +-- +-- +This test checks that loop contracts wrangler correctly error out +when there is more than one matched function. diff --git a/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.json b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.json new file mode 100644 index 00000000000..102bf5ab0c2 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_more_than_one_function.json @@ -0,0 +1,13 @@ +{ + "functions": [ + { + "foo.*": [ + { + "loop_id": "0", + "invariants": "i <= 8", + "symbol_map": "i, foo::1::1::i" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.desc b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.desc new file mode 100644 index 00000000000..ae1a8d2757a --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.desc @@ -0,0 +1,10 @@ +CORE +main.c +--loop-contracts-file test_matching_no_function.json --dfcc main --apply-loop-contracts +^function .* matches no function +^EXIT=6$ +^SIGNAL=0$ +-- +-- +This test checks that loop contracts wrangler correctly error out +when there is no matched function. diff --git a/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.json b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.json new file mode 100644 index 00000000000..2ed2b0d4e77 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_file_fail/test_matching_no_function.json @@ -0,0 +1,13 @@ +{ + "functions": [ + { + "bar": [ + { + "loop_id": "0", + "invariants": "i <= 8", + "symbol_map": "i, foo::1::1::i" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/loop_contracts_memcmp/main.c b/regression/contracts-dfcc/loop_contracts_memcmp/main.c new file mode 100644 index 00000000000..572a134157e --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_memcmp/main.c @@ -0,0 +1,34 @@ +inline int memcmp(const char *s1, const char *s2, unsigned n) +{ + int res = 0; + const unsigned char *sc1 = s1, *sc2 = s2; + for(; n != 0; n--) + // clang-format off + __CPROVER_loop_invariant(n <= __CPROVER_loop_entry(n)) + __CPROVER_loop_invariant(__CPROVER_same_object(sc1, __CPROVER_loop_entry(sc1))) + __CPROVER_loop_invariant(__CPROVER_same_object(sc2, __CPROVER_loop_entry(sc2))) + __CPROVER_loop_invariant(sc1 <= s1 + __CPROVER_loop_entry(n)) + __CPROVER_loop_invariant(sc2 <= s2 + __CPROVER_loop_entry(n)) + __CPROVER_loop_invariant(res == 0) + __CPROVER_loop_invariant(sc1 -(const unsigned char*)s1 == sc2 -(const unsigned char*)s2 + && sc1 -(const unsigned char*)s1== __CPROVER_loop_entry(n) - n) + // clang-format on + { + res = (*sc1++) - (*sc2++); + long d1 = sc1 - (const unsigned char *)s1; + long d2 = sc2 - (const unsigned char *)s2; + if(res != 0) + return res; + } + return res; +} + +int main() +{ + unsigned SIZE; + __CPROVER_assume(1 < SIZE && SIZE < 65536); + unsigned char *a = malloc(SIZE); + unsigned char *b = malloc(SIZE); + memcpy(b, a, SIZE); + assert(memcmp(a, b, SIZE) == 0); +} diff --git a/regression/contracts-dfcc/loop_contracts_memcmp/test.desc b/regression/contracts-dfcc/loop_contracts_memcmp/test.desc new file mode 100644 index 00000000000..b1c89b4e607 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_memcmp/test.desc @@ -0,0 +1,13 @@ +CORE gcc-only +main.c +--dfcc main --apply-loop-contracts +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test checks that loop contracts work correctly on memcmp. +This test fails the CI job check-vs-2019-cmake-build-and-test with +the following error. +`Function '_errno' must exist in the model.` + diff --git a/regression/contracts-dfcc/loop_contracts_memcmp/test.json b/regression/contracts-dfcc/loop_contracts_memcmp/test.json new file mode 100644 index 00000000000..b528b11ae36 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_memcmp/test.json @@ -0,0 +1,19 @@ +{ + "functions": [ + { + "memcmp": [ + { + "loop_id": "0", + "invariants": "(n <= __CPROVER_loop_entry(n))&&(__CPROVER_same_object(sc1, __CPROVER_loop_entry(sc1)))&& + (__CPROVER_same_object(sc2, __CPROVER_loop_entry(sc2)))&& + (sc1 <= s1 + __CPROVER_loop_entry(n))&& + (sc2 <= s2 + __CPROVER_loop_entry(n))&& + (res == 0)&& + (sc1 -(const unsigned char*)s1 == sc2 -(const unsigned char*)s2 + && sc1 -(const unsigned char*)s1== __CPROVER_loop_entry(n) - n)", + "symbol_map": "n,memcmp::n; sc1,memcmp::1::sc1; sc2,memcmp::1::sc2; res,memcmp::1::res; s1, memcmp::s1; s2, memcmp::s2" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/loop_contracts_memcmp/test_contracts_file.desc b/regression/contracts-dfcc/loop_contracts_memcmp/test_contracts_file.desc new file mode 100644 index 00000000000..32a3dfe1555 --- /dev/null +++ b/regression/contracts-dfcc/loop_contracts_memcmp/test_contracts_file.desc @@ -0,0 +1,13 @@ +CORE gcc-only +main.c +--loop-contracts-file test.json --dfcc main --apply-loop-contracts +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +This test checks that loop contracts work correctly on memcmp. +Using loop contracts from the contracts file. +This test fails the CI job check-vs-2019-cmake-build-and-test with +the following error. +`Function '_errno' must exist in the model.` diff --git a/regression/contracts-dfcc/variant_multi_instruction_loop_head/test.json b/regression/contracts-dfcc/variant_multi_instruction_loop_head/test.json new file mode 100644 index 00000000000..48592a28464 --- /dev/null +++ b/regression/contracts-dfcc/variant_multi_instruction_loop_head/test.json @@ -0,0 +1,14 @@ +{ + "functions": [ + { + "main": [ + { + "loop_id": "0", + "invariants": "0 <= *y && *y <= 10", + "decreases": "10-x", + "symbol_map": "y,main::1::y;x,main::1::x" + } + ] + } + ] + } diff --git a/regression/contracts-dfcc/variant_multi_instruction_loop_head/test_contracts_file.desc b/regression/contracts-dfcc/variant_multi_instruction_loop_head/test_contracts_file.desc new file mode 100644 index 00000000000..f6968e4469b --- /dev/null +++ b/regression/contracts-dfcc/variant_multi_instruction_loop_head/test_contracts_file.desc @@ -0,0 +1,17 @@ +CORE dfcc-only +main.c +--loop-contracts-file test.json --dfcc main --apply-loop-contracts +^\[main.loop_assigns.\d+\] line 6 Check assigns clause inclusion for loop .*: SUCCESS$ +^\[main.loop_invariant_base.\d+\] line 6 Check invariant before entry for loop .*: SUCCESS$ +^\[main.loop_invariant_step.\d+\] line 6 Check invariant after step for loop .*: SUCCESS$ +^\[main.loop_step_unwinding.\d+\] line 6 Check step was unwound for loop .*: SUCCESS$ +^\[main.loop_decreases.\d+\] line 6 Check variant decreases after step for loop .*: SUCCESS$ +^\[main.assigns.\d+\] line \d+ Check that x is assignable: SUCCESS$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +-- +This test checks that decreases clause is properly initialized +when the loop head and loop invariant compilation emit several goto instructions. +Using loop contracts from the contracts file. diff --git a/src/ansi-c/c_typecheck_base.h b/src/ansi-c/c_typecheck_base.h index 1be65de97ed..5661c1b6d91 100644 --- a/src/ansi-c/c_typecheck_base.h +++ b/src/ansi-c/c_typecheck_base.h @@ -65,6 +65,7 @@ class c_typecheck_baset: virtual void typecheck()=0; virtual void typecheck_expr(exprt &expr); + virtual void typecheck_spec_assigns(exprt::operandst &targets); protected: symbol_table_baset &symbol_table; @@ -160,7 +161,6 @@ class c_typecheck_baset: /// is found in expr. virtual void check_was_freed(const exprt &expr, std::string &clause_type); - virtual void typecheck_spec_assigns(exprt::operandst &targets); virtual void typecheck_spec_frees(exprt::operandst &targets); virtual void typecheck_conditional_targets( exprt::operandst &targets, diff --git a/src/goto-instrument/Makefile b/src/goto-instrument/Makefile index 5e3c42074ed..300e8f74047 100644 --- a/src/goto-instrument/Makefile +++ b/src/goto-instrument/Makefile @@ -17,6 +17,7 @@ SRC = accelerate/accelerate.cpp \ branch.cpp \ call_sequences.cpp \ contracts/contracts.cpp \ + contracts/contracts_wrangler.cpp \ contracts/dynamic-frames/dfcc_utils.cpp \ contracts/dynamic-frames/dfcc_loop_nesting_graph.cpp \ contracts/dynamic-frames/dfcc_contract_mode.cpp \ diff --git a/src/goto-instrument/contracts/contracts.h b/src/goto-instrument/contracts/contracts.h index 3302dcf3074..ce0c061c6f5 100644 --- a/src/goto-instrument/contracts/contracts.h +++ b/src/goto-instrument/contracts/contracts.h @@ -40,6 +40,11 @@ Date: February 2016 " --loop-contracts-no-unwind\n" \ " do not unwind transformed loops\n" +#define FLAG_LOOP_CONTRACTS_FILE "loop-contracts-file" +#define HELP_LOOP_CONTRACTS_FILE \ + " --loop-contracts-file \n" \ + " parse and annotate loop contracts from files\n" + #define FLAG_REPLACE_CALL "replace-call-with-contract" #define HELP_REPLACE_CALL \ " --replace-call-with-contract [/contract]\n" \ diff --git a/src/goto-instrument/contracts/contracts_wrangler.cpp b/src/goto-instrument/contracts/contracts_wrangler.cpp new file mode 100644 index 00000000000..62c040006a9 --- /dev/null +++ b/src/goto-instrument/contracts/contracts_wrangler.cpp @@ -0,0 +1,380 @@ +/*******************************************************************\ + +Module: Parse and annotate contracts from configuration files + +Author: Qinheping Hu + +Date: June 2023 + +\*******************************************************************/ + +/// \file +/// Parse and annotate contracts + +#include "contracts_wrangler.h" + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +contracts_wranglert::contracts_wranglert( + goto_modelt &goto_model, + const std::string &file_name, + message_handlert &message_handler) + : ns(goto_model.symbol_table), + goto_model(goto_model), + symbol_table(goto_model.symbol_table), + goto_functions(goto_model.goto_functions), + message_handler(message_handler) +{ + jsont configuration; + + if(parse_json(file_name, message_handler, configuration)) + throw deserialization_exceptiont(file_name + " is not a valid JSON file"); + + configure_functions(configuration); + + // Mangle loop contracts function by function. + // We match function with a regex. There should one and only one function + // with name matched by the regex. + for(const auto &function_entry : this->functions) + { + bool function_found = false; + + for(const auto &function : goto_model.goto_functions.function_map) + { + // We find a matched function. + if(std::regex_match(function.first.c_str(), function_entry.first)) + { + if(function_found) + throw deserialization_exceptiont( + "function regex " + function_entry.second.regex_str + + " matches more than one function"); + + function_found = true; + + // Mangle all loop contracts in the function. + for(const auto &loop_entry : function_entry.second.loop_contracts) + { + mangle(loop_entry, function.first); + } + } + } + + if(!function_found) + throw deserialization_exceptiont( + "function regex " + function_entry.second.regex_str + + " matches no function"); + } +} + +void contracts_wranglert::add_builtin_pointer_function_symbol( + std::string function_name, + const std::size_t num_of_args) +{ + // Already exist. + if(symbol_table.has_symbol(CPROVER_PREFIX + function_name)) + return; + + code_typet::parameterst parameters; + for(unsigned i = 0; i < num_of_args; i++) + { + parameters.emplace_back(pointer_type(void_type())); + } + symbolt fun_symbol( + CPROVER_PREFIX + function_name, + code_typet(parameters, empty_typet()), + ID_C); + symbol_table.add(fun_symbol); +} + +void contracts_wranglert::mangle( + const loop_contracts_clauset &loop_contracts, + const irep_idt &function_id) +{ + messaget log(message_handler); + // Loop contracts mangling consists of four steps. + // + // 1. Construct a fake function with a fake loop that contains the loop + // contracts for parsing the contracts. + // 2. Parse the fake function and extract the contracts as exprt. + // 3. Replace symbols in the extracted exprt with the symbols in the + // symbol table of the goto model according to the symbol map provided by + // the user. + // 4. Typecheck all contracts exprt. + // 5. Annotate all contracts exprt to the corresponding loop. + + // Construct a fake function to parse. + // void function_id() + // { + // while(1==1) + // __CPROVER_assigns(assigns_str) // optional + // __CPROVER_loop_invariant(invariants_str) + // __CPROVER_decreases(decreases_str) // optional + // } + std::ostringstream pr; + pr << "void _fn() { while(1==1)"; + if(!loop_contracts.assigns.empty()) + { + pr << CPROVER_PREFIX << "assigns(" << loop_contracts.assigns << ") "; + } + pr << CPROVER_PREFIX << "loop_invariant(" << loop_contracts.invariants + << ") "; + if(!loop_contracts.decreases.empty()) + { + pr << CPROVER_PREFIX << "decreases(" << loop_contracts.decreases << ") "; + } + pr << " {}}" << std::endl; + + log.debug() << "Constructing fake function:\n" << pr.str() << log.eom; + + // Parse the fake function. + std::istringstream is(pr.str()); + ansi_c_parser.clear(); + ansi_c_parser.set_line_no(0); + ansi_c_parser.set_file(""); + ansi_c_parser.in = &is; + ansi_c_scanner_init(); + ansi_c_parser.parse(); + + // Extract the invariants from prase_tree. + exprt &inv_expr(static_cast(ansi_c_parser.parse_tree.items.front() + .declarator() + .value() + .operands()[0] + .add(ID_C_spec_loop_invariant)) + .operands()[0]); + + log.debug() << "Extracted loop invariants: " << inv_expr.pretty() << "\n" + << log.eom; + + // Extract the assigns from parse_tree. + optionalt assigns_expr; + if(!loop_contracts.assigns.empty()) + { + assigns_expr = static_cast(ansi_c_parser.parse_tree.items.front() + .declarator() + .value() + .operands()[0] + .add(ID_C_spec_assigns)) + .operands()[0]; + } + + // Extract the decreases from parse_tree. + exprt::operandst decreases_exprs; + if(!loop_contracts.decreases.empty()) + { + for(auto op : static_cast(ansi_c_parser.parse_tree.items.front() + .declarator() + .value() + .operands()[0] + .add(ID_C_spec_decreases)) + .operands()) + { + decreases_exprs.emplace_back(op); + } + } + + // Replace symbols in the clauses with the symbols in the symbol table. + log.debug() << "Start to replace symbols" << log.eom; + loop_contracts.replace_symbol(inv_expr); + if(assigns_expr.has_value()) + { + loop_contracts.replace_symbol(assigns_expr.value()); + } + for(exprt &decrease_expr : decreases_exprs) + { + loop_contracts.replace_symbol(decrease_expr); + } + + // Add builtin functions that might be used in contracts to the symbol table. + add_builtin_pointer_function_symbol("loop_entry", 1); + add_builtin_pointer_function_symbol("same_object", 2); + add_builtin_pointer_function_symbol("OBJECT_SIZE", 1); + add_builtin_pointer_function_symbol("POINTER_OBJECT", 1); + add_builtin_pointer_function_symbol("POINTER_OFFSET", 1); + + // Typecheck clauses. + ansi_c_typecheckt ansi_c_typecheck( + ansi_c_parser.parse_tree, + symbol_table, + symbol_table.lookup_ref(function_id).module.c_str(), + log.get_message_handler()); + + // Typecheck and annotate invariants. + { + log.debug() << "Start to typecheck invariants." << log.eom; + ansi_c_typecheck.typecheck_expr(inv_expr); + if(has_subexpr(inv_expr, ID_old)) + throw invalid_input_exceptiont(CPROVER_PREFIX + "old is not allowed in loop invariants."); + + invariant_mapt inv_map; + inv_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] = + and_exprt(simplify_expr(inv_expr, ns), true_exprt()); + annotate_invariants(inv_map, goto_model); + } + + // Typecheck and annotate assigns. + log.debug() << "Start to typecheck assigns." << log.eom; + if(assigns_expr.has_value()) + { + ansi_c_typecheck.typecheck_spec_assigns(assigns_expr.value().operands()); + + invariant_mapt assigns_map; + assigns_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] = + assigns_expr.value(); + annotate_assigns(assigns_map, goto_model); + } + + // Typecheck an annotate decreases. + log.debug() << "Start to typecheck decreases." << log.eom; + if(!decreases_exprs.empty()) + { + std::map> decreases_map; + decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] = + std::vector(); + for(exprt &decrease_expr : decreases_exprs) + { + ansi_c_typecheck.typecheck_expr(decrease_expr); + decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] + .emplace_back(decrease_expr); + } + annotate_decreases(decreases_map, goto_model); + } + + log.debug() << "Mangling finished." << log.eom; + // Remove added symbol. + symbol_table.remove(CPROVER_PREFIX "loop_entry"); + symbol_table.remove(CPROVER_PREFIX "same_object"); + symbol_table.remove(CPROVER_PREFIX "OBJECT_SIZE"); + symbol_table.remove(CPROVER_PREFIX "POINTER_OBJECT"); + symbol_table.remove(CPROVER_PREFIX "POINTER_OFFSET"); +} + +void contracts_wranglert::configure_functions(const jsont &config) +{ + auto functions = config["functions"]; + + if(functions.is_null()) + return; + + if(!functions.is_array()) + throw deserialization_exceptiont("functions entry must be sequence"); + + for(const auto &function : to_json_array(functions)) + { + if(!function.is_object()) + throw deserialization_exceptiont("function entry must be object"); + + for(const auto &function_entry : to_json_object(function)) + { + const auto function_name = function_entry.first; + const auto &items = function_entry.second; + + if(!items.is_array()) + throw deserialization_exceptiont("loops entry must be sequence"); + + this->functions.emplace_back(function_name, functiont{}); + functiont &function_config = this->functions.back().second; + function_config.regex_str = function_name; + + for(const auto &function_item : to_json_array(items)) + { + if(!function_item.is_object()) + throw deserialization_exceptiont("loop entry must be object"); + + std::string loop_id = ""; + std::string invariants_str = ""; + std::string assigns_str = ""; + std::string decreases_str = ""; + unchecked_replace_symbolt replace_symbol; + + for(const auto &loop_item : to_json_object(function_item)) + { + if(!loop_item.second.is_string()) + throw deserialization_exceptiont("loop item must be string"); + + if(loop_item.first == "loop_id") + { + loop_id = loop_item.second.value; + continue; + } + + if(loop_item.first == "assigns") + { + assigns_str = loop_item.second.value; + continue; + } + + if(loop_item.first == "decreases") + { + decreases_str = loop_item.second.value; + continue; + } + + if(loop_item.first == "invariants") + { + invariants_str = loop_item.second.value; + continue; + } + + if(loop_item.first == "symbol_map") + { + std::string symbol_map_str = loop_item.second.value; + + // Remove all space in symbol_map_str + symbol_map_str.erase( + std::remove_if( + symbol_map_str.begin(), symbol_map_str.end(), isspace), + symbol_map_str.end()); + + for(const auto &symbol_map_entry : + split_string(symbol_map_str, ';')) + { + const auto symbol_map_pair = split_string(symbol_map_entry, ','); + const auto &symbol = symbol_table.lookup(symbol_map_pair.back()); + if(symbol == nullptr) + throw deserialization_exceptiont( + "symbol with id \"" + symbol_map_pair.front() + + "\" does not exist in the symbol table"); + // Create a dummy symbol. The type will be discarded when inserted + // into replace_symbol. + const symbol_exprt old_expr( + symbol_map_pair.front(), bool_typet()); + replace_symbol.insert(old_expr, symbol->symbol_expr()); + } + + continue; + } + + throw deserialization_exceptiont("unexpected loop item"); + } + + if(loop_id.empty()) + { + throw deserialization_exceptiont( + "loop entry must have one identifier"); + } + + if(invariants_str.empty()) + { + throw deserialization_exceptiont( + "loop entry must have one invariant string"); + } + + function_config.loop_contracts.emplace_back( + loop_id, invariants_str, assigns_str, decreases_str, replace_symbol); + } + } + } +} diff --git a/src/goto-instrument/contracts/contracts_wrangler.h b/src/goto-instrument/contracts/contracts_wrangler.h new file mode 100644 index 00000000000..819754ea2e3 --- /dev/null +++ b/src/goto-instrument/contracts/contracts_wrangler.h @@ -0,0 +1,96 @@ +/*******************************************************************\ + +Module: Parse and annotate contracts from configuration files + +Author: Qinheping Hu + +Date: June 2023 + +\*******************************************************************/ + +/// \file +/// Parse and annotate contracts + +#ifndef CPROVER_GOTO_INSTRUMENT_CONTRACTS_CONTRACTS_WRANGLER_H +#define CPROVER_GOTO_INSTRUMENT_CONTRACTS_CONTRACTS_WRANGLER_H + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +struct loop_contracts_clauset +{ + std::string identifier; + std::string invariants; + std::string assigns; + std::string decreases; + unchecked_replace_symbolt replace_symbol; + + loop_contracts_clauset( + std::string _identifier, + std::string _invariants_str, + std::string _assigns_str, + std::string _decreases_str, + unchecked_replace_symbolt _replace_symbol) + : identifier(std::move(_identifier)), + invariants(std::move(_invariants_str)), + assigns(_assigns_str), + decreases(_decreases_str), + replace_symbol(_replace_symbol) + { + } +}; + +struct functiont +{ + std::vector loop_contracts; + std::string regex_str; +}; + +using functionst = std::list>; + +class contracts_wranglert +{ +public: + contracts_wranglert( + goto_modelt &goto_model, + const std::string &file_name, + message_handlert &message_handler); + + namespacet ns; + +protected: + goto_modelt &goto_model; + symbol_tablet &symbol_table; + goto_functionst &goto_functions; + + message_handlert &message_handler; + + functionst functions; + + void configure_functions(const jsont &); + + /// @brief Mangle `loop_contracts` in the function with `function_id` + /// @param loop_contracts The contracts mangled in the function. + /// @param function_id The function containing the loop we mangle to. + void mangle( + const loop_contracts_clauset &loop_contracts, + const irep_idt &function_id); + + /// @brief Add builtin function symbol with `function_name` into symbol table. + /// @param function_name Name of the function to add. + /// @param num_of_args Number of arguments of the added symbol. + void add_builtin_pointer_function_symbol( + std::string function_name, + const std::size_t num_of_args); +}; + +#endif // CPROVER_GOTO_INSTRUMENT_CONTRACTS_CONTRACTS_WRANGLER_H diff --git a/src/goto-instrument/contracts/module_dependencies.txt b/src/goto-instrument/contracts/module_dependencies.txt index a797c099df6..d97a048f0ba 100644 --- a/src/goto-instrument/contracts/module_dependencies.txt +++ b/src/goto-instrument/contracts/module_dependencies.txt @@ -3,6 +3,7 @@ ansi-c goto-instrument/contracts goto-instrument goto-programs +json langapi linking util diff --git a/src/goto-instrument/contracts/utils.cpp b/src/goto-instrument/contracts/utils.cpp index cd0899fd79b..70802b92548 100644 --- a/src/goto-instrument/contracts/utils.cpp +++ b/src/goto-instrument/contracts/utils.cpp @@ -738,3 +738,44 @@ void annotate_assigns( condition.add(ID_C_spec_assigns) = assigns; } } + +void annotate_assigns( + const std::map &assigns_map, + goto_modelt &goto_model) +{ + for(const auto &assigns_map_entry : assigns_map) + { + loop_idt loop_id = assigns_map_entry.first; + irep_idt function_id = loop_id.function_id; + unsigned int loop_number = loop_id.loop_number; + + // get the last instruction of the target loop + auto &function = goto_model.goto_functions.function_map[function_id]; + goto_programt::targett loop_end = get_loop_end(loop_number, function); + + exprt &condition = loop_end->condition_nonconst(); + condition.add(ID_C_spec_assigns) = assigns_map_entry.second; + } +} + +void annotate_decreases( + const std::map> &decreases_map, + goto_modelt &goto_model) +{ + for(const auto &decreases_map_entry : decreases_map) + { + loop_idt loop_id = decreases_map_entry.first; + irep_idt function_id = loop_id.function_id; + unsigned int loop_number = loop_id.loop_number; + + // get the last instruction of the target loop + auto &function = goto_model.goto_functions.function_map[function_id]; + goto_programt::targett loop_end = get_loop_end(loop_number, function); + + exprt &condition = loop_end->condition_nonconst(); + auto decreases = exprt(ID_target_list); + for(const auto &e : decreases_map_entry.second) + decreases.add_to_operands(e); + condition.add(ID_C_spec_decreases) = decreases; + } +} diff --git a/src/goto-instrument/contracts/utils.h b/src/goto-instrument/contracts/utils.h index 81173aed8fc..0d1546965ef 100644 --- a/src/goto-instrument/contracts/utils.h +++ b/src/goto-instrument/contracts/utils.h @@ -317,4 +317,14 @@ void annotate_assigns( const std::map> &assigns_map, goto_modelt &goto_model); +void annotate_assigns( + const std::map &assigns_map, + goto_modelt &goto_model); + +/// Annotate the decreases in `decreases_map` to their corresponding +/// loops. Corresponding loops are specified by keys of `decreases_map` +void annotate_decreases( + const std::map> &decreases_map, + goto_modelt &goto_model); + #endif // CPROVER_GOTO_INSTRUMENT_CONTRACTS_UTILS_H diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 619b178b4ba..fa33aa3cab6 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1158,6 +1158,13 @@ void goto_instrument_parse_optionst::instrument_goto_program() goto_model.goto_functions.update(); } + if(cmdline.isset("loop-contracts-file")) + { + const auto file_name = cmdline.get_value("loop-contracts-file"); + contracts_wranglert contracts_wrangler( + goto_model, file_name, ui_message_handler); + } + bool use_dfcc = cmdline.isset(FLAG_DFCC); if(use_dfcc) { @@ -2012,6 +2019,7 @@ void goto_instrument_parse_optionst::help() HELP_DFCC HELP_LOOP_CONTRACTS HELP_LOOP_CONTRACTS_NO_UNWIND + HELP_LOOP_CONTRACTS_FILE HELP_REPLACE_CALL HELP_ENFORCE_CONTRACT HELP_ENFORCE_CONTRACT_REC diff --git a/src/goto-instrument/goto_instrument_parse_options.h b/src/goto-instrument/goto_instrument_parse_options.h index 07c0dfd9e1c..5b64af490e5 100644 --- a/src/goto-instrument/goto_instrument_parse_options.h +++ b/src/goto-instrument/goto_instrument_parse_options.h @@ -42,6 +42,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "unwindset.h" #include "contracts/contracts.h" +#include "contracts/contracts_wrangler.h" #include "contracts/dynamic-frames/dfcc.h" #include "wmm/weak_memory.h" @@ -101,6 +102,7 @@ Author: Daniel Kroening, kroening@kroening.com OPT_DFCC \ "(" FLAG_LOOP_CONTRACTS ")" \ "(" FLAG_LOOP_CONTRACTS_NO_UNWIND ")" \ + "(" FLAG_LOOP_CONTRACTS_FILE "):" \ "(" FLAG_REPLACE_CALL "):" \ "(" FLAG_ENFORCE_CONTRACT "):" \ OPT_ENFORCE_CONTRACT_REC \