diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 9e4a6ca72c..d1df4133b1 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3263,6 +3263,39 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { return BR_DONE; } + // (str.in_re e (re.range lo hi)) where a bound is not a concrete character. + // By SMT-LIB semantics (re.range lo hi) is the set of single characters c + // with lo <= c <= hi when lo and hi are themselves single characters, and + // the empty language otherwise; so membership is equivalent to lo, hi and + // e all being single characters with lo <= e <= hi. The derivative engine + // only unfolds ranges whose bounds are concrete characters, so without this + // reduction a range with a symbolic bound is left unsolved (and mk_re_range + // deliberately keeps such a range symbolic rather than unsoundly collapsing + // it to re.empty). Ranges with two concrete single-character bounds keep + // their existing derivative-based handling. + { + expr* rlo = nullptr, *rhi = nullptr; + if (re().is_range(b, rlo, rhi)) { + auto concrete_char = [&](expr* e) { + zstring s; + expr* ch = nullptr; + unsigned uc = 0; + return (str().is_string(e, s) && s.length() == 1) || + (str().is_unit(e, ch) && m_util.is_const_char(ch, uc)); + }; + if (!concrete_char(rlo) || !concrete_char(rhi)) { + expr_ref_vector conj(m()); + conj.push_back(m().mk_eq(str().mk_length(rlo), one())); + conj.push_back(m().mk_eq(str().mk_length(rhi), one())); + conj.push_back(m().mk_eq(str().mk_length(a), one())); + conj.push_back(str().mk_lex_le(rlo, a)); + conj.push_back(str().mk_lex_le(a, rhi)); + result = m().mk_and(conj); + return BR_REWRITE_FULL; + } + } + } + zstring s; if (str().is_string(a, s) && re().is_ground(b)) { // Just check membership and replace by true/false @@ -4126,35 +4159,47 @@ br_status seq_rewriter::mk_re_range(expr* lo, expr* hi, expr_ref& result) { is_empty = true; if (max_length(hi) == std::make_pair(true, rational(0))) is_empty = true; - if (!is_empty) { - if (str().is_string(lo, slo) && slo.length() == 1) - clo = slo[0]; - else if (str().is_unit(lo, lo1) && m_util.is_const_char(lo1, clo)) - ; - else - is_empty = true; - } - if (!is_empty) { - if (str().is_string(hi, shi) && shi.length() == 1) - chi = shi[0]; - else if (str().is_unit(hi, hi1) && m_util.is_const_char(hi1, chi)) - ; - else - is_empty = true; - } - - // clo/chi are only meaningful once both bounds were extracted; an early - // is_empty (from the length checks) leaves them at their default 0, so the - // is_empty return must come before the singleton/ordering checks below. - if (!is_empty && clo > chi) - is_empty = true; + // A provable length constraint (a bound can never be a single character) + // is the only sound way to conclude emptiness for a possibly-symbolic + // bound, so decide emptiness here before attempting to read concrete + // characters. if (is_empty) { sort* srt = re().mk_re(lo->get_sort()); result = re().mk_empty(srt); return BR_DONE; } + // Try to read concrete single-character bounds. A bound that is not a + // syntactic single-character literal is *symbolic* (its value depends on + // the model), NOT empty: collapsing such a range to re.empty is unsound + // (e.g. (re.range x x) is {x} whenever x is a single character), so we + // leave the range unevaluated (BR_FAILED) and let the theory solver + // reason about it. + bool has_clo = false, has_chi = false; + if (str().is_string(lo, slo) && slo.length() == 1) { + clo = slo[0]; + has_clo = true; + } + else if (str().is_unit(lo, lo1) && m_util.is_const_char(lo1, clo)) + has_clo = true; + if (str().is_string(hi, shi) && shi.length() == 1) { + chi = shi[0]; + has_chi = true; + } + else if (str().is_unit(hi, hi1) && m_util.is_const_char(hi1, chi)) + has_chi = true; + + if (!has_clo || !has_chi) + return BR_FAILED; + + // Both bounds are concrete characters: an inverted range is empty. + if (clo > chi) { + sort* srt = re().mk_re(lo->get_sort()); + result = re().mk_empty(srt); + return BR_DONE; + } + // Singleton: re.range "a" "a" → str.to_re "a" if (clo == chi) { result = re().mk_to_re(str().mk_string(zstring(clo))); diff --git a/src/test/seq_rewriter.cpp b/src/test/seq_rewriter.cpp index 658675fa72..920a4ae4ac 100644 --- a/src/test/seq_rewriter.cpp +++ b/src/test/seq_rewriter.cpp @@ -10,12 +10,19 @@ Tests: 4. Range ∪ Range → merged range for overlapping/adjacent 5. Complement of range → one or two ranges 6. Downstream operators absorb empty ranges correctly + 15. Symbolic-bound range membership rewrite (structural) + 16. Symbolic-bound range membership: concrete element, symbolic bounds (structural) + 17. Solver: (str.in_re x (re.range x x)) sat when len(x)=1 + 18. Solver: (str.in_re x (re.range x x)) unsat when len(x)=2 + 19. Solver: inverted symbolic bounds make membership unsatisfiable --*/ +#include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" #include "ast/reg_decl_plugins.h" #include "ast/rewriter/th_rewriter.h" #include "ast/seq_decl_plugin.h" +#include "smt/smt_context.h" #include // Build a single-char string literal expression. @@ -167,5 +174,86 @@ void tst_seq_rewriter() { ENSURE(su.re.is_empty(e)); } + // ----------------------------------------------------------------------- + // 15. Symbolic-bound range membership rewrite (structural). + // (str.in_re x (re.range x x)) with symbolic x should be unfolded + // by the rewriter into a conjunction of length and ordering + // constraints, not left stuck as an uninterpreted membership term. + // ----------------------------------------------------------------------- + { + app_ref x(m.mk_fresh_const("x", str_sort), m); + expr_ref rng(su.re.mk_range(x, x), m); + expr_ref e(su.re.mk_in_re(x, rng), m); + rw(e); + std::cout << "symbolic range (x in [x,x]): " << mk_pp(e, m) << "\n"; + ENSURE(m.is_and(e)); + } + + // ----------------------------------------------------------------------- + // 16. Symbolic-bound range membership: concrete element, symbolic bounds. + // (str.in_re "b" (re.range lo hi)) should also be unfolded to a + // conjunction when lo/hi are free variables. + // ----------------------------------------------------------------------- + { + app_ref lo(m.mk_fresh_const("lo", str_sort), m); + app_ref hi(m.mk_fresh_const("hi", str_sort), m); + expr_ref b_str(su.str.mk_string(zstring('b')), m); + expr_ref rng(su.re.mk_range(lo, hi), m); + expr_ref e(su.re.mk_in_re(b_str, rng), m); + rw(e); + std::cout << "symbolic range (\"b\" in [lo,hi]): " << mk_pp(e, m) << "\n"; + ENSURE(m.is_and(e)); + } + + // ----------------------------------------------------------------------- + // Solver-level tests: the unfolded conjunction must be decidable. + // ----------------------------------------------------------------------- + { + arith_util a_util(m); + + // 17. sat: (str.in_re x (re.range x x)) ∧ len(x)=1 + { + smt_params sp; + smt::context ctx(m, sp); + app_ref x(m.mk_fresh_const("x", str_sort), m); + ctx.assert_expr(su.re.mk_in_re(x, su.re.mk_range(x, x))); + ctx.assert_expr(m.mk_eq(su.str.mk_length(x), a_util.mk_int(1))); + lbool res = ctx.check(); + std::cout << "symbolic range solver sat (len=1): " << res << "\n"; + ENSURE(res == l_true); + } + + // 18. unsat: (str.in_re x (re.range x x)) ∧ len(x)=2 + // The unfolded membership requires len(x)=1, which contradicts len(x)=2. + { + smt_params sp; + smt::context ctx(m, sp); + app_ref x(m.mk_fresh_const("x", str_sort), m); + ctx.assert_expr(su.re.mk_in_re(x, su.re.mk_range(x, x))); + ctx.assert_expr(m.mk_eq(su.str.mk_length(x), a_util.mk_int(2))); + lbool res = ctx.check(); + std::cout << "symbolic range solver unsat (len=2): " << res << "\n"; + ENSURE(res == l_false); + } + + // 19. unsat: inverted symbolic bounds make membership false. + // (str.in_re "b" (re.range lo hi)) ∧ lo="z" ∧ hi="a" + // The unfolded conjunction requires lo <=_lex "b" <=_lex hi, but + // "z" > "b" > "a" so the ordering constraints are unsatisfiable. + { + smt_params sp; + smt::context ctx(m, sp); + app_ref lo(m.mk_fresh_const("lo", str_sort), m); + app_ref hi(m.mk_fresh_const("hi", str_sort), m); + expr_ref b_str(su.str.mk_string(zstring('b')), m); + ctx.assert_expr(su.re.mk_in_re(b_str, su.re.mk_range(lo, hi))); + ctx.assert_expr(m.mk_eq(lo, su.str.mk_string(zstring('z')))); + ctx.assert_expr(m.mk_eq(hi, su.str.mk_string(zstring('a')))); + lbool res = ctx.check(); + std::cout << "symbolic range solver inverted bounds unsat: " << res << "\n"; + ENSURE(res == l_false); + } + } + std::cout << "tst_seq_rewriter: all tests passed\n"; }