diff --git a/src/api/api_seq.cpp b/src/api/api_seq.cpp index b06a63507..b621dcebe 100644 --- a/src/api/api_seq.cpp +++ b/src/api/api_seq.cpp @@ -148,6 +148,7 @@ extern "C" { MK_BINARY(Z3_mk_seq_nth, mk_c(c)->get_seq_fid(), OP_SEQ_AT, SKIP); MK_UNARY(Z3_mk_seq_length, mk_c(c)->get_seq_fid(), OP_SEQ_LENGTH, SKIP); MK_TERNARY(Z3_mk_seq_index, mk_c(c)->get_seq_fid(), OP_SEQ_INDEX, SKIP); + MK_BINARY(Z3_mk_seq_last_index, mk_c(c)->get_seq_fid(), OP_SEQ_LAST_INDEX, SKIP); MK_UNARY(Z3_mk_seq_to_re, mk_c(c)->get_seq_fid(), OP_SEQ_TO_RE, SKIP); MK_BINARY(Z3_mk_seq_in_re, mk_c(c)->get_seq_fid(), OP_SEQ_IN_RE, SKIP); diff --git a/src/api/c++/z3++.h b/src/api/c++/z3++.h index c8aac77d3..4992015de 100644 --- a/src/api/c++/z3++.h +++ b/src/api/c++/z3++.h @@ -3276,6 +3276,12 @@ namespace z3 { s.check_error(); return expr(s.ctx(), r); } + inline expr last_indexof(expr const& s, expr const& substr) { + check_context(s, substr); + Z3_ast r = Z3_mk_seq_last_index(s.ctx(), s, substr); + s.check_error(); + return expr(s.ctx(), r); + } inline expr to_re(expr const& s) { MK_EXPR1(Z3_mk_seq_to_re, s); } diff --git a/src/api/python/z3/z3.py b/src/api/python/z3/z3.py index 880bf009f..69c90c3f6 100644 --- a/src/api/python/z3/z3.py +++ b/src/api/python/z3/z3.py @@ -10154,6 +10154,15 @@ def IndexOf(s, substr, offset): offset = IntVal(offset, ctx) return ArithRef(Z3_mk_seq_index(s.ctx_ref(), s.as_ast(), substr.as_ast(), offset.as_ast()), s.ctx) +def LastIndexOf(s, substr): + """Retrieve the last index of substring within a string""" + ctx = None + ctx = _get_ctx2(s, substr, ctx) + s = _coerce_seq(s, ctx) + substr = _coerce_seq(substr, ctx) + return ArithRef(Z3_mk_seq_last_index(s.ctx_ref(), s.as_ast(), substr.as_ast()), s.ctx) + + def Length(s): """Obtain the length of a sequence 's' >>> l = Length(StringVal("abc")) diff --git a/src/api/z3_api.h b/src/api/z3_api.h index 2ca2c14d9..7f1a1c70c 100644 --- a/src/api/z3_api.h +++ b/src/api/z3_api.h @@ -1171,6 +1171,7 @@ typedef enum { Z3_OP_SEQ_NTH, Z3_OP_SEQ_LENGTH, Z3_OP_SEQ_INDEX, + Z3_OP_SEQ_LAST_INDEX, Z3_OP_SEQ_TO_RE, Z3_OP_SEQ_IN_RE, @@ -3446,12 +3447,19 @@ extern "C" { /** \brief Return index of first occurrence of \c substr in \c s starting from offset \c offset. If \c s does not contain \c substr, then the value is -1, if \c offset is the length of \c s, then the value is -1 as well. - The function is under-specified if \c offset is negative or larger than the length of \c s. + The value is -1 if \c offset is negative or larger than the length of \c s. def_API('Z3_mk_seq_index' ,AST ,(_in(CONTEXT), _in(AST), _in(AST), _in(AST))) */ Z3_ast Z3_API Z3_mk_seq_index(Z3_context c, Z3_ast s, Z3_ast substr, Z3_ast offset); + /** + \brief Return the last occurrence of \c substr in \c s. + If \c s does not contain \c substr, then the value is -1, + def_API('Z3_mk_seq_last_index', AST, (_in(CONTEXT), _in(AST), _in(AST))) + */ + Z3_ast Z3_API Z3_mk_seq_last_index(Z3_context c, Z3_ast, Z3_ast substr); + /** \brief Convert string to integer. diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index cb915d86b..5aef06531 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -391,44 +391,57 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con case OP_SEQ_UNIT: SASSERT(num_args == 1); - return mk_seq_unit(args[0], result); + st = mk_seq_unit(args[0], result); + break; case OP_SEQ_EMPTY: return BR_FAILED; case OP_RE_PLUS: SASSERT(num_args == 1); - return mk_re_plus(args[0], result); + st = mk_re_plus(args[0], result); + break; case OP_RE_STAR: SASSERT(num_args == 1); st = mk_re_star(args[0], result); break; case OP_RE_OPTION: SASSERT(num_args == 1); - return mk_re_opt(args[0], result); + st = mk_re_opt(args[0], result); + break; case OP_RE_CONCAT: if (num_args == 1) { result = args[0]; - return BR_DONE; + st = BR_DONE; + } + else { + SASSERT(num_args == 2); + st = mk_re_concat(args[0], args[1], result); } - SASSERT(num_args == 2); - st = mk_re_concat(args[0], args[1], result); break; case OP_RE_UNION: if (num_args == 1) { - result = args[0]; return BR_DONE; + result = args[0]; + st = BR_DONE; } - SASSERT(num_args == 2); - return mk_re_union(args[0], args[1], result); + else { + SASSERT(num_args == 2); + st = mk_re_union(args[0], args[1], result); + } + break; case OP_RE_RANGE: SASSERT(num_args == 2); - return mk_re_range(args[0], args[1], result); + st = mk_re_range(args[0], args[1], result); + break; case OP_RE_INTERSECT: SASSERT(num_args == 2); - return mk_re_inter(args[0], args[1], result); + st = mk_re_inter(args[0], args[1], result); + break; case OP_RE_COMPLEMENT: SASSERT(num_args == 1); - return mk_re_complement(args[0], result); + st = mk_re_complement(args[0], result); + break; case OP_RE_LOOP: - return mk_re_loop(num_args, args, result); + st = mk_re_loop(num_args, args, result); + break; case OP_RE_EMPTY_SET: return BR_FAILED; case OP_RE_FULL_SEQ_SET: @@ -442,23 +455,29 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con case OP_SEQ_CONCAT: if (num_args == 1) { result = args[0]; - return BR_DONE; + st = BR_DONE; } - SASSERT(num_args == 2); - return mk_seq_concat(args[0], args[1], result); + else { + SASSERT(num_args == 2); + st = mk_seq_concat(args[0], args[1], result); + } + break; case OP_SEQ_LENGTH: SASSERT(num_args == 1); - return mk_seq_length(args[0], result); + st = mk_seq_length(args[0], result); + break; case OP_SEQ_EXTRACT: SASSERT(num_args == 3); st = mk_seq_extract(args[0], args[1], args[2], result); break; case OP_SEQ_CONTAINS: SASSERT(num_args == 2); - return mk_seq_contains(args[0], args[1], result); + st = mk_seq_contains(args[0], args[1], result); + break; case OP_SEQ_AT: SASSERT(num_args == 2); - return mk_seq_at(args[0], args[1], result); + st = mk_seq_at(args[0], args[1], result); + break; #if 0 case OP_SEQ_NTH: SASSERT(num_args == 2); @@ -466,35 +485,49 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con #endif case OP_SEQ_PREFIX: SASSERT(num_args == 2); - return mk_seq_prefix(args[0], args[1], result); + st = mk_seq_prefix(args[0], args[1], result); + break; case OP_SEQ_SUFFIX: SASSERT(num_args == 2); - return mk_seq_suffix(args[0], args[1], result); + st = mk_seq_suffix(args[0], args[1], result); + break; case OP_SEQ_INDEX: if (num_args == 2) { expr_ref arg3(m_autil.mk_int(0), m()); result = m_util.str.mk_index(args[0], args[1], arg3); - return BR_REWRITE1; + st = BR_REWRITE1; } - SASSERT(num_args == 3); - return mk_seq_index(args[0], args[1], args[2], result); + else { + SASSERT(num_args == 3); + st = mk_seq_index(args[0], args[1], args[2], result); + } + break; + case OP_SEQ_LAST_INDEX: + SASSERT(num_args == 2); + st = mk_seq_last_index(args[0], args[1], result); + break; case OP_SEQ_REPLACE: SASSERT(num_args == 3); - return mk_seq_replace(args[0], args[1], args[2], result); + st = mk_seq_replace(args[0], args[1], args[2], result); + break; case OP_SEQ_TO_RE: SASSERT(num_args == 1); - return mk_str_to_regexp(args[0], result); + st = mk_str_to_regexp(args[0], result); + break; case OP_SEQ_IN_RE: SASSERT(num_args == 2); - return mk_str_in_regexp(args[0], args[1], result); + st = mk_str_in_regexp(args[0], args[1], result); + break; case OP_STRING_CONST: return BR_FAILED; case OP_STRING_ITOS: SASSERT(num_args == 1); - return mk_str_itos(args[0], result); + st = mk_str_itos(args[0], result); + break; case OP_STRING_STOI: SASSERT(num_args == 1); - return mk_str_stoi(args[0], result); + st = mk_str_stoi(args[0], result); + break; case _OP_STRING_CONCAT: case _OP_STRING_PREFIX: case _OP_STRING_SUFFIX: @@ -908,6 +941,18 @@ br_status seq_rewriter::mk_seq_nth(expr* a, expr* b, expr_ref& result) { return BR_FAILED; } +br_status seq_rewriter::mk_seq_last_index(expr* a, expr* b, expr_ref& result) { + zstring s1, s2; + bool isc1 = m_util.str.is_string(a, s1); + bool isc2 = m_util.str.is_string(b, s2); + if (isc1 && isc2) { + int idx = s1.last_indexof(s2); + result = m_autil.mk_numeral(rational(idx), true); + return BR_DONE; + } + return BR_FAILED; +} + br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result) { zstring s1, s2; rational r; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 25b8979fc..73ab26591 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -116,6 +116,7 @@ class seq_rewriter { br_status mk_seq_at(expr* a, expr* b, expr_ref& result); br_status mk_seq_nth(expr* a, expr* b, expr_ref& result); br_status mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result); + br_status mk_seq_last_index(expr* a, expr* b, expr_ref& result); br_status mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result); br_status mk_seq_prefix(expr* a, expr* b, expr_ref& result); br_status mk_seq_suffix(expr* a, expr* b, expr_ref& result); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 7576e43c0..0fd27302a 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -273,6 +273,21 @@ int zstring::indexof(zstring const& other, int offset) const { return -1; } +int zstring::last_indexof(zstring const& other) const { + if (other.length() == 0) return length(); + if (other.length() > length()) return -1; + for (unsigned last = length() - other.length(); last-- > 0; ) { + bool suffix = true; + for (unsigned j = 0; suffix && j < other.length(); ++j) { + suffix = m_buffer[last + j] == other[j]; + } + if (suffix) { + return static_cast(last); + } + } + return -1; +} + zstring zstring::extract(int offset, int len) const { zstring result(m_encoding); SASSERT(0 <= offset && 0 <= len); @@ -530,6 +545,7 @@ void seq_decl_plugin::init() { m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA); m_sigs[OP_SEQ_REPLACE] = alloc(psig, m, "seq.replace", 1, 3, seq3A, seqA); m_sigs[OP_SEQ_INDEX] = alloc(psig, m, "seq.indexof", 1, 3, seq2AintT, intT); + m_sigs[OP_SEQ_LAST_INDEX] = alloc(psig, m, "seq.last_indexof", 1, 2, seqAseqA, intT); m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA); m_sigs[OP_SEQ_NTH] = alloc(psig, m, "seq.nth", 1, 2, seqAintT, A); m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT); @@ -774,7 +790,13 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, OP_SEQ_INDEX)); } return mk_str_fun(k, arity, domain, range, OP_SEQ_INDEX); - + case OP_SEQ_LAST_INDEX: + if (arity != 2) { + m.raise_exception("two arguments expected tin last_indexof"); + } + else { + return mk_seq_fun(k, arity, domain, range, OP_SEQ_LAST_INDEX); + } case OP_SEQ_PREFIX: return mk_seq_fun(k, arity, domain, range, _OP_STRING_PREFIX); case _OP_STRING_PREFIX: diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 1148c8411..0185e473c 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -43,6 +43,7 @@ enum seq_op_kind { OP_SEQ_NTH, OP_SEQ_LENGTH, OP_SEQ_INDEX, + OP_SEQ_LAST_INDEX, OP_SEQ_TO_RE, OP_SEQ_IN_RE, @@ -113,6 +114,7 @@ public: bool prefixof(zstring const& other) const; bool contains(zstring const& other) const; int indexof(zstring const& other, int offset) const; + int last_indexof(zstring const& other) const; zstring extract(int lo, int hi) const; zstring operator+(zstring const& other) const; bool operator==(const zstring& other) const; @@ -260,6 +262,7 @@ public: app* mk_prefix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_PREFIX, 2, es); } app* mk_suffix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_SUFFIX, 2, es); } app* mk_index(expr* a, expr* b, expr* i) const { expr* es[3] = { a, b, i}; return m.mk_app(m_fid, OP_SEQ_INDEX, 3, es); } + app* mk_last_index(expr* a, expr* b) const { expr* es[2] = { a, b}; return m.mk_app(m_fid, OP_SEQ_LAST_INDEX, 2, es); } app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); } app* mk_char(zstring const& s, unsigned idx) const; app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); } @@ -285,6 +288,7 @@ public: bool is_nth(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_NTH); } bool is_nth(expr const* n, expr*& s, unsigned& idx) const; bool is_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_INDEX); } + bool is_last_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LAST_INDEX); } bool is_replace(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE); } bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); } bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); } @@ -311,6 +315,7 @@ public: MATCH_BINARY(is_nth); MATCH_BINARY(is_index); MATCH_TERNARY(is_index); + MATCH_BINARY(is_last_index); MATCH_TERNARY(is_replace); MATCH_BINARY(is_prefix); MATCH_BINARY(is_suffix); diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 9a3f47757..7a899d5ae 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -4150,6 +4150,12 @@ expr_ref theory_seq::expand1(expr* e0, dependency*& eqs) { if (!arg1 || !arg2) return result; result = m_util.str.mk_index(arg1, arg2, e3); } + else if (m_util.str.is_last_index(e, e1, e2)) { + arg1 = try_expand(e1, deps); + arg2 = try_expand(e2, deps); + if (!arg1 || !arg2) return result; + result = m_util.str.mk_last_index(arg1, arg2); + } else if (m.is_ite(e, e1, e2, e3)) { if (ctx.e_internalized(e) && ctx.e_internalized(e2) && ctx.get_enode(e)->get_root() == ctx.get_enode(e2)->get_root()) { result = try_expand(e2, deps); @@ -4288,6 +4294,9 @@ void theory_seq::deque_axiom(expr* n) { else if (m_util.str.is_index(n)) { add_indexof_axiom(n); } + else if (m_util.str.is_last_index(n)) { + add_last_indexof_axiom(n); + } else if (m_util.str.is_replace(n)) { add_replace_axiom(n); } @@ -4437,6 +4446,42 @@ void theory_seq::add_indexof_axiom(expr* i) { } } +/** + + !contains(t, s) => i = -1 + |t| = 0 => |s| = 0 or i = -1 + |t| = 0 & |s| = 0 => i = 0 + |t| != 0 & contains(t, s) => t = xsy & i = len(x) + |s| = 0 or s = s_head*s_tail + |s| = 0 or !contains(s_tail*y, s) + + */ +void theory_seq::add_last_indexof_axiom(expr* i) { + expr* s = nullptr, *t = nullptr; + VERIFY(m_util.str.is_last_index(i, t, s)); + expr_ref minus_one(m_autil.mk_int(-1), m); + expr_ref zero(m_autil.mk_int(0), m); + expr_ref s_head(m), s_tail(m); + expr_ref x = mk_skolem(symbol("seq.last_indexof_left"), t, s); + expr_ref y = mk_skolem(symbol("seq.last_indexof_right"), t, s); + mk_decompose(s, s_head, s_tail); + literal cnt = mk_literal(m_util.str.mk_contains(t, s)); + literal cnt2 = mk_literal(m_util.str.mk_contains(mk_concat(s_tail, y), s)); + literal i_eq_m1 = mk_eq(i, minus_one, false); + literal i_eq_0 = mk_eq(i, zero, false); + literal s_eq_empty = mk_eq_empty(s); + literal t_eq_empty = mk_eq_empty(t); + expr_ref xsy = mk_concat(x, s, y); + + add_axiom(cnt, i_eq_m1); + add_axiom(~t_eq_empty, s_eq_empty, i_eq_m1); + add_axiom(~t_eq_empty, ~s_eq_empty, i_eq_0); + add_axiom(t_eq_empty, ~cnt, mk_seq_eq(t, xsy)); + add_axiom(t_eq_empty, ~cnt, mk_eq(i, mk_len(x), false)); + add_axiom(s_eq_empty, mk_eq(s, mk_concat(s_head, s_tail), false)); + add_axiom(s_eq_empty, ~cnt2); +} + /* let r = replace(a, s, t) diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index 166c8774e..f0d7fa15b 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -550,6 +550,7 @@ namespace smt { void push_lit_as_expr(literal l, expr_ref_vector& buf); void add_axiom(literal l1, literal l2 = null_literal, literal l3 = null_literal, literal l4 = null_literal, literal l5 = null_literal); void add_indexof_axiom(expr* e); + void add_last_indexof_axiom(expr* e); void add_replace_axiom(expr* e); void add_extract_axiom(expr* e); void add_length_axiom(expr* n);