3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-06 17:44:08 +00:00

implementing last-index-of #2089

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2019-03-22 12:29:34 -07:00
parent 62ec02e50f
commit 3c8fd83c97
10 changed files with 174 additions and 31 deletions

View file

@ -148,6 +148,7 @@ extern "C" {
MK_BINARY(Z3_mk_seq_nth, mk_c(c)->get_seq_fid(), OP_SEQ_AT, SKIP);
MK_UNARY(Z3_mk_seq_length, mk_c(c)->get_seq_fid(), OP_SEQ_LENGTH, SKIP);
MK_TERNARY(Z3_mk_seq_index, mk_c(c)->get_seq_fid(), OP_SEQ_INDEX, SKIP);
MK_BINARY(Z3_mk_seq_last_index, mk_c(c)->get_seq_fid(), OP_SEQ_LAST_INDEX, SKIP);
MK_UNARY(Z3_mk_seq_to_re, mk_c(c)->get_seq_fid(), OP_SEQ_TO_RE, SKIP);
MK_BINARY(Z3_mk_seq_in_re, mk_c(c)->get_seq_fid(), OP_SEQ_IN_RE, SKIP);

View file

@ -3276,6 +3276,12 @@ namespace z3 {
s.check_error();
return expr(s.ctx(), r);
}
inline expr last_indexof(expr const& s, expr const& substr) {
check_context(s, substr);
Z3_ast r = Z3_mk_seq_last_index(s.ctx(), s, substr);
s.check_error();
return expr(s.ctx(), r);
}
inline expr to_re(expr const& s) {
MK_EXPR1(Z3_mk_seq_to_re, s);
}

View file

@ -10154,6 +10154,15 @@ def IndexOf(s, substr, offset):
offset = IntVal(offset, ctx)
return ArithRef(Z3_mk_seq_index(s.ctx_ref(), s.as_ast(), substr.as_ast(), offset.as_ast()), s.ctx)
def LastIndexOf(s, substr):
"""Retrieve the last index of substring within a string"""
ctx = None
ctx = _get_ctx2(s, substr, ctx)
s = _coerce_seq(s, ctx)
substr = _coerce_seq(substr, ctx)
return ArithRef(Z3_mk_seq_last_index(s.ctx_ref(), s.as_ast(), substr.as_ast()), s.ctx)
def Length(s):
"""Obtain the length of a sequence 's'
>>> l = Length(StringVal("abc"))

View file

@ -1171,6 +1171,7 @@ typedef enum {
Z3_OP_SEQ_NTH,
Z3_OP_SEQ_LENGTH,
Z3_OP_SEQ_INDEX,
Z3_OP_SEQ_LAST_INDEX,
Z3_OP_SEQ_TO_RE,
Z3_OP_SEQ_IN_RE,
@ -3446,12 +3447,19 @@ extern "C" {
/**
\brief Return index of first occurrence of \c substr in \c s starting from offset \c offset.
If \c s does not contain \c substr, then the value is -1, if \c offset is the length of \c s, then the value is -1 as well.
The function is under-specified if \c offset is negative or larger than the length of \c s.
The value is -1 if \c offset is negative or larger than the length of \c s.
def_API('Z3_mk_seq_index' ,AST ,(_in(CONTEXT), _in(AST), _in(AST), _in(AST)))
*/
Z3_ast Z3_API Z3_mk_seq_index(Z3_context c, Z3_ast s, Z3_ast substr, Z3_ast offset);
/**
\brief Return the last occurrence of \c substr in \c s.
If \c s does not contain \c substr, then the value is -1,
def_API('Z3_mk_seq_last_index', AST, (_in(CONTEXT), _in(AST), _in(AST)))
*/
Z3_ast Z3_API Z3_mk_seq_last_index(Z3_context c, Z3_ast, Z3_ast substr);
/**
\brief Convert string to integer.

View file

@ -391,44 +391,57 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
case OP_SEQ_UNIT:
SASSERT(num_args == 1);
return mk_seq_unit(args[0], result);
st = mk_seq_unit(args[0], result);
break;
case OP_SEQ_EMPTY:
return BR_FAILED;
case OP_RE_PLUS:
SASSERT(num_args == 1);
return mk_re_plus(args[0], result);
st = mk_re_plus(args[0], result);
break;
case OP_RE_STAR:
SASSERT(num_args == 1);
st = mk_re_star(args[0], result);
break;
case OP_RE_OPTION:
SASSERT(num_args == 1);
return mk_re_opt(args[0], result);
st = mk_re_opt(args[0], result);
break;
case OP_RE_CONCAT:
if (num_args == 1) {
result = args[0];
return BR_DONE;
st = BR_DONE;
}
else {
SASSERT(num_args == 2);
st = mk_re_concat(args[0], args[1], result);
}
SASSERT(num_args == 2);
st = mk_re_concat(args[0], args[1], result);
break;
case OP_RE_UNION:
if (num_args == 1) {
result = args[0]; return BR_DONE;
result = args[0];
st = BR_DONE;
}
SASSERT(num_args == 2);
return mk_re_union(args[0], args[1], result);
else {
SASSERT(num_args == 2);
st = mk_re_union(args[0], args[1], result);
}
break;
case OP_RE_RANGE:
SASSERT(num_args == 2);
return mk_re_range(args[0], args[1], result);
st = mk_re_range(args[0], args[1], result);
break;
case OP_RE_INTERSECT:
SASSERT(num_args == 2);
return mk_re_inter(args[0], args[1], result);
st = mk_re_inter(args[0], args[1], result);
break;
case OP_RE_COMPLEMENT:
SASSERT(num_args == 1);
return mk_re_complement(args[0], result);
st = mk_re_complement(args[0], result);
break;
case OP_RE_LOOP:
return mk_re_loop(num_args, args, result);
st = mk_re_loop(num_args, args, result);
break;
case OP_RE_EMPTY_SET:
return BR_FAILED;
case OP_RE_FULL_SEQ_SET:
@ -442,23 +455,29 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
case OP_SEQ_CONCAT:
if (num_args == 1) {
result = args[0];
return BR_DONE;
st = BR_DONE;
}
SASSERT(num_args == 2);
return mk_seq_concat(args[0], args[1], result);
else {
SASSERT(num_args == 2);
st = mk_seq_concat(args[0], args[1], result);
}
break;
case OP_SEQ_LENGTH:
SASSERT(num_args == 1);
return mk_seq_length(args[0], result);
st = mk_seq_length(args[0], result);
break;
case OP_SEQ_EXTRACT:
SASSERT(num_args == 3);
st = mk_seq_extract(args[0], args[1], args[2], result);
break;
case OP_SEQ_CONTAINS:
SASSERT(num_args == 2);
return mk_seq_contains(args[0], args[1], result);
st = mk_seq_contains(args[0], args[1], result);
break;
case OP_SEQ_AT:
SASSERT(num_args == 2);
return mk_seq_at(args[0], args[1], result);
st = mk_seq_at(args[0], args[1], result);
break;
#if 0
case OP_SEQ_NTH:
SASSERT(num_args == 2);
@ -466,35 +485,49 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
#endif
case OP_SEQ_PREFIX:
SASSERT(num_args == 2);
return mk_seq_prefix(args[0], args[1], result);
st = mk_seq_prefix(args[0], args[1], result);
break;
case OP_SEQ_SUFFIX:
SASSERT(num_args == 2);
return mk_seq_suffix(args[0], args[1], result);
st = mk_seq_suffix(args[0], args[1], result);
break;
case OP_SEQ_INDEX:
if (num_args == 2) {
expr_ref arg3(m_autil.mk_int(0), m());
result = m_util.str.mk_index(args[0], args[1], arg3);
return BR_REWRITE1;
st = BR_REWRITE1;
}
SASSERT(num_args == 3);
return mk_seq_index(args[0], args[1], args[2], result);
else {
SASSERT(num_args == 3);
st = mk_seq_index(args[0], args[1], args[2], result);
}
break;
case OP_SEQ_LAST_INDEX:
SASSERT(num_args == 2);
st = mk_seq_last_index(args[0], args[1], result);
break;
case OP_SEQ_REPLACE:
SASSERT(num_args == 3);
return mk_seq_replace(args[0], args[1], args[2], result);
st = mk_seq_replace(args[0], args[1], args[2], result);
break;
case OP_SEQ_TO_RE:
SASSERT(num_args == 1);
return mk_str_to_regexp(args[0], result);
st = mk_str_to_regexp(args[0], result);
break;
case OP_SEQ_IN_RE:
SASSERT(num_args == 2);
return mk_str_in_regexp(args[0], args[1], result);
st = mk_str_in_regexp(args[0], args[1], result);
break;
case OP_STRING_CONST:
return BR_FAILED;
case OP_STRING_ITOS:
SASSERT(num_args == 1);
return mk_str_itos(args[0], result);
st = mk_str_itos(args[0], result);
break;
case OP_STRING_STOI:
SASSERT(num_args == 1);
return mk_str_stoi(args[0], result);
st = mk_str_stoi(args[0], result);
break;
case _OP_STRING_CONCAT:
case _OP_STRING_PREFIX:
case _OP_STRING_SUFFIX:
@ -908,6 +941,18 @@ br_status seq_rewriter::mk_seq_nth(expr* a, expr* b, expr_ref& result) {
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_last_index(expr* a, expr* b, expr_ref& result) {
zstring s1, s2;
bool isc1 = m_util.str.is_string(a, s1);
bool isc2 = m_util.str.is_string(b, s2);
if (isc1 && isc2) {
int idx = s1.last_indexof(s2);
result = m_autil.mk_numeral(rational(idx), true);
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result) {
zstring s1, s2;
rational r;

View file

@ -116,6 +116,7 @@ class seq_rewriter {
br_status mk_seq_at(expr* a, expr* b, expr_ref& result);
br_status mk_seq_nth(expr* a, expr* b, expr_ref& result);
br_status mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result);
br_status mk_seq_last_index(expr* a, expr* b, expr_ref& result);
br_status mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result);
br_status mk_seq_prefix(expr* a, expr* b, expr_ref& result);
br_status mk_seq_suffix(expr* a, expr* b, expr_ref& result);

View file

@ -273,6 +273,21 @@ int zstring::indexof(zstring const& other, int offset) const {
return -1;
}
int zstring::last_indexof(zstring const& other) const {
if (other.length() == 0) return length();
if (other.length() > length()) return -1;
for (unsigned last = length() - other.length(); last-- > 0; ) {
bool suffix = true;
for (unsigned j = 0; suffix && j < other.length(); ++j) {
suffix = m_buffer[last + j] == other[j];
}
if (suffix) {
return static_cast<int>(last);
}
}
return -1;
}
zstring zstring::extract(int offset, int len) const {
zstring result(m_encoding);
SASSERT(0 <= offset && 0 <= len);
@ -530,6 +545,7 @@ void seq_decl_plugin::init() {
m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA);
m_sigs[OP_SEQ_REPLACE] = alloc(psig, m, "seq.replace", 1, 3, seq3A, seqA);
m_sigs[OP_SEQ_INDEX] = alloc(psig, m, "seq.indexof", 1, 3, seq2AintT, intT);
m_sigs[OP_SEQ_LAST_INDEX] = alloc(psig, m, "seq.last_indexof", 1, 2, seqAseqA, intT);
m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA);
m_sigs[OP_SEQ_NTH] = alloc(psig, m, "seq.nth", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT);
@ -774,7 +790,13 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, OP_SEQ_INDEX));
}
return mk_str_fun(k, arity, domain, range, OP_SEQ_INDEX);
case OP_SEQ_LAST_INDEX:
if (arity != 2) {
m.raise_exception("two arguments expected tin last_indexof");
}
else {
return mk_seq_fun(k, arity, domain, range, OP_SEQ_LAST_INDEX);
}
case OP_SEQ_PREFIX:
return mk_seq_fun(k, arity, domain, range, _OP_STRING_PREFIX);
case _OP_STRING_PREFIX:

View file

@ -43,6 +43,7 @@ enum seq_op_kind {
OP_SEQ_NTH,
OP_SEQ_LENGTH,
OP_SEQ_INDEX,
OP_SEQ_LAST_INDEX,
OP_SEQ_TO_RE,
OP_SEQ_IN_RE,
@ -113,6 +114,7 @@ public:
bool prefixof(zstring const& other) const;
bool contains(zstring const& other) const;
int indexof(zstring const& other, int offset) const;
int last_indexof(zstring const& other) const;
zstring extract(int lo, int hi) const;
zstring operator+(zstring const& other) const;
bool operator==(const zstring& other) const;
@ -260,6 +262,7 @@ public:
app* mk_prefix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_PREFIX, 2, es); }
app* mk_suffix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_SUFFIX, 2, es); }
app* mk_index(expr* a, expr* b, expr* i) const { expr* es[3] = { a, b, i}; return m.mk_app(m_fid, OP_SEQ_INDEX, 3, es); }
app* mk_last_index(expr* a, expr* b) const { expr* es[2] = { a, b}; return m.mk_app(m_fid, OP_SEQ_LAST_INDEX, 2, es); }
app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); }
app* mk_char(zstring const& s, unsigned idx) const;
app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); }
@ -285,6 +288,7 @@ public:
bool is_nth(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_NTH); }
bool is_nth(expr const* n, expr*& s, unsigned& idx) const;
bool is_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_INDEX); }
bool is_last_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LAST_INDEX); }
bool is_replace(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE); }
bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); }
bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); }
@ -311,6 +315,7 @@ public:
MATCH_BINARY(is_nth);
MATCH_BINARY(is_index);
MATCH_TERNARY(is_index);
MATCH_BINARY(is_last_index);
MATCH_TERNARY(is_replace);
MATCH_BINARY(is_prefix);
MATCH_BINARY(is_suffix);

View file

@ -4150,6 +4150,12 @@ expr_ref theory_seq::expand1(expr* e0, dependency*& eqs) {
if (!arg1 || !arg2) return result;
result = m_util.str.mk_index(arg1, arg2, e3);
}
else if (m_util.str.is_last_index(e, e1, e2)) {
arg1 = try_expand(e1, deps);
arg2 = try_expand(e2, deps);
if (!arg1 || !arg2) return result;
result = m_util.str.mk_last_index(arg1, arg2);
}
else if (m.is_ite(e, e1, e2, e3)) {
if (ctx.e_internalized(e) && ctx.e_internalized(e2) && ctx.get_enode(e)->get_root() == ctx.get_enode(e2)->get_root()) {
result = try_expand(e2, deps);
@ -4288,6 +4294,9 @@ void theory_seq::deque_axiom(expr* n) {
else if (m_util.str.is_index(n)) {
add_indexof_axiom(n);
}
else if (m_util.str.is_last_index(n)) {
add_last_indexof_axiom(n);
}
else if (m_util.str.is_replace(n)) {
add_replace_axiom(n);
}
@ -4437,6 +4446,42 @@ void theory_seq::add_indexof_axiom(expr* i) {
}
}
/**
!contains(t, s) => i = -1
|t| = 0 => |s| = 0 or i = -1
|t| = 0 & |s| = 0 => i = 0
|t| != 0 & contains(t, s) => t = xsy & i = len(x)
|s| = 0 or s = s_head*s_tail
|s| = 0 or !contains(s_tail*y, s)
*/
void theory_seq::add_last_indexof_axiom(expr* i) {
expr* s = nullptr, *t = nullptr;
VERIFY(m_util.str.is_last_index(i, t, s));
expr_ref minus_one(m_autil.mk_int(-1), m);
expr_ref zero(m_autil.mk_int(0), m);
expr_ref s_head(m), s_tail(m);
expr_ref x = mk_skolem(symbol("seq.last_indexof_left"), t, s);
expr_ref y = mk_skolem(symbol("seq.last_indexof_right"), t, s);
mk_decompose(s, s_head, s_tail);
literal cnt = mk_literal(m_util.str.mk_contains(t, s));
literal cnt2 = mk_literal(m_util.str.mk_contains(mk_concat(s_tail, y), s));
literal i_eq_m1 = mk_eq(i, minus_one, false);
literal i_eq_0 = mk_eq(i, zero, false);
literal s_eq_empty = mk_eq_empty(s);
literal t_eq_empty = mk_eq_empty(t);
expr_ref xsy = mk_concat(x, s, y);
add_axiom(cnt, i_eq_m1);
add_axiom(~t_eq_empty, s_eq_empty, i_eq_m1);
add_axiom(~t_eq_empty, ~s_eq_empty, i_eq_0);
add_axiom(t_eq_empty, ~cnt, mk_seq_eq(t, xsy));
add_axiom(t_eq_empty, ~cnt, mk_eq(i, mk_len(x), false));
add_axiom(s_eq_empty, mk_eq(s, mk_concat(s_head, s_tail), false));
add_axiom(s_eq_empty, ~cnt2);
}
/*
let r = replace(a, s, t)

View file

@ -550,6 +550,7 @@ namespace smt {
void push_lit_as_expr(literal l, expr_ref_vector& buf);
void add_axiom(literal l1, literal l2 = null_literal, literal l3 = null_literal, literal l4 = null_literal, literal l5 = null_literal);
void add_indexof_axiom(expr* e);
void add_last_indexof_axiom(expr* e);
void add_replace_axiom(expr* e);
void add_extract_axiom(expr* e);
void add_length_axiom(expr* n);