mirror of
https://github.com/Z3Prover/z3
synced 2026-02-01 06:47:56 +00:00
initial pass at using derivatives in regex unfolding
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
b7b8ed23fb
commit
eb3f20832e
11 changed files with 694 additions and 228 deletions
|
|
@ -13,10 +13,11 @@ z3_add_component(smt
|
|||
old_interval.cpp
|
||||
qi_queue.cpp
|
||||
seq_axioms.cpp
|
||||
seq_skolem.cpp
|
||||
seq_eq_solver.cpp
|
||||
seq_ne_solver.cpp
|
||||
seq_offset_eq.cpp
|
||||
seq_regex.cpp
|
||||
seq_skolem.cpp
|
||||
seq_unicode.cpp
|
||||
smt_almost_cg_table.cpp
|
||||
smt_arith_value.cpp
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ def_module_params(module_name='smt',
|
|||
('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
|
||||
('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
|
||||
('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),
|
||||
('seq.use_derivatives', BOOL, False, 'dev flag (not for users) enable derivative based unfolding of regex'),
|
||||
('str.strong_arrangements', BOOL, True, 'assert equivalences instead of implications when generating string arrangement axioms'),
|
||||
('str.aggressive_length_testing', BOOL, False, 'prioritize testing concrete length values over generating more options'),
|
||||
('str.aggressive_value_testing', BOOL, False, 'prioritize testing concrete string constant values over generating more options'),
|
||||
|
|
|
|||
|
|
@ -21,4 +21,5 @@ void theory_seq_params::updt_params(params_ref const & _p) {
|
|||
smt_params_helper p(_p);
|
||||
m_split_w_len = p.seq_split_w_len();
|
||||
m_seq_validate = p.seq_validate();
|
||||
m_seq_use_derivatives = p.seq_use_derivatives();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,13 @@ struct theory_seq_params {
|
|||
*/
|
||||
bool m_split_w_len;
|
||||
bool m_seq_validate;
|
||||
bool m_seq_use_derivatives;
|
||||
|
||||
|
||||
theory_seq_params(params_ref const & p = params_ref()):
|
||||
m_split_w_len(true),
|
||||
m_seq_validate(false)
|
||||
m_seq_validate(false),
|
||||
m_seq_use_derivatives(false)
|
||||
{
|
||||
updt_params(p);
|
||||
}
|
||||
|
|
|
|||
266
src/smt/seq_regex.cpp
Normal file
266
src/smt/seq_regex.cpp
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
/*++
|
||||
Copyright (c) 2011 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_regex.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Solver for regexes
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2020-5-22
|
||||
|
||||
--*/
|
||||
|
||||
#include "smt/seq_regex.h"
|
||||
#include "smt/theory_seq.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
seq_regex::seq_regex(theory_seq& th):
|
||||
th(th),
|
||||
ctx(th.get_context()),
|
||||
m(th.get_manager())
|
||||
{}
|
||||
|
||||
seq_util& seq_regex::u() { return th.m_util; }
|
||||
class seq_util::re& seq_regex::re() { return th.m_util.re; }
|
||||
class seq_util::str& seq_regex::str() { return th.m_util.str; }
|
||||
seq_rewriter& seq_regex::seq_rw() { return th.m_seq_rewrite; }
|
||||
seq_skolem& seq_regex::sk() { return th.m_sk; }
|
||||
void seq_regex::rewrite(expr_ref& e) { th.m_rewrite(e); }
|
||||
|
||||
void seq_regex::propagate_in_re(literal lit) {
|
||||
if (!propagate(lit))
|
||||
m_to_propagate.push_back(lit);
|
||||
}
|
||||
|
||||
bool seq_regex::propagate() {
|
||||
bool change = false;
|
||||
for (unsigned i = 0; !ctx.inconsistent() && i < m_to_propagate.size(); ++i) {
|
||||
if (propagate(m_to_propagate[i])) {
|
||||
m_to_propagate.erase_and_swap(i--);
|
||||
change = true;
|
||||
}
|
||||
}
|
||||
return change;
|
||||
}
|
||||
|
||||
bool seq_regex::propagate(literal lit) {
|
||||
expr* s = nullptr, *r = nullptr;
|
||||
expr* e = ctx.bool_var2expr(lit.var());
|
||||
VERIFY(str().is_in_re(e, s, r));
|
||||
|
||||
// only positive assignments of membership propagation are relevant.
|
||||
if (lit.sign() && sk().is_tail(s))
|
||||
return true;
|
||||
|
||||
// convert negative negative membership literals to positive
|
||||
// ~(s in R) => s in C(R)
|
||||
if (lit.sign()) {
|
||||
expr_ref fml(re().mk_in_re(s, re().mk_complement(r)), m);
|
||||
rewrite(fml);
|
||||
literal nlit = th.mk_literal(fml);
|
||||
th.propagate_lit(nullptr, 1, &lit, nlit);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!sk().is_tail(s) && coallesce_in_re(lit))
|
||||
return true;
|
||||
|
||||
// TBD
|
||||
// for !sk().is_tail(s):
|
||||
// s in R => R != {}
|
||||
|
||||
if (block_unfolding(lit, s))
|
||||
return true;
|
||||
|
||||
// s in R[if(p,R1,R2)] & p => s in R[R1]
|
||||
// s in R[if(p,R1,R2)] & ~p => s in R[R2]
|
||||
expr_ref cond(m), tt(m), el(m);
|
||||
if (seq_rw().has_cofactor(r, cond, tt, el)) {
|
||||
literal lcond = th.mk_literal(cond), next_lit;
|
||||
switch (ctx.get_assignment(lcond)) {
|
||||
case l_true: {
|
||||
rewrite(tt);
|
||||
literal lits[2] = { lit, lcond };
|
||||
next_lit = th.mk_literal(re().mk_in_re(s, tt));
|
||||
th.propagate_lit(nullptr, 2, lits, next_lit);
|
||||
return true;
|
||||
}
|
||||
case l_false: {
|
||||
rewrite(el);
|
||||
next_lit = th.mk_literal(re().mk_in_re(s, el));
|
||||
literal lits[2] = { lit, ~lcond };
|
||||
th.propagate_lit(nullptr, 2, lits, next_lit);
|
||||
return true;
|
||||
}
|
||||
case l_undef:
|
||||
ctx.mark_as_relevant(lcond);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// s in R & s = "" => nullable(R)
|
||||
literal is_empty = th.mk_eq(s, str().mk_empty(m.get_sort(s)), false);
|
||||
expr_ref is_nullable = seq_rw().is_nullable(r);
|
||||
rewrite(is_nullable);
|
||||
th.add_axiom(~lit, ~is_empty, th.mk_literal(is_nullable));
|
||||
switch (ctx.get_assignment(is_empty)) {
|
||||
case l_undef:
|
||||
ctx.mark_as_relevant(is_empty);
|
||||
return false;
|
||||
case l_true:
|
||||
return true;
|
||||
case l_false:
|
||||
break;
|
||||
}
|
||||
|
||||
// s in R & s != "" => s = head ++ tail
|
||||
// s in R & s != "" => tail in D(head, R)
|
||||
expr_ref head(m), tail(m), d(m);
|
||||
expr* h = nullptr;
|
||||
sk().decompose(s, head, tail);
|
||||
if (!str().is_unit(head, h))
|
||||
throw default_exception("expected a unit");
|
||||
d = seq_rw().derivative(h, r);
|
||||
if (!d)
|
||||
throw default_exception("unable to expand derivative");
|
||||
th.add_axiom(is_empty, th.mk_eq(s, th.mk_concat(head, tail), false));
|
||||
th.add_axiom(~lit, is_empty, th.mk_literal(re().mk_in_re(tail, d)));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool seq_regex::block_unfolding(literal lit, expr* s) {
|
||||
expr* t = nullptr;
|
||||
unsigned i = 0;
|
||||
if (!sk().is_tail_u(s, t, i))
|
||||
return false;
|
||||
if (i > th.m_max_unfolding_depth &&
|
||||
th.m_max_unfolding_lit != null_literal &&
|
||||
ctx.get_assignment(th.m_max_unfolding_lit) == l_true &&
|
||||
!ctx.at_base_level()) {
|
||||
th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool seq_regex::coallesce_in_re(literal lit) {
|
||||
// initially disable this
|
||||
return false;
|
||||
expr* s = nullptr, *r = nullptr;
|
||||
expr* e = ctx.bool_var2expr(lit.var());
|
||||
VERIFY(str().is_in_re(e, s, r));
|
||||
expr_ref regex(r, m);
|
||||
literal_vector lits;
|
||||
for (unsigned i = 0; i < m_s_in_re.size(); ++i) {
|
||||
auto const& entry = m_s_in_re[i];
|
||||
if (!entry.m_active)
|
||||
continue;
|
||||
if (th.get_root(entry.m_s) != th.get_root(s))
|
||||
continue;
|
||||
if (entry.m_re == regex)
|
||||
continue;
|
||||
|
||||
th.m_trail_stack.push(vector_value_trail<theory_seq, s_in_re, true>(m_s_in_re, i));
|
||||
m_s_in_re[i].m_active = false;
|
||||
IF_VERBOSE(11, verbose_stream() << "intersect " << regex << " " <<
|
||||
mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";);
|
||||
regex = re().mk_inter(entry.m_re, regex);
|
||||
rewrite(regex);
|
||||
lits.push_back(~entry.m_lit);
|
||||
enode* n1 = th.ensure_enode(entry.m_s);
|
||||
enode* n2 = th.ensure_enode(s);
|
||||
if (n1 != n2) {
|
||||
lits.push_back(~th.mk_eq(n1->get_owner(), n2->get_owner(), false));
|
||||
}
|
||||
}
|
||||
m_s_in_re.push_back(s_in_re(lit, s, regex));
|
||||
th.get_trail_stack().push(push_back_vector<theory_seq, vector<s_in_re>>(m_s_in_re));
|
||||
if (lits.empty())
|
||||
return false;
|
||||
lits.push_back(~lit);
|
||||
lits.push_back(th.mk_literal(re().mk_in_re(s, regex)));
|
||||
ctx.mk_th_axiom(th.get_id(), lits.size(), lits.c_ptr());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void seq_regex::propagate_is_empty(literal lit) {
|
||||
// the dual version of unroll_non_empty, but
|
||||
// skolem functions have to be eliminated or turned into
|
||||
// universal quantifiers.
|
||||
throw default_exception("emptiness checking of regex is TBD");
|
||||
}
|
||||
|
||||
void seq_regex::propagate_is_nonempty(expr* r) {
|
||||
sort* seq_sort = nullptr;
|
||||
VERIFY(u().is_re(r, seq_sort));
|
||||
literal lit = ~th.mk_eq(r, re().mk_empty(seq_sort), false);
|
||||
expr_mark seen;
|
||||
expr_ref non_empty = unroll_non_empty(r, seen, 0);
|
||||
if (non_empty) {
|
||||
rewrite(non_empty);
|
||||
th.add_axiom(~lit, th.mk_literal(non_empty));
|
||||
}
|
||||
else {
|
||||
// generally introduce predicate (re.nonempty r seen)
|
||||
// with inference rules based on unroll_non_empty
|
||||
throw default_exception("unrolling large regexes is TBD");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
nonempty(R union Q, Seen) = R = {} or Q = {}
|
||||
nonempty(R[if(p,R1,R2)], Seen) = if(p, nonempty(R[R1], Seen), nonempty(R[R2], Seen)) (co-factor)
|
||||
nonempty(R, Seen) = nullable(R) or (R not in Seen and nonempty(D(first(R),R), Seen u { R })) (derivative)
|
||||
|
||||
TBD: eliminate variables from p when possible to perform quantifier elimination.
|
||||
|
||||
p := first(R) == 'a'
|
||||
then replace first(R) by 'a' in R[R1]
|
||||
TBD:
|
||||
empty(R, Seen) = R = {} if R does not contain a subterm in Seen and Seen is non-empty
|
||||
|
||||
|
||||
first : RegEx -> Char is a skolem function
|
||||
*/
|
||||
|
||||
expr_ref seq_regex::mk_first(expr* r) {
|
||||
sort* elem_sort = nullptr, *seq_sort = nullptr;
|
||||
VERIFY(u().is_re(r, seq_sort));
|
||||
VERIFY(u().is_seq(seq_sort, elem_sort));
|
||||
return expr_ref(m.mk_fresh_const("re.first", elem_sort), m);
|
||||
// return sk().mk("re.first", r, elem_sort);
|
||||
}
|
||||
|
||||
expr_ref seq_regex::unroll_non_empty(expr* r, expr_mark& seen, unsigned depth) {
|
||||
if (seen.is_marked(r))
|
||||
return expr_ref(m.mk_false(), m);
|
||||
if (depth > 300)
|
||||
return expr_ref(m);
|
||||
expr_ref result(m), cond(m), th(m), el(m);
|
||||
// TBD: try also rewriting
|
||||
if (seq_rw().has_cofactor(r, cond, th, el)) {
|
||||
th = unroll_non_empty(th, seen, depth + 1);
|
||||
el = unroll_non_empty(el, seen, depth + 1);
|
||||
if (th && el)
|
||||
result = m.mk_ite(cond, th, el);
|
||||
return result;
|
||||
}
|
||||
expr_ref hd = mk_first(r);
|
||||
result = seq_rw().derivative(hd, r);
|
||||
if (result) {
|
||||
// TBD fast check if r is a subterm of result, if not, then
|
||||
// loop instead of recurse
|
||||
seen.mark(r, true);
|
||||
result = unroll_non_empty(result, seen, depth + 1);
|
||||
seen.mark(r, false);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
77
src/smt/seq_regex.h
Normal file
77
src/smt/seq_regex.h
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
/*++
|
||||
Copyright (c) 2011 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_regex.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Solver for regexes
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2020-5-22
|
||||
|
||||
--*/
|
||||
#pragma once
|
||||
|
||||
#include "util/scoped_vector.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "smt/smt_context.h"
|
||||
#include "smt/seq_skolem.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
class theory_seq;
|
||||
|
||||
class seq_regex {
|
||||
struct s_in_re {
|
||||
literal m_lit;
|
||||
expr* m_s;
|
||||
expr* m_re;
|
||||
bool m_active;
|
||||
s_in_re(literal l, expr* s, expr* r):
|
||||
m_lit(l), m_s(s), m_re(r), m_active(true) {}
|
||||
};
|
||||
|
||||
theory_seq& th;
|
||||
context& ctx;
|
||||
ast_manager& m;
|
||||
vector<s_in_re> m_s_in_re;
|
||||
scoped_vector<literal> m_to_propagate;
|
||||
|
||||
seq_util& u();
|
||||
class seq_util::re& re();
|
||||
class seq_util::str& str();
|
||||
seq_rewriter& seq_rw();
|
||||
seq_skolem& sk();
|
||||
|
||||
void rewrite(expr_ref& e);
|
||||
|
||||
bool propagate(literal lit);
|
||||
|
||||
bool coallesce_in_re(literal lit);
|
||||
|
||||
bool block_unfolding(literal lit, expr* s);
|
||||
|
||||
expr_ref mk_first(expr* r);
|
||||
|
||||
expr_ref unroll_non_empty(expr* r, expr_mark& seen, unsigned depth);
|
||||
|
||||
public:
|
||||
|
||||
seq_regex(theory_seq& th);
|
||||
|
||||
bool propagate();
|
||||
|
||||
void propagate_in_re(literal lit);
|
||||
|
||||
void propagate_is_empty(literal lit);
|
||||
|
||||
void propagate_is_nonempty(expr* r);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
|
@ -299,6 +299,7 @@ theory_seq::theory_seq(context& ctx):
|
|||
m_sk(m, m_rewrite),
|
||||
m_ax(*this, m_rewrite),
|
||||
m_unicode(*this),
|
||||
m_regex(*this),
|
||||
m_arith_value(m),
|
||||
m_trail_stack(*this),
|
||||
m_ls(m), m_rs(m),
|
||||
|
|
@ -367,6 +368,10 @@ final_check_status theory_seq::final_check_eh() {
|
|||
TRACEFIN("solve_nqs");
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
if (m_regex.propagate()) {
|
||||
TRACEFIN("regex propagate");
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
if (check_contains()) {
|
||||
++m_stats.m_propagate_contains;
|
||||
TRACEFIN("propagate_contains");
|
||||
|
|
@ -2617,6 +2622,7 @@ expr_ref theory_seq::add_elim_string_axiom(expr* n) {
|
|||
}
|
||||
|
||||
void theory_seq::propagate_in_re(expr* n, bool is_true) {
|
||||
|
||||
TRACE("seq", tout << mk_pp(n, m) << " <- " << (is_true?"true":"false") << "\n";);
|
||||
|
||||
expr_ref tmp(n, m);
|
||||
|
|
@ -3053,7 +3059,12 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
|
|||
}
|
||||
}
|
||||
else if (m_util.str.is_in_re(e)) {
|
||||
propagate_in_re(e, is_true);
|
||||
if (ctx.get_fparams().m_seq_use_derivatives) {
|
||||
m_regex.propagate_in_re(lit);
|
||||
}
|
||||
else {
|
||||
propagate_in_re(e, is_true);
|
||||
}
|
||||
}
|
||||
else if (m_sk.is_digit(e)) {
|
||||
// no-op
|
||||
|
|
|
|||
|
|
@ -34,11 +34,14 @@ Revision History:
|
|||
#include "smt/seq_skolem.h"
|
||||
#include "smt/seq_axioms.h"
|
||||
#include "smt/seq_unicode.h"
|
||||
#include "smt/seq_regex.h"
|
||||
#include "smt/seq_offset_eq.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
class theory_seq : public theory {
|
||||
friend class seq_regex;
|
||||
|
||||
struct assumption {
|
||||
enode* n1, *n2;
|
||||
literal lit;
|
||||
|
|
@ -404,6 +407,7 @@ namespace smt {
|
|||
seq_skolem m_sk;
|
||||
seq_axioms m_ax;
|
||||
seq_unicode m_unicode;
|
||||
seq_regex m_regex;
|
||||
arith_value m_arith_value;
|
||||
th_trail_stack m_trail_stack;
|
||||
stats m_stats;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue