3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-02-14 04:41:48 +00:00

Subterms Theory (#8115)

* somewhaat failed attempt at declaring subterm predicate

I can't really figure out how to link the smt parser to the rest of the
machinenery, so I will stop here and try from the other side. I'll start
implmenting the logic and see if it brings me back to the parser.

* initial logic implmentation

Very primitive, but I don't like have that much work uncommitted.

* parser implementation

* more theory

* Working base

* subterm reflexivity

* a few optimization

Skip adding obvious equalities or disequality

* removed some optimisations

* better handling of backtracking

* stupid segfault

Add m_subterm to the trail

* Update src/smt/theory_datatype.h

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/ast/rewriter/datatype_rewriter.cpp

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/smt/theory_datatype.cpp

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/smt/theory_datatype.cpp

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/smt/theory_datatype.cpp

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* review

* forgot to update `iterate_subterm`'s signature

* fix iterator segfault

* Remove duplicate include statement

Removed duplicate include of 'theory_datatype.h'.

* Replace 'optional' with 'std::option' in datatype_decl_plugin.h

* Add is_subterm_predicate matcher to datatype_decl_plugin

* Change std::option to std::optional for m_subterm

* Update pdecl.h

* Change has_subterm to use has_value method

* Update pdecl.cpp

---------

Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Simon Jeanteur 2026-01-13 19:53:17 +01:00 committed by GitHub
parent 7377d28c30
commit deaced1711
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 563 additions and 32 deletions

View file

@ -28,6 +28,7 @@ Revision History:
#include <iostream>
namespace smt {
class dt_eq_justification : public ext_theory_eq_propagation_justification {
public:
@ -260,6 +261,21 @@ namespace smt {
ctx.mk_th_axiom(get_id(), 2, lits);
}
void theory_datatype::assert_subterm_axioms(enode * n) {
sort * s = n->get_sort();
if (m_util.is_datatype(s)) {
func_decl * sub_decl = m_util.get_datatype_subterm(s);
if (sub_decl) {
TRACE(datatype, tout << "asserting reflexivity for #" << n->get_owner_id() << " " << mk_pp(n->get_expr(), m) << "\n";);
app_ref reflex(m.mk_app(sub_decl, n->get_expr(), n->get_expr()), m);
ctx.internalize(reflex, false);
literal l(ctx.get_bool_var(reflex));
ctx.mark_as_relevant(l);
ctx.mk_th_axiom(get_id(), 1, &l);
}
}
}
theory_var theory_datatype::mk_var(enode * n) {
theory_var r = theory::mk_var(n);
VERIFY(r == static_cast<theory_var>(m_find.mk_var()));
@ -267,6 +283,9 @@ namespace smt {
m_var_data.push_back(alloc(var_data));
var_data * d = m_var_data[r];
ctx.attach_th_var(n, this, r);
assert_subterm_axioms(n);
if (is_constructor(n)) {
d->m_constructor = n;
assert_accessor_axioms(n);
@ -327,7 +346,7 @@ namespace smt {
// it.
// Moreover, fresh variables of sort S can only be created after the
// interpretation for each (relevant) expression of sort S in the
// logical context is created. Returning to the example,
// logical context is created. Returning to the example,
// to create the interpretation of x1 we need the
// interpretation for x2. So, x2 cannot be a fresh value,
// since it would have to be created after x1.
@ -350,6 +369,18 @@ namespace smt {
}
mk_var(e);
}
else if (m_util.is_subterm_predicate(term)) {
SASSERT(term->get_num_args() == 2);
enode * arg1 = e->get_arg(0);
if (!is_attached_to_var(arg1))
mk_var(arg1);
enode * arg2 = e->get_arg(1);
if (!is_attached_to_var(arg2))
mk_var(arg2);
SASSERT(is_attached_to_var(arg1));
SASSERT(is_attached_to_var(arg2));
// Axiom generation logic for subterm can be added here.
}
else {
SASSERT(is_accessor(term) || is_recognizer(term));
SASSERT(term->get_num_args() == 1);
@ -413,35 +444,282 @@ namespace smt {
void theory_datatype::assign_eh(bool_var v, bool is_true) {
force_push();
enode * n = ctx.bool_var2enode(v);
if (!is_recognizer(n))
return;
TRACE(datatype, tout << "assigning recognizer: #" << n->get_owner_id() << " is_true: " << is_true << "\n"
<< enode_pp(n, ctx) << "\n";);
SASSERT(n->get_num_args() == 1);
enode * arg = n->get_arg(0);
theory_var tv = arg->get_th_var(get_id());
tv = m_find.find(tv);
var_data * d = m_var_data[tv];
func_decl * r = n->get_decl();
func_decl * c = m_util.get_recognizer_constructor(r);
if (is_true) {
SASSERT(tv != null_theory_var);
if (d->m_constructor != nullptr && d->m_constructor->get_decl() == c)
return; // do nothing
assert_is_constructor_axiom(arg, c, literal(v));
}
else {
if (d->m_constructor != nullptr) {
if (d->m_constructor->get_decl() == c) {
// conflict
sign_recognizer_conflict(d->m_constructor, n);
}
enode *n = ctx.bool_var2enode(v);
if (is_recognizer(n)) {
TRACE(datatype, tout << "assigning recognizer: #" << n->get_owner_id() << " is_true: " << is_true << "\n"
<< enode_pp(n, ctx) << "\n";);
SASSERT(n->get_num_args() == 1);
enode *arg = n->get_arg(0);
theory_var tv = arg->get_th_var(get_id());
tv = m_find.find(tv);
var_data *d = m_var_data[tv];
func_decl *r = n->get_decl();
func_decl *c = m_util.get_recognizer_constructor(r);
if (is_true) {
SASSERT(tv != null_theory_var);
if (d->m_constructor != nullptr && d->m_constructor->get_decl() == c)
return; // do nothing
assert_is_constructor_axiom(arg, c, literal(v));
propagate_subterm_with_constructor(tv);
}
else {
propagate_recognizer(tv, n);
if (d->m_constructor != nullptr) {
if (d->m_constructor->get_decl() == c) {
// conflict
sign_recognizer_conflict(d->m_constructor, n);
}
}
else {
propagate_recognizer(tv, n);
}
}
}
else if (is_subterm_predicate(n)) {
TRACE(datatype, tout << "assigning subterm: #" << n->get_owner_id() << " is_true: " << is_true << "\n"
<< enode_pp(n, ctx) << "\n";);
SASSERT(n->get_num_args() == 2);
propagate_subterm(n, is_true);
}
}
void theory_datatype::propagate_subterm_with_constructor(theory_var v) {
v = m_find.find(v);
var_data *d = m_var_data[v];
if (!d->m_constructor)
return;
ptr_vector<enode> subs(d->m_subterms);
for (enode *n : subs) {
lbool val = ctx.get_assignment(n);
switch (val) {
case l_undef: continue;
case l_true: propagate_subterm(n, true); break;
case l_false: propagate_subterm(n, false); break;
}
}
}
void theory_datatype::propagate_subterm(enode *n, bool is_true) {
force_push(); // I am fairly sure I need that here
if (is_true) {
propagate_is_subterm(n);
}
else {
propagate_not_is_subterm(n);
}
}
void theory_datatype::propagate_is_subterm(enode *n) {
SASSERT(is_subterm_predicate(n));
enode *arg1 = n->get_arg(0);
enode *arg2 = n->get_arg(1);
// If we are here, n is assigned true.
SASSERT(ctx.get_assignment(n) == l_true);
TRACE(datatype, tout << "propagate_is_subterm: " << enode_pp(n, ctx) << "\n";);
if (arg1->get_root() == arg2->get_root()) {
TRACE(datatype, tout << "subterm reflexivity, skipping " << "\n";);
return;
}
literal_vector lits;
lits.push_back(literal(ctx.enode2bool_var(n), true)); // antecedent: ~n
bool found_possible = false;
bool has_leaf_root = false;
ptr_vector<enode> candidates = list_subterms(arg2);
for (enode *s : candidates) {
bool is_leaf = !m_util.is_constructor(s->get_expr());
// Case 1: Equality check (arg1 == s)
// Valid if sorts are compatible.
if (s->get_sort() == arg1->get_sort()) {
// trying to be smarter about this causes other problems
TRACE(datatype, tout << "adding equality case: " << mk_pp(arg1->get_expr(), m)
<< " == " << mk_pp(s->get_expr(), m) << "\n";);
lits.push_back(mk_eq(arg1->get_expr(), s->get_expr(), false));
found_possible = true;
}
// Case 2: Recursive subterm check (arg1 ⊑ s)
// Only if s is a leaf (unexpanded) and not the root itself (to avoid tautology).
if (is_leaf) {
if (s->get_root() == arg2->get_root()) {
// If arg2 is a leaf, we haven't explored its possibilities yet.
has_leaf_root = true;
found_possible = true;
continue;
}
if (m_util.is_datatype(s->get_sort())) {
// arg1 ⊑ s
func_decl *sub_decl = m_util.get_datatype_subterm(s->get_sort());
if (sub_decl) {
TRACE(datatype, tout << "adding recursive case: " << mk_pp(arg1->get_expr(), m) << ""
<< mk_pp(s->get_expr(), m) << "\n";);
auto tmp = m.mk_not( m.mk_app(sub_decl, arg1->get_expr(), s->get_expr()));
lits.push_back(mk_literal(app_ref(tmp, m)));
found_possible = true;
}
}
}
}
if (has_leaf_root) {
split_leaf_root(arg2);
}
if (lits.size() > 1) {
if (!has_leaf_root) {
ctx.mk_th_axiom(get_id(), lits.size(), lits.data());
}
}
else if (!found_possible) {
// Conflict: arg1 cannot be subterm of arg2 (no path matches)
TRACE(datatype, tout << "conflict: no path matches\n";);
ctx.mk_th_axiom(get_id(), lits.size(), lits.data());
}
}
void theory_datatype::propagate_not_is_subterm(enode *n) {
SASSERT(is_subterm_predicate(n));
enode *arg1 = n->get_arg(0);
enode *arg2 = n->get_arg(1);
// If we are here, n is assigned false.
SASSERT(ctx.get_assignment(n) == l_false);
if (arg1->get_root() == arg2->get_root()) {
// ~ (a ⊑ a) is a conflict
literal l(ctx.enode2bool_var(n));
ctx.set_conflict(ctx.mk_justification(ext_theory_conflict_justification(get_id(), ctx, 1, &l, 0, nullptr)));
return;
}
TRACE(datatype, tout << "propagate_not_is_subterm: " << enode_pp(n, ctx) << "\n";);
literal antecedent = literal(ctx.enode2bool_var(n), false);
bool has_leaf_root = false;
ptr_vector<enode> candidates = list_subterms(arg2);
for (enode *s : candidates) {
bool is_leaf = !m_util.is_constructor(s->get_expr());
if (s->get_sort() == arg1->get_sort()) {
TRACE(datatype,
tout << "asserting " << mk_pp(arg1->get_expr(), m) << " != " << mk_pp(s->get_expr(), m) << "\n";);
literal eq = mk_eq(arg1->get_expr(), s->get_expr(), true);
literal lits[2] = {antecedent, ~eq};
ctx.mk_th_axiom(get_id(), 2, lits);
}
if (is_leaf) {
if (s->get_root() == arg2->get_root()) {
has_leaf_root = true;
continue;
}
if (m_util.is_datatype(s->get_sort())) {
func_decl *sub_decl = m_util.get_datatype_subterm(s->get_sort());
if (sub_decl) {
TRACE(datatype, tout << "asserting NOT " << mk_pp(arg1->get_expr(), m) << " subterm "
<< mk_pp(s->get_expr(), m) << "\n";);
app_ref sub_app(m.mk_app(sub_decl, arg1->get_expr(), s->get_expr()), m);
ctx.internalize(sub_app, false);
literal sub_lit = literal(ctx.get_bool_var(sub_app));
literal lits[2] = {antecedent, ~sub_lit};
ctx.mk_th_axiom(get_id(), 2, lits);
}
}
}
}
if (has_leaf_root) {
split_leaf_root(arg2);
}
}
// requesting to split on arg2
void theory_datatype::split_leaf_root(smt::enode *arg2) {
TRACE(datatype, tout << "arg is a leaf: " << enode_pp(arg2, ctx) << "\n";);
theory_var v = arg2->get_th_var(get_id());
if (v != null_theory_var) {
v = m_find.find(v);
if (m_var_data[v]->m_constructor == nullptr) {
mk_split(v);
}
}
}
void subterm_iterator::next() {
m_current = nullptr;
if (!m_manager)
return;
while (!m_todo.empty()) {
enode *curr = m_todo.back();
m_todo.pop_back();
enode *root = curr->get_root();
if (root->is_marked())
continue;
root->set_mark();
m_marked.push_back(root);
enode *ctor = nullptr;
enode *iter = root;
do {
if (m_util->is_constructor(iter->get_expr())) {
ctor = iter;
break;
}
iter = iter->get_next();
} while (iter != root);
if (ctor) {
m_current = ctor;
for (enode *child : enode::args(ctor)) {
m_todo.push_back(child);
}
return;
}
else {
m_current = root;
return;
}
}
}
subterm_iterator::subterm_iterator(ast_manager &m, datatype_util& m_util, enode *start) : m_manager(&m), m_current(nullptr), m_util(&m_util) {
m_todo.push_back(start);
next();
}
subterm_iterator::subterm_iterator(subterm_iterator &&other) : m_manager(nullptr), m_current(nullptr), m_util(nullptr) {
m_todo.swap(other.m_todo);
m_marked.swap(other.m_marked);
std::swap(m_manager, other.m_manager);
std::swap(m_current, other.m_current);
std::swap(m_util, other.m_util);
}
subterm_iterator::~subterm_iterator() {
for (enode *n : m_marked)
n->unset_mark();
}
ptr_vector<enode> theory_datatype::list_subterms(enode* arg) {
ptr_vector<enode> result;
for (enode* n : iterate_subterms(get_manager(), m_util, arg)) {
result.push_back(n);
}
return result;
}
void theory_datatype::relevant_eh(app * n) {
@ -455,6 +733,19 @@ namespace smt {
SASSERT(v != null_theory_var);
add_recognizer(v, e);
}
else if (is_subterm_predicate(n)) {
SASSERT(ctx.e_internalized(n));
enode * e = ctx.get_enode(n);
theory_var a = e->get_arg(0)->get_th_var(get_id()); // e is 'a ⊑ b'
theory_var b = e->get_arg(1)->get_th_var(get_id()); // e is 'a ⊑ b'
SASSERT(a != null_theory_var && b != null_theory_var);
add_subterm_predicate(a, e);
add_subterm_predicate(b, e);
// propagating potentially adds a lot of literals, avoid it if we can
}
}
void theory_datatype::push_scope_eh() {
@ -872,11 +1163,16 @@ namespace smt {
}
}
d1->m_constructor = d2->m_constructor;
propagate_subterm_with_constructor(v1);
}
}
for (enode* e : d2->m_recognizers)
if (e)
add_recognizer(v1, e);
for (enode* e : d2->m_subterms) {
add_subterm_predicate(v1, e);
}
}
void theory_datatype::unmerge_eh(theory_var v1, theory_var v2) {
@ -921,6 +1217,26 @@ namespace smt {
}
}
/** \brief register `predicate` to `v`'s `var_data`
*
* With `predicate:='a b'` this should be called with `v:='a'` and `v:='b'`.
*
* This doesn't handle potential propagation. The responsibility for it
* falls on the caller.
*/
void theory_datatype::add_subterm_predicate(theory_var v, enode * predicate) {
SASSERT(is_subterm_predicate(predicate));
v = m_find.find(v);
var_data * d = m_var_data[v];
if (d->m_subterms.contains(predicate)) return;
TRACE(datatype, tout << "add subterm predicate\n" << enode_pp(predicate, ctx) << "\n";);
m_trail_stack.push(restore_vector(d->m_subterms));
d->m_subterms.push_back(predicate);
}
/**
\brief Propagate a recognizer assigned to false.
*/

View file

@ -33,6 +33,18 @@ namespace smt {
struct var_data {
ptr_vector<enode> m_recognizers; //!< recognizers of this equivalence class that are being watched.
enode * m_constructor; //!< constructor of this equivalence class, 0 if there is no constructor in the eqc.
/**
* \brief subterm predicates that involve this equivalence class
*
* So all terms of the shape `a b` where `var_data` represents either `a` or `b`.
*
* This is more a set than a vector, but I'll use `ptr_vector`
* because I know the API better, it's easier to backtrack on it and
* it should be small enough to outperform a hasmap anyway
*/
ptr_vector<enode> m_subterms;
var_data():
m_constructor(nullptr) {
}
@ -56,11 +68,13 @@ namespace smt {
bool is_constructor(app * f) const { return m_util.is_constructor(f); }
bool is_recognizer(app * f) const { return m_util.is_recognizer(f); }
bool is_subterm_predicate(app * f) const { return m_util.is_subterm_predicate(f); }
bool is_accessor(app * f) const { return m_util.is_accessor(f); }
bool is_update_field(app * f) const { return m_util.is_update_field(f); }
bool is_constructor(enode * n) const { return is_constructor(n->get_expr()); }
bool is_recognizer(enode * n) const { return is_recognizer(n->get_expr()); }
bool is_subterm_predicate(enode * n) const { return is_subterm_predicate(n->get_expr()); }
bool is_accessor(enode * n) const { return is_accessor(n->get_expr()); }
bool is_update_field(enode * n) const { return m_util.is_update_field(n->get_expr()); }
@ -68,8 +82,15 @@ namespace smt {
void assert_is_constructor_axiom(enode * n, func_decl * c, literal antecedent);
void assert_accessor_axioms(enode * n);
void assert_update_field_axioms(enode * n);
void assert_subterm_axioms(enode * n);
void add_recognizer(theory_var v, enode * recognizer);
void propagate_recognizer(theory_var v, enode * r);
void add_subterm_predicate(theory_var v, enode *predicate);
void propagate_subterm(enode * n, bool is_true);
void propagate_is_subterm(enode * n);
void propagate_not_is_subterm(enode *n);
void split_leaf_root(smt::enode *arg2);
void propagate_subterm_with_constructor(theory_var v);
void propagate_recognizer(theory_var v, enode *r);
void sign_recognizer_conflict(enode * c, enode * r);
typedef enum { ENTER, EXIT } stack_op;
@ -113,6 +134,7 @@ namespace smt {
void mk_split(theory_var v);
void display_var(std::ostream & out, theory_var v) const;
ptr_vector<enode> list_subterms(enode* arg);
protected:
theory_var mk_var(enode * n) override;
@ -148,6 +170,57 @@ namespace smt {
};
/**
* Iterator over the subterms of an enode.
*
* It only takes into account datatype terms when looking for subterms.
*
* It uses the `mark` field of the `enode` struct to mark the node visited.
* It will clean afterwards. *Implementation invariant*: the destructor
* *must* be run *exactly* once otherwise the marks might not be clean or
* might be clean more than once and mid search
*/
class subterm_iterator {
ptr_vector<enode> m_todo;
ptr_vector<enode> m_marked;
ast_manager* m_manager;
enode* m_current;
datatype_util* m_util;
void next();
subterm_iterator() : m_manager(nullptr), m_current(nullptr), m_util(nullptr) {}
public:
// subterm_iterator();
subterm_iterator(ast_manager& m, datatype_util& m_util, enode *start);
~subterm_iterator();
subterm_iterator(subterm_iterator &&other);
// need to delete this function otherwise the destructor could be ran
// more than once, invalidating the marks used in the dfs.
subterm_iterator(const subterm_iterator& other) = delete;
subterm_iterator begin() {
return std::move(*this);
}
subterm_iterator end() {
return subterm_iterator();
}
bool operator!=(const subterm_iterator &other) const {
return m_current != other.m_current;
}
enode *operator*() const {
return m_current;
}
void operator++() { next(); }
subterm_iterator& operator=(const subterm_iterator&) = delete;
};
inline subterm_iterator iterate_subterms(ast_manager& m, datatype_util& m_util, enode *arg) {
return subterm_iterator(m, m_util, arg);
}
};