3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 22:04:53 +00:00

Add OP_SEQ_POWER to seq_decl_plugin and create euf_snode/euf_sgraph for string graph encapsulation

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-01 20:19:37 +00:00
parent debe81ee74
commit 2e17bb8767
6 changed files with 659 additions and 0 deletions

View file

@ -9,6 +9,7 @@ z3_add_component(euf
euf_justification.cpp
euf_mam.cpp
euf_plugin.cpp
euf_sgraph.cpp
euf_specrel_plugin.cpp
ho_matcher.cpp
COMPONENT_DEPENDENCIES

406
src/ast/euf/euf_sgraph.cpp Normal file
View file

@ -0,0 +1,406 @@
/*++
Copyright (c) 2025 Microsoft Corporation
Module Name:
euf_sgraph.cpp
Abstract:
Sequence/string graph implementation
Author:
Nikolaj Bjorner (nbjorner) 2025-03-01
--*/
#include "ast/euf/euf_sgraph.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
namespace euf {
sgraph::sgraph(ast_manager& m):
m(m),
m_seq(m),
m_exprs(m) {
}
sgraph::~sgraph() {
}
snode_kind sgraph::classify(expr* e) const {
if (!is_app(e))
return snode_kind::s_other;
if (m_seq.str.is_empty(e))
return snode_kind::s_empty;
if (m_seq.str.is_string(e)) {
zstring s;
if (m_seq.str.is_string(e, s) && s.empty())
return snode_kind::s_empty;
return snode_kind::s_other;
}
if (m_seq.str.is_concat(e))
return snode_kind::s_concat;
if (m_seq.str.is_unit(e)) {
expr* ch = to_app(e)->get_arg(0);
if (m_seq.is_const_char(ch))
return snode_kind::s_char;
return snode_kind::s_unit;
}
if (m_seq.str.is_power(e))
return snode_kind::s_power;
if (m_seq.re.is_star(e))
return snode_kind::s_star;
if (m_seq.re.is_loop(e))
return snode_kind::s_loop;
if (m_seq.re.is_union(e))
return snode_kind::s_union;
if (m_seq.re.is_intersection(e))
return snode_kind::s_intersect;
if (m_seq.re.is_complement(e))
return snode_kind::s_complement;
if (m_seq.re.is_empty(e))
return snode_kind::s_fail;
if (m_seq.re.is_full_char(e))
return snode_kind::s_full_char;
if (m_seq.re.is_full_seq(e))
return snode_kind::s_full_seq;
if (m_seq.re.is_to_re(e))
return snode_kind::s_to_re;
if (m_seq.str.is_in_re(e))
return snode_kind::s_in_re;
// uninterpreted constants of string sort are variables
if (is_uninterp_const(e) && m_seq.is_seq(e->get_sort()))
return snode_kind::s_var;
return snode_kind::s_other;
}
void sgraph::compute_metadata(snode* n) {
switch (n->m_kind) {
case snode_kind::s_empty:
n->m_ground = true;
n->m_regex_free = true;
n->m_nullable = true;
n->m_level = 0;
n->m_length = 0;
break;
case snode_kind::s_char:
n->m_ground = true;
n->m_regex_free = true;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_var:
n->m_ground = false;
n->m_regex_free = true;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_unit:
n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
n->m_regex_free = true;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_concat: {
SASSERT(n->num_args() == 2);
snode* l = n->arg(0);
snode* r = n->arg(1);
n->m_ground = l->is_ground() && r->is_ground();
n->m_regex_free = l->is_regex_free() && r->is_regex_free();
n->m_nullable = l->is_nullable() && r->is_nullable();
n->m_level = std::max(l->level(), r->level()) + 1;
n->m_length = l->length() + r->length();
++m_stats.m_num_concat;
break;
}
case snode_kind::s_power: {
SASSERT(n->num_args() >= 1);
snode* base = n->arg(0);
n->m_ground = base->is_ground();
n->m_regex_free = base->is_regex_free();
n->m_nullable = base->is_nullable();
n->m_level = 1;
n->m_length = 1;
++m_stats.m_num_power;
break;
}
case snode_kind::s_star:
SASSERT(n->num_args() == 1);
n->m_ground = n->arg(0)->is_ground();
n->m_regex_free = false;
n->m_nullable = true;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_loop:
n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
n->m_regex_free = false;
n->m_nullable = false; // depends on lower bound
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_union:
SASSERT(n->num_args() == 2);
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
n->m_regex_free = false;
n->m_nullable = n->arg(0)->is_nullable() || n->arg(1)->is_nullable();
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_intersect:
SASSERT(n->num_args() == 2);
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
n->m_regex_free = false;
n->m_nullable = n->arg(0)->is_nullable() && n->arg(1)->is_nullable();
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_complement:
SASSERT(n->num_args() == 1);
n->m_ground = n->arg(0)->is_ground();
n->m_regex_free = false;
n->m_nullable = !n->arg(0)->is_nullable();
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_fail:
n->m_ground = true;
n->m_regex_free = false;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_full_char:
n->m_ground = true;
n->m_regex_free = false;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_full_seq:
n->m_ground = true;
n->m_regex_free = false;
n->m_nullable = true;
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_to_re:
SASSERT(n->num_args() == 1);
n->m_ground = n->arg(0)->is_ground();
n->m_regex_free = false;
n->m_nullable = n->arg(0)->is_nullable();
n->m_level = 1;
n->m_length = 1;
break;
case snode_kind::s_in_re:
SASSERT(n->num_args() == 2);
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
n->m_regex_free = false;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
default:
n->m_ground = true;
n->m_regex_free = true;
n->m_nullable = false;
n->m_level = 1;
n->m_length = 1;
break;
}
}
snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) {
unsigned id = m_nodes.size();
snode* n = snode::mk(m_region, e, k, id, num_args, args);
compute_metadata(n);
m_nodes.push_back(n);
m_exprs.push_back(e);
if (e) {
unsigned eid = e->get_id();
m_expr2snode.reserve(eid + 1, nullptr);
m_expr2snode[eid] = n;
}
++m_stats.m_num_nodes;
return n;
}
snode* sgraph::mk(expr* e) {
SASSERT(e);
snode* n = find(e);
if (n)
return n;
snode_kind k = classify(e);
if (!is_app(e))
return mk_snode(e, k, 0, nullptr);
app* a = to_app(e);
unsigned arity = a->get_num_args();
// recursively register children that are sequences or regexes
snode_vector child_nodes;
for (unsigned i = 0; i < arity; ++i) {
expr* ch = a->get_arg(i);
if (m_seq.is_seq(ch) || m_seq.is_re(ch)) {
snode* cn = mk(ch);
child_nodes.push_back(cn);
}
}
return mk_snode(e, k, child_nodes.size(), child_nodes.data());
}
snode* sgraph::find(expr* e) const {
if (!e)
return nullptr;
unsigned eid = e->get_id();
if (eid < m_expr2snode.size())
return m_expr2snode[eid];
return nullptr;
}
snode* sgraph::mk_empty(sort* s) {
expr_ref e(m_seq.str.mk_empty(s), m);
return mk(e);
}
snode* sgraph::mk_concat(snode* a, snode* b) {
SASSERT(a && b);
if (a->is_empty()) return b;
if (b->is_empty()) return a;
expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m);
snode* n = find(e);
if (n) return n;
snode* args[2] = { a, b };
return mk_snode(e, snode_kind::s_concat, 2, args);
}
snode* sgraph::mk_power(snode* base, snode* exp) {
SASSERT(base && exp);
expr_ref e(m_seq.str.mk_power(base->get_expr(), exp->get_expr()), m);
snode* n = find(e);
if (n) return n;
snode* args[2] = { base, exp };
return mk_snode(e, snode_kind::s_power, 2, args);
}
void sgraph::push() {
m_scopes.push_back(m_nodes.size());
++m_num_scopes;
}
void sgraph::pop(unsigned num_scopes) {
if (num_scopes == 0)
return;
SASSERT(num_scopes <= m_num_scopes);
unsigned new_lvl = m_num_scopes - num_scopes;
unsigned old_sz = m_scopes[new_lvl];
for (unsigned i = m_nodes.size(); i-- > old_sz; ) {
snode* n = m_nodes[i];
if (n->get_expr()) {
unsigned eid = n->get_expr()->get_id();
if (eid < m_expr2snode.size())
m_expr2snode[eid] = nullptr;
}
}
m_nodes.shrink(old_sz);
m_exprs.shrink(old_sz);
m_scopes.shrink(new_lvl);
m_num_scopes = new_lvl;
}
std::ostream& sgraph::display(std::ostream& out) const {
auto kind_str = [](snode_kind k) -> char const* {
switch (k) {
case snode_kind::s_empty: return "empty";
case snode_kind::s_char: return "char";
case snode_kind::s_var: return "var";
case snode_kind::s_unit: return "unit";
case snode_kind::s_concat: return "concat";
case snode_kind::s_power: return "power";
case snode_kind::s_star: return "star";
case snode_kind::s_loop: return "loop";
case snode_kind::s_union: return "union";
case snode_kind::s_intersect: return "intersect";
case snode_kind::s_complement: return "complement";
case snode_kind::s_fail: return "fail";
case snode_kind::s_full_char: return "full_char";
case snode_kind::s_full_seq: return "full_seq";
case snode_kind::s_to_re: return "to_re";
case snode_kind::s_in_re: return "in_re";
case snode_kind::s_other: return "other";
}
return "?";
};
for (snode* n : m_nodes) {
out << "snode[" << n->id() << "] "
<< kind_str(n->kind())
<< " level=" << n->level()
<< " len=" << n->length()
<< " ground=" << n->is_ground()
<< " rfree=" << n->is_regex_free()
<< " nullable=" << n->is_nullable();
if (n->num_args() > 0) {
out << " args=(";
for (unsigned i = 0; i < n->num_args(); ++i) {
if (i > 0) out << ", ";
out << n->arg(i)->id();
}
out << ")";
}
if (n->get_expr())
out << " expr=" << mk_pp(n->get_expr(), m);
out << "\n";
}
return out;
}
void sgraph::collect_statistics(statistics& st) const {
st.update("seq-graph-nodes", m_stats.m_num_nodes);
st.update("seq-graph-concat", m_stats.m_num_concat);
st.update("seq-graph-power", m_stats.m_num_power);
}
}

95
src/ast/euf/euf_sgraph.h Normal file
View file

@ -0,0 +1,95 @@
/*++
Copyright (c) 2025 Microsoft Corporation
Module Name:
euf_sgraph.h
Abstract:
Sequence/string graph layer
Encapsulates string expressions in the style of euf_egraph.h.
The sgraph registers sequence expressions and classifies them
into a ZIPT-style representation of string tokens organized
as binary concatenation trees.
This provides a layer that maps Z3 AST expressions (from seq_decl_plugin)
to snode structures with metadata for ground, regex-free, nullable, etc.
Author:
Nikolaj Bjorner (nbjorner) 2025-03-01
--*/
#pragma once
#include "util/region.h"
#include "util/statistics.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_snode.h"
namespace euf {
class sgraph {
struct stats {
unsigned m_num_nodes;
unsigned m_num_concat;
unsigned m_num_power;
stats() { reset(); }
void reset() { memset(this, 0, sizeof(*this)); }
};
ast_manager& m;
seq_util m_seq;
region m_region;
snode_vector m_nodes;
expr_ref_vector m_exprs; // pin expressions
unsigned_vector m_scopes;
unsigned m_num_scopes = 0;
stats m_stats;
// maps expression id to snode
ptr_vector<snode> m_expr2snode;
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
public:
sgraph(ast_manager& m);
~sgraph();
ast_manager& get_manager() const { return m; }
seq_util& get_seq_util() { return m_seq; }
// register an expression and return its snode
snode* mk(expr* e);
// lookup an already-registered expression
snode* find(expr* e) const;
// build compound snodes
snode* mk_empty(sort* s);
snode* mk_concat(snode* a, snode* b);
snode* mk_power(snode* base, snode* exp);
// scope management for backtracking
void push();
void pop(unsigned num_scopes);
// access
snode_vector const& nodes() const { return m_nodes; }
unsigned num_nodes() const { return m_nodes.size(); }
// display
std::ostream& display(std::ostream& out) const;
void collect_statistics(statistics& st) const;
};
}

148
src/ast/euf/euf_snode.h Normal file
View file

@ -0,0 +1,148 @@
/*++
Copyright (c) 2025 Microsoft Corporation
Module Name:
euf_snode.h
Abstract:
snode layer for sequence/string graph
Encapsulates strings in the style of euf_enode.h.
Maps Z3 sequence expressions to a ZIPT-style representation where
strings are composed of tokens (characters, variables, powers, regex, etc.)
organized as a binary tree of concatenations.
Author:
Nikolaj Bjorner (nbjorner) 2025-03-01
--*/
#pragma once
#include "util/vector.h"
#include "util/region.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
namespace euf {
class sgraph;
class snode;
typedef ptr_vector<snode> snode_vector;
enum class snode_kind {
s_empty, // empty string (OP_SEQ_EMPTY or empty string constant)
s_char, // concrete character unit (OP_SEQ_UNIT wrapping a char literal)
s_var, // string variable (uninterpreted constant of string sort)
s_unit, // generic unit (OP_SEQ_UNIT with non-literal character)
s_concat, // concatenation of two snodes (OP_SEQ_CONCAT)
s_power, // string exponentiation s^n (OP_SEQ_POWER)
s_star, // Kleene star r* (OP_RE_STAR)
s_loop, // bounded loop r{lo,hi} (OP_RE_LOOP)
s_union, // union r1|r2 (OP_RE_UNION)
s_intersect, // intersection r1&r2 (OP_RE_INTERSECT)
s_complement, // complement ~r (OP_RE_COMPLEMENT)
s_fail, // empty language (OP_RE_EMPTY_SET)
s_full_char, // full character set (OP_RE_FULL_CHAR_SET)
s_full_seq, // full sequence set r=.* (OP_RE_FULL_SEQ_SET)
s_to_re, // string to regex (OP_SEQ_TO_RE)
s_in_re, // regex membership (OP_SEQ_IN_RE)
s_other, // other sequence expression not directly classified
};
class snode {
expr* m_expr = nullptr;
snode_kind m_kind = snode_kind::s_other;
unsigned m_id = UINT_MAX;
unsigned m_num_args = 0;
// metadata flags, analogous to ZIPT's Str/StrToken properties
bool m_ground = true; // no uninterpreted string variables
bool m_regex_free = true; // no regex constructs
bool m_nullable = false; // accepts the empty string
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
unsigned m_length = 0; // token count, number of leaf tokens in the tree
snode* m_args[0];
friend class sgraph;
static unsigned get_snode_size(unsigned num_args) {
return sizeof(snode) + num_args * sizeof(snode*);
}
static snode* mk(region& r, expr* e, snode_kind k, unsigned id, unsigned num_args, snode* const* args) {
void* mem = r.allocate(get_snode_size(num_args));
snode* n = new (mem) snode();
n->m_expr = e;
n->m_kind = k;
n->m_id = id;
n->m_num_args = num_args;
for (unsigned i = 0; i < num_args; ++i)
n->m_args[i] = args[i];
return n;
}
public:
expr* get_expr() const { return m_expr; }
snode_kind kind() const { return m_kind; }
unsigned id() const { return m_id; }
unsigned num_args() const { return m_num_args; }
snode* arg(unsigned i) const { SASSERT(i < m_num_args); return m_args[i]; }
bool is_ground() const { return m_ground; }
bool is_regex_free() const { return m_regex_free; }
bool is_nullable() const { return m_nullable; }
unsigned level() const { return m_level; }
unsigned length() const { return m_length; }
bool is_empty() const { return m_kind == snode_kind::s_empty; }
bool is_char() const { return m_kind == snode_kind::s_char; }
bool is_var() const { return m_kind == snode_kind::s_var; }
bool is_unit() const { return m_kind == snode_kind::s_unit; }
bool is_concat() const { return m_kind == snode_kind::s_concat; }
bool is_power() const { return m_kind == snode_kind::s_power; }
bool is_star() const { return m_kind == snode_kind::s_star; }
bool is_loop() const { return m_kind == snode_kind::s_loop; }
bool is_union() const { return m_kind == snode_kind::s_union; }
bool is_intersect() const { return m_kind == snode_kind::s_intersect; }
bool is_complement() const { return m_kind == snode_kind::s_complement; }
bool is_fail() const { return m_kind == snode_kind::s_fail; }
bool is_full_char() const { return m_kind == snode_kind::s_full_char; }
bool is_full_seq() const { return m_kind == snode_kind::s_full_seq; }
bool is_to_re() const { return m_kind == snode_kind::s_to_re; }
bool is_in_re() const { return m_kind == snode_kind::s_in_re; }
// is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
bool is_token() const {
switch (m_kind) {
case snode_kind::s_empty:
case snode_kind::s_concat:
return false;
default:
return true;
}
}
// analogous to ZIPT's Str.First / Str.Last
snode* first() const {
snode const* s = this;
while (s->is_concat())
s = s->arg(0);
return const_cast<snode*>(s);
}
snode* last() const {
snode const* s = this;
while (s->is_concat())
s = s->arg(1);
return const_cast<snode*>(s);
}
};
}

View file

@ -222,6 +222,7 @@ void seq_decl_plugin::init() {
m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT);
m_sigs[OP_SEQ_POWER] = alloc(psig, m, "seq.power", 1, 2, seqAintT, seqA);
m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA);
m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA);
m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA);
@ -591,6 +592,10 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
add_map_sig();
return mk_str_fun(k, arity, domain, range, k);
case OP_SEQ_POWER:
match(*m_sigs[k], arity, domain, range, rng);
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
case OP_SEQ_TO_RE:
m_has_re = true;
return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);

View file

@ -59,6 +59,7 @@ enum seq_op_kind {
OP_SEQ_MAPI, // Array[Int,A,B] -> Int -> Seq[A] -> Seq[B]
OP_SEQ_FOLDL, // Array[B,A,B] -> B -> Seq[A] -> B
OP_SEQ_FOLDLI, // Array[Int,B,A,B] -> Int -> B -> Seq[A] -> B
OP_SEQ_POWER, // Seq -> Int -> Seq, string exponentiation s^n
OP_RE_PLUS,
OP_RE_STAR,
@ -307,6 +308,7 @@ public:
app* mk_mapi(expr* f, expr* i, expr* s) const { expr* es[3] = { f, i, s }; return m.mk_app(m_fid, OP_SEQ_MAPI, 3, es); }
app* mk_foldl(expr* f, expr* b, expr* s) const { expr* es[3] = { f, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDL, 3, es); }
app* mk_foldli(expr* f, expr* i, expr* b, expr* s) const { expr* es[4] = { f, i, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDLI, 4, es); }
app* mk_power(expr* s, expr* n) const { expr* es[2] = { s, n }; return m.mk_app(m_fid, OP_SEQ_POWER, 2, es); }
app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
@ -348,6 +350,7 @@ public:
bool is_mapi(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_MAPI); }
bool is_foldl(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDL); }
bool is_foldli(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDLI); }
bool is_power(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_POWER); }
bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); }
bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); }
bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); }
@ -404,6 +407,7 @@ public:
MATCH_TERNARY(is_mapi);
MATCH_TERNARY(is_foldl);
MATCH_QUATARY(is_foldli);
MATCH_BINARY(is_power);
MATCH_BINARY(is_last_index);
MATCH_TERNARY(is_replace);
MATCH_TERNARY(is_replace_re);