3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-14 04:45:39 +00:00
z3/src/ast/rewriter/seq_regex_bisim.h
2026-06-10 15:06:12 -07:00

100 lines
3.1 KiB
C++

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_regex_bisim.h
Abstract:
Union-find based bisimulation algorithm for symbolic regular expression
equivalence, based on the construction described in:
"Symbolic Extended Regular Expression Equivalence"
Ian, Kathi, Margus
The algorithm decides equivalence of two regexes p, q by performing
a bisimulation search on the symbolic derivative of p XOR q. A
union-find structure tracks pairs of regex states that should be
bisimilar, and the worklist is initialised with the singleton
{p XOR q}.
The algorithm returns:
l_true p and q are equivalent (bisimilar)
l_false p and q are inequivalent (a distinguishing prefix exists)
l_undef the algorithm could not decide within the configured bound
or encountered a non-ground regex it cannot reason about
Author:
Nikolaj Bjorner (nbjorner)
Margus Veanes (veanes)
--*/
#pragma once
#include "ast/seq_decl_plugin.h"
#include "util/lbool.h"
#include "util/obj_hashtable.h"
#include "util/union_find.h"
class seq_rewriter;
namespace seq {
/*
Union-find based bisimulation algorithm for deciding equivalence
of two ground (closed) regular expressions in ERE.
Usage:
seq::regex_bisim bisim(rewriter);
lbool r = bisim.are_equivalent(p, q);
if (r == l_true) // p ≡ q
if (r == l_false) // p ≢ q
if (r == l_undef) // cannot decide
The implementation only attempts the equivalence check on ground
regexes (no free variables) that use the standard symbolic regex
constructors. For non-ground or unsupported inputs the procedure
conservatively returns l_undef.
*/
class regex_bisim {
private:
ast_manager& m;
seq_rewriter& m_rw;
seq_util m_util;
basic_union_find m_uf;
obj_map<expr, unsigned> m_node_of;
expr_ref_vector m_pinned;
expr_ref_vector m_worklist;
unsigned m_step_bound { 50000 };
unsigned m_steps { 0 };
unsigned node_of(expr* r);
bool merge_leaf(expr* xor_pair);
bool collect_leaves(expr* der, expr_ref_vector& leaves);
lbool nullability(expr* r);
bool is_supported(expr* r);
// Returns true if the leaf l proves that the original pair is
// inequivalent and the bisim can short-circuit to l_false.
// Uses the get_info().classical invariant: a classical regex
// has non-empty language, so a bare classical leaf (implicitly
// empty XOR r) is a distinguishing prefix. Similarly an XOR of
// two classical regexes with different min_length is
// distinguishing.
bool classical_distinguishing(expr* l);
void reset();
lbool are_equivalent_core(expr* p, expr* q);
public:
regex_bisim(seq_rewriter& rw);
void set_step_bound(unsigned n) { m_step_bound = n; }
lbool are_equivalent(expr* p, expr* q);
};
}