Lookahead for regex splits applied to membership constraints

Rewriting constraint/prefix/suffix with constant strings to regexes
2026-07-11 17:46:20 +00:00 · 2026-06-10 20:35:36 +02:00 · 2026-06-10 20:35:36 +02:00 · 2dbefbcd56
commit 2dbefbcd56
parent f9f16550e0
7 changed files with 220 additions and 69 deletions
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@ -383,9 +383,11 @@ public:
        return result;
    }

-    // Split decomposition (sigma) of a regex; see seq_split.h.
-    bool split(expr* r, split_set& out, unsigned threshold, split_mode mode = split_mode::strong) {
-        return m_split.compute(r, out, threshold, mode);
+    // Split decomposition (sigma) of a regex; see seq_split.h.  `oracle` (optional)
+    // prunes non-viable splits during generation.
+    bool split(expr* r, split_set& out, unsigned threshold,
+               split_mode mode = split_mode::strong, split_oracle const& oracle = {}) {
+        return m_split.compute(r, out, threshold, mode, oracle);
    }
    void simplify_split(split_set& s) { m_split.simplify(s); }

--- a/src/ast/rewriter/seq_split.cpp
+++ b/src/ast/rewriter/seq_split.cpp
@ -28,18 +28,26 @@ ast_manager&   seq_split::m()   const { return m_rw.m(); }
 seq_util&      seq_split::seq() const { return m_rw.u(); }
 seq_util::rex& seq_split::re()  const { return m_rw.u().re; }

+// Add <d, n> unless the (optional) lookahead oracle prunes it.
+void seq_split::push(split_set& out, split_oracle const& oracle, expr* d, expr* n) const {
+    if (!oracle || oracle(d, n))
+        out.push_back(split_pair(d, n, m()));
+}
+
 // Cross-product intersection of two split-sets (split algebra):
 //   S1 cap S2 = { <D1 cap D2, N1 cap N2> | <D1,N1> in S1, <D2,N2> in S2 }.
 // Pairs where any component is bottom (the empty regex) are dropped.
-bool seq_split::intersect(split_set const& s1, split_set const& s2, split_set& result, unsigned threshold) {
+bool seq_split::intersect(split_set const& s1, split_set const& s2, split_set& result,
+                          unsigned threshold, split_oracle const& oracle) {
    seq_util::rex& r = re();
    for (auto const& p1 : s1) {
        for (auto const& p2 : s2) {
            if (r.is_empty(p1.m_d) || r.is_empty(p2.m_d) ||
                r.is_empty(p1.m_n) || r.is_empty(p2.m_n))
                continue;
-            result.push_back(split_pair(m_rw.mk_regex_inter_normalize(p1.m_d, p2.m_d),
-                                        m_rw.mk_regex_inter_normalize(p1.m_n, p2.m_n), m()));
+            const expr_ref di(m_rw.mk_regex_inter_normalize(p1.m_d, p2.m_d), m());
+            const expr_ref ni(m_rw.mk_regex_inter_normalize(p1.m_n, p2.m_n), m());
+            push(result, oracle, di, ni);
            if (result.size() > threshold)
                return false;
        }
@ -52,23 +60,27 @@ bool seq_split::intersect(split_set const& s1, split_set const& s2, split_set& r
 // May produce up to 2^|sp| pairs (bounded by the threshold).  A threshold
 // overrun must abort entirely: a partial fold is a strictly weaker (unsound)
 // split-set, since each ~sp[i] further constrains ~S.
-bool seq_split::complement(sort* seq_sort, split_set const& sp, split_set& result, unsigned threshold) {
+bool seq_split::complement(sort* seq_sort, split_set const& sp, split_set& result,
+                           unsigned threshold, split_oracle const& oracle) {
    seq_util::rex& r = re();
    sort* re_sort = r.mk_re(seq_sort);
    const expr_ref full(r.mk_full_seq(re_sort), m());   // .*
    if (sp.empty()) {                                   // ~{} = <.*, .*>
-        result.push_back(split_pair(full, full, m()));
+        push(result, oracle, full, full);
        return true;
    }
+    // The acc/next pairs carry genuine output-orientation N components (the De
+    // Morgan ~<D,N> = {<~D,.*>, <.*,~N>}), so the oracle prunes them soundly and
+    // keeps the 2^|sp| fold from blowing up.
    split_set acc;
-    acc.push_back(split_pair(r.mk_complement(sp[0].m_d), full, m()));
-    acc.push_back(split_pair(full, r.mk_complement(sp[0].m_n), m()));
+    push(acc, oracle, r.mk_complement(sp[0].m_d), full);
+    push(acc, oracle, full, r.mk_complement(sp[0].m_n));
    for (unsigned i = 1; i < sp.size(); ++i) {
        split_set next;
-        next.push_back(split_pair(r.mk_complement(sp[i].m_d), full, m()));
-        next.push_back(split_pair(full, r.mk_complement(sp[i].m_n), m()));
+        push(next, oracle, r.mk_complement(sp[i].m_d), full);
+        push(next, oracle, full, r.mk_complement(sp[i].m_n));
        split_set tmp;
-        if (!intersect(acc, next, tmp, threshold))
+        if (!intersect(acc, next, tmp, threshold, oracle))
            return false;
        acc = std::move(tmp);
        if (acc.empty())            // intersection empty => ~S is empty
@ -80,14 +92,13 @@ bool seq_split::complement(sort* seq_sort, split_set const& sp, split_set& resul
    return true;
 }

-bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mode mode) {
+bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mode mode,
+                        split_oracle const& oracle) {
    SASSERT(r);
    seq_util& sq = seq();
    seq_util::rex& rex = re();
    ast_manager& mm = m();

-    // std::cout << "compute sigma of " << mk_pp(r, m()) << std::endl;
-
    sort* seq_sort = nullptr;
    if (!sq.is_re(r, seq_sort))
        return false;
@ -99,7 +110,7 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
    // epsilon: sigma(eps) = { <eps, eps> }
    if (rex.is_epsilon(r)) {
        const expr_ref eps(rex.mk_epsilon(seq_sort), mm);
-        result.push_back(split_pair(eps, eps, mm));
+        push(result, oracle, eps, eps);
        return true;
    }

@ -113,7 +124,7 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
            for (unsigned i = 0; i <= str.length(); ++i) {
                const expr_ref p(rex.mk_to_re(sq.str.mk_string(str.extract(0, i))), mm);
                const expr_ref q(rex.mk_to_re(sq.str.mk_string(str.extract(i, str.length() - i))), mm);
-                result.push_back(split_pair(p, q, mm));
+                push(result, oracle, p, q);
            }
            return true;
        }
@ -121,8 +132,8 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
        if (sq.str.is_unit(s)) {
            const expr_ref ex(r, mm);
            const expr_ref eps(rex.mk_epsilon(seq_sort), mm);
-            result.push_back(split_pair(eps, ex, mm));
-            result.push_back(split_pair(ex, eps, mm));
+            push(result, oracle, eps, ex);
+            push(result, oracle, ex, eps);
            return true;
        }
        // to_re over a non-literal sequence: not handled.
@ -134,15 +145,15 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
    if (rex.is_full_char(r) || rex.is_range(r) || rex.is_of_pred(r)) {
        const expr_ref ex(r, mm);
        const expr_ref eps(rex.mk_epsilon(seq_sort), mm);
-        result.push_back(split_pair(eps, ex, mm));
-        result.push_back(split_pair(ex, eps, mm));
+        push(result, oracle, eps, ex);
+        push(result, oracle, ex, eps);
        return true;
    }

    // .* : sigma(.*) = { <.*, .*> }
    if (rex.is_full_seq(r)) {
        const expr_ref ex(r, mm);
-        result.push_back(split_pair(ex, ex, mm));
+        push(result, oracle, ex, ex);
        return true;
    }

@ -150,7 +161,7 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
    if (rex.is_union(r)) {
        app* ap = to_app(r);
        for (unsigned i = 0; i < ap->get_num_args(); ++i) {
-            if (!compute(ap->get_arg(i), result, threshold, mode))
+            if (!compute(ap->get_arg(i), result, threshold, mode, oracle))
                return false;
        }
        return true;
@ -162,8 +173,11 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
        app* ap = to_app(r);
        const unsigned n = ap->get_num_args();
        for (unsigned i = 0; i < n; ++i) {
+            // Sound to pass the oracle into the sub-computation: N_inner.Sigma*
+            // over-approximates the final N_inner.right, so a prune here is a
+            // prune of the final pair too (prefix-compatible test).
            split_set sigma_arg;
-            if (!compute(ap->get_arg(i), sigma_arg, threshold, mode))
+            if (!compute(ap->get_arg(i), sigma_arg, threshold, mode, oracle))
                return false;

            expr_ref left(mm), right(mm);
@ -188,7 +202,7 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
            for (auto const& [d, nn] : sigma_arg) {
                const expr_ref p = m_rw.mk_re_append(left, d);
                const expr_ref q = m_rw.mk_re_append(nn, right);
-                result.push_back(split_pair(p, q, mm));
+                push(result, oracle, p, q);
            }
        }
        return true;
@ -197,14 +211,14 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
    // star: sigma(a*) = { <eps, eps> } cup a*.sigma(a).a*
    if (rex.is_star(r, a)) {
        const expr_ref eps(rex.mk_epsilon(seq_sort), mm);
-        result.push_back(split_pair(eps, eps, mm));
+        push(result, oracle, eps, eps);
        split_set sa;
-        if (!compute(a, sa, threshold, mode))
+        if (!compute(a, sa, threshold, mode, oracle))
            return false;
        for (auto const& [d, n] : sa) {
            const expr_ref p = m_rw.mk_re_append(r, d);   // a*.D
            const expr_ref q = m_rw.mk_re_append(n, r);   // N.a*
-            result.push_back(split_pair(p, q, mm));
+            push(result, oracle, p, q);
        }
        return true;
    }
@ -213,12 +227,12 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
    if (rex.is_plus(r, a)) {
        const expr_ref star(rex.mk_star(a), mm);          // a*
        split_set sa;
-        if (!compute(a, sa, threshold, mode))
+        if (!compute(a, sa, threshold, mode, oracle))
            return false;
        for (auto const& [d, n] : sa) {
            const expr_ref p = m_rw.mk_re_append(star, d);
            const expr_ref q = m_rw.mk_re_append(n, star);
-            result.push_back(split_pair(p, q, mm));
+            push(result, oracle, p, q);
        }
        return true;
    }
@ -230,16 +244,16 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
        app* ap = to_app(r);
        const unsigned n = ap->get_num_args();
        split_set current;
-        if (!compute(ap->get_arg(0), current, threshold, mode))
+        if (!compute(ap->get_arg(0), current, threshold, mode, oracle))
            return false;
        // A give-up on any conjunct must propagate as a give-up: silently treating
        // it as the empty split-set would collapse the whole intersection to bottom
        // and be misreported as an (unsound) conflict.
        for (unsigned i = 1; i < n && !current.empty(); ++i) {
            split_set arg_i, tmp;
-            if (!compute(ap->get_arg(i), arg_i, threshold, mode))
+            if (!compute(ap->get_arg(i), arg_i, threshold, mode, oracle))
                return false;
-            if (!intersect(current, arg_i, tmp, threshold))
+            if (!intersect(current, arg_i, tmp, threshold, oracle))
                return false;
            current = std::move(tmp);
        }
@ -247,28 +261,31 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo
        return true;
    }

-    // complement: sigma(~a) = ~sigma(a)
+    // complement: sigma(~a) = ~sigma(a).
+    // The body is computed WITHOUT the oracle (the body's pairs are inverted, so
+    // their N is unrelated to the output N); the oracle is re-applied in complement().
    if (rex.is_complement(r, a)) {
        if (mode == split_mode::weak)
            return false;
        split_set sa;
        if (!compute(a, sa, threshold, mode))
            return false;
-        return complement(seq_sort, sa, result, threshold);
+        return complement(seq_sort, sa, result, threshold, oracle);
    }

-    // difference: a \ b = a & ~b ; sigma(a \ b) = sigma(a) cap ~sigma(b)
+    // difference: a \ b = a & ~b ; sigma(a \ b) = sigma(a) cap ~sigma(b).
+    // sigma(b) (used only inside the complement) is computed WITHOUT the oracle.
    if (rex.is_diff(r, a, b)) {
        if (mode == split_mode::weak)
            return false;
        split_set sa, sb, sb_compl, tmp;
-        if (!compute(a, sa, threshold, mode))
+        if (!compute(a, sa, threshold, mode, oracle))
            return false;
        if (!compute(b, sb, threshold, mode))
            return false;
-        if (!complement(seq_sort, sb, sb_compl, threshold))
+        if (!complement(seq_sort, sb, sb_compl, threshold, oracle))
            return false;
-        if (!intersect(sa, sb_compl, tmp, threshold))
+        if (!intersect(sa, sb_compl, tmp, threshold, oracle))
            return false;
        result.append(tmp);
        return true;
@ -276,7 +293,6 @@ bool seq_split::compute(expr* r, split_set& result, unsigned threshold, split_mo

    // bounded loop / ite / other: not handled (paper "v1: bail").
    TRACE(seq, tout << "seq_split: unsupported regex " << mk_pp(r, mm) << "\n";);
-    std::cout << "seq_split: unsupported regex " << mk_pp(r, mm) << std::endl;
    return false;
 }

--- a/src/ast/rewriter/seq_split.h
+++ b/src/ast/rewriter/seq_split.h
@ -33,6 +33,7 @@ Author:

 #include "ast/seq_decl_plugin.h"
 #include "ast/rewriter/seq_subset.h"
+#include <functional>

 class seq_rewriter;

@ -57,6 +58,11 @@ typedef vector<split_pair> split_set;
 //            give up (return false) on complement / intersection instead.
 enum class split_mode { weak, strong };

+// Optional lookahead oracle.  Called for each candidate split <D, N> as it is
+// generated; returns true to keep it, false to prune it.  An empty oracle (the
+// default) keeps everything, so sigma is unchanged.  See seq_split::compute.
+typedef std::function<bool(expr* D, expr* N)> split_oracle;
+
 class seq_split {
    seq_rewriter& m_rw;       // for mk_re_append + manager / seq_util access
    seq_subset    m_subset;   // language-subset checks for subsumption
@ -65,13 +71,18 @@ class seq_split {
    seq_util&      seq() const;
    seq_util::rex& re() const;

+    // Push <d, n> onto `out`, unless `oracle` rejects it.
+    void push(split_set& out, split_oracle const& oracle, expr* d, expr* n) const;
+
    // S1 cap S2 = { <D1 cap D2, N1 cap N2> } dropping any pair with a bottom
-    // component.  Returns false on threshold overrun.
-    bool intersect(split_set const& s1, split_set const& s2, split_set& result, unsigned threshold);
+    // component (and any rejected by `oracle`).  Returns false on threshold overrun.
+    bool intersect(split_set const& s1, split_set const& s2, split_set& result,
+                   unsigned threshold, split_oracle const& oracle);

    // De Morgan complement of a split-set: ~S = cap_{s in S} ~s with
    //   ~<D,N> = { <~D, .*>, <.*, ~N> }  and  ~{} = { <.*, .*> }.
-    bool complement(sort* seq_sort, split_set const& sp, split_set& result, unsigned threshold);
+    bool complement(sort* seq_sort, split_set const& sp, split_set& result,
+                    unsigned threshold, split_oracle const& oracle);

    // same-D / same-N merge: groups pairs that share a (syntactically identical)
    // left (resp. right) component and unions the other component.
@ -84,7 +95,16 @@ public:
    // bounds the number of produced splits; an overrun, an unsupported regex
    // shape (bounded loop / ite), or a Boolean-closure case in weak mode makes
    // it return false ("give up").
-    bool compute(expr* r, split_set& out, unsigned threshold, split_mode mode = split_mode::strong);
+    //
+    // `oracle` (optional) prunes non-viable splits *during* generation.  It must
+    // be sound to apply at every generation step: a candidate N can still gain a
+    // prefix from a factor appended to its right later (concat/star), so the
+    // oracle must use a "prefix-compatible" test (prune only when N can never
+    // match the lookahead, even partially), NOT a strict "starts-with" test.
+    // The complement body is computed WITHOUT the oracle (inverted orientation);
+    // the oracle is re-applied to the complement's output fold.
+    bool compute(expr* r, split_set& out, unsigned threshold,
+                 split_mode mode = split_mode::strong, split_oracle const& oracle = {});

    // In-place simplification of a split-set: drop bottom components, apply the
    // same-D / same-N merge rules, and drop splits subsumed by another (using
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -2458,7 +2458,7 @@ namespace seq {
                    euf::snode* tail = get_tail(var_head, a.mk_int(1), fwd);
                    euf::snode* replacement = dir_concat(m_sg, char_head, tail, fwd);
                    child = mk_child(node);
-                    e = mk_edge(node, child, "nielsen const >", false);
+                    e = mk_edge(node, child, "nielsen const &gt;", false);
                    e->add_side_constraint(mk_constraint(a.mk_ge(compute_length_expr(tail), a.mk_int(0)), eq.m_dep));
                    const nielsen_subst s2(var_head, replacement, eq.m_dep);
                    e->add_subst(s2);
@ -2505,7 +2505,7 @@ namespace seq {
                {
                    euf::snode* replacement = dir_concat(m_sg, rhead, get_tail(lhead, compute_length_expr(rhead).get(), fwd), fwd);
                    nielsen_node* child = mk_child(node);
-                    nielsen_edge* e = mk_edge(node, child, "nielsen var >", false);
+                    nielsen_edge* e = mk_edge(node, child, "nielsen var &gt;", false);
                    const nielsen_subst s(lhead, replacement, eq.m_dep);
                    e->add_subst(s);
                    child->apply_subst(m_sg, s);
@ -2514,7 +2514,7 @@ namespace seq {
                {
                    euf::snode* replacement = dir_concat(m_sg, lhead, get_tail(rhead, compute_length_expr(lhead).get(), fwd), fwd);
                    nielsen_node* child = mk_child(node);
-                    nielsen_edge* e = mk_edge(node, child, "nielsen var <", false);
+                    nielsen_edge* e = mk_edge(node, child, "nielsen var &lt;", false);
                    const nielsen_subst s(rhead, replacement, eq.m_dep);
                    e->add_subst(s);
                    child->apply_subst(m_sg, s);
@ -3180,13 +3180,13 @@ namespace seq {

                    // Branch 1: pow_exp < count (i.e., count >= pow_exp + 1)
                    {
-                        nielsen_edge *e = mk_edge(node, mk_child(node), "power elim >", true);
+                        nielsen_edge *e = mk_edge(node, mk_child(node), "power elim &gt;", true);
                        const expr_ref pow_plus1(a.mk_add(pow_exp, a.mk_int(1)), m);
                        e->add_side_constraint(mk_constraint(a.mk_ge(norm_count, pow_plus1), eq.m_dep));
                    }
                    // Branch 2: count <= pow_exp (i.e., pow_exp >= count)
                    {
-                        nielsen_edge *e = mk_edge(node, mk_child(node), "power elim <=", true);
+                        nielsen_edge *e = mk_edge(node, mk_child(node), "power elim &le;", true);
                        e->add_side_constraint(mk_constraint(a.mk_ge(pow_exp, norm_count), eq.m_dep));
                    }
                    return true;
@ -3246,7 +3246,7 @@ namespace seq {

        euf::snode* replacement = dir_concat(m_sg, base, nested_power_snode, fwd);
        child = mk_child(node);
-        e = mk_edge(node, child, "unwinding >", true);
+        e = mk_edge(node, child, "unwinding &gt;", true);
        const nielsen_subst s2(power, replacement, eq->m_dep);
        e->add_subst(s2);
        child->apply_subst(m_sg, s2);
@ -3576,6 +3576,30 @@ namespace seq {
    // Modifier: apply_regex_factorization (Boolean Closure)
    // -----------------------------------------------------------------------

+    // Lookahead oracle for the split engine: is the split's right component
+    // `n_regex` prefix-compatible with the constant character sequence `c`?
+    // The factorization picks a boundary so the tail starts with c, hence the
+    // tail-regex ∇ must be able to match c as a prefix.  We use a *prefix* test
+    // (not strict "starts-with"): we accept as soon as N accepts a prefix of c
+    // (a suffix appended downstream can complete it).  This is sound to apply
+    // during split generation — it never drops a viable split.
+    bool nielsen_graph::split_lookahead_viable(expr* n_regex, zstring const& c) {
+        euf::snode* cur = m_sg.mk(n_regex);
+        if (!cur)
+            return true;   // conservative: keep
+        for (unsigned i = 0; i < c.length(); ++i) {
+            if (m_sg.re_nullable(cur) == l_true)
+                return true;            // N accepts the prefix c[0..i) → a suffix completes it
+            cur = m_sg.brzozowski_deriv(cur, m_sg.mk_char(c[i]));
+            if (!cur || cur->is_fail())
+                return false;           // N went (syntactically) dead before reaching c
+        }
+        // Consumed all of c without matching a prefix: keep iff δ_c(N) is non-empty
+        // (one BFS emptiness check rather than per-char; an empty-language state is
+        // never nullable, so the loop above can't wrongly early-accept it).
+        return !m_seq_regex->is_empty_regex(cur);
+    }
+
    bool nielsen_graph::apply_regex_factorization(nielsen_node* node) {
        if (m_regex_factorization_threshold == 0)
            return false;
@ -3603,15 +3627,50 @@ namespace seq {

            euf::snode* first = mem.m_str->first();
            SASSERT(first);
-            SASSERT(!first->is_char());
-            euf::snode* tail = m_sg.drop_first(mem.m_str);
-            SASSERT(tail);
+            SASSERT(!first->is_char());     // constants are consumed earlier
+
+            // Choose the factorization boundary so the tail starts with the
+            // LONGEST run of concrete characters c — this gives the split-engine
+            // lookahead oracle the most pruning information.  head = u' (tokens
+            // before the run), tail = c · u''' (tokens from the run onward).
+            euf::snode_vector toks;
+            mem.m_str->collect_tokens(toks);
+            const unsigned total = toks.size();
+            unsigned run_start = 0, run_len = 0;
+            for (unsigned i = 0; i < total; ) {
+                if (!toks[i]->is_char()) { ++i; continue; }
+                unsigned j = i;
+                while (j < total && toks[j]->is_char()) ++j;
+                if (j - i > run_len) { run_len = j - i; run_start = i; }
+                i = j;
+            }
+            // No constant run → fall back to splitting off the first token.
+            const unsigned p = (run_len == 0) ? 1 : run_start;
+            SASSERT(p >= 1);
+            euf::snode* head = (p == 1) ? first : m_sg.drop_right(mem.m_str, total - p);
+            euf::snode* tail = m_sg.drop_left(mem.m_str, p);
+            SASSERT(head && tail);
+
+            // Build the constant lookahead c and (if non-empty) an oracle that
+            // prunes splits whose ∇ cannot match c.
+            zstring c;
+            for (unsigned i = 0; i < run_len; ++i) {
+                expr* ch; unsigned cv;
+                VERIFY(m_seq.str.is_unit(toks[run_start + i]->get_expr(), ch));
+                VERIFY(m_seq.is_const_char(ch, cv));
+                c = c + zstring(cv);
+            }
+            split_oracle oracle;
+            if (!c.empty())
+                oracle = [this, c](expr*, expr* n) { return split_lookahead_viable(n, c); };

            // Decompose the regex into a split-set via the shared seq_split engine
-            // (sigma from the paper): first ∈ Δ ∧ tail ∈ ∇ for each ⟨Δ,∇⟩.
+            // (sigma from the paper): head ∈ Δ ∧ tail ∈ ∇ for each ⟨Δ,∇⟩, with the
+            // lookahead oracle pruning non-viable ∇ during generation.
            split_set pairs;
            seq_rewriter rw(m);
-            if (!rw.split(mem.m_regex->get_expr(), pairs, m_regex_factorization_threshold))
+            if (!rw.split(mem.m_regex->get_expr(), pairs, m_regex_factorization_threshold,
+                          split_mode::strong, oracle))
                continue;

            rw.simplify_split(pairs);
@ -3623,14 +3682,18 @@ namespace seq {
                euf::snode* sn_p = m_sg.mk(tp);
                euf::snode* sn_q = m_sg.mk(tq);

-                // Also check intersection with other primitive constraints on `first`
+                // Also check intersection with other primitive constraints on `head`.
+                // Only valid when head is the single token `first`; for a multi-token
+                // head Δ constrains the whole prefix, so we only check Δ ≠ ∅.
                ptr_vector<euf::snode> regexes_p;
                regexes_p.push_back(sn_p);
                dep_tracker first_filter_dep = nullptr;
-                for (auto const& prev_mem : node->str_mems()) {
-                    if (prev_mem.m_str == first) {
-                        regexes_p.push_back(prev_mem.m_regex);
-                        first_filter_dep = m_dep_mgr.mk_join(first_filter_dep, prev_mem.m_dep);
+                if (head == first) {
+                    for (auto const& prev_mem : node->str_mems()) {
+                        if (prev_mem.m_str == first) {
+                            regexes_p.push_back(prev_mem.m_regex);
+                            first_filter_dep = m_dep_mgr.mk_join(first_filter_dep, prev_mem.m_dep);
+                        }
                    }
                }
                if (m_seq_regex->check_intersection_emptiness(regexes_p, 100) == l_true) {
@ -3666,7 +3729,7 @@ namespace seq {
                    }
                }

-                child->add_str_mem(str_mem(first, m_p, m_dep));
+                child->add_str_mem(str_mem(head, m_p, m_dep));
                child->add_str_mem(str_mem(tail, m_q, m_dep));
            }
            return true;
@ -4246,7 +4309,7 @@ namespace seq {
            euf::snode* power_snode = m_sg.mk(power_expr);
            euf::snode* replacement = dir_concat(m_sg, base, power_snode, fwd);
            nielsen_node* child = mk_child(node);
-            nielsen_edge* e = mk_edge(node, child, "unwinding eq >", false);
+            nielsen_edge* e = mk_edge(node, child, "unwinding eq &gt;", false);
            const nielsen_subst s(power, replacement, eq->m_dep); // TODO review - ensure var does not occur in replacement.
            e->add_subst(s);
            child->apply_subst(m_sg, s);
@ -4290,7 +4353,7 @@ namespace seq {

            euf::snode* replacement = dir_concat(m_sg, base, power_snode, fwd);
            nielsen_node* child = mk_child(node);
-            nielsen_edge* e = mk_edge(node, child, "unwinding mem >", false);
+            nielsen_edge* e = mk_edge(node, child, "unwinding mem &gt;", false);
            const nielsen_subst s(power, replacement, mem->m_dep); // TODO review - ensure var does not occur in replacement.
            e->add_subst(s);
            child->apply_subst(m_sg, s);
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -1316,6 +1316,13 @@ namespace seq {
        // generalized regex factorization (Boolean closure derivation rule)
        bool apply_regex_factorization(nielsen_node* node);

+        // Lookahead oracle for apply_regex_factorization's split() call: returns
+        // true iff the split's right component `n_regex` is prefix-compatible with
+        // the constant character sequence `c` (the tail of the factorization starts
+        // with c).  Prunes splits whose tail-regex can never match c.  Sound to
+        // apply during split generation (prefix-, not strict-, match).
+        bool split_lookahead_viable(expr* n_regex, zstring const& c);
+
        // helper for apply_gpower_intr: fires the substitution.
        // `fwd=true` uses left-to-right decomposition; `fwd=false` mirrors ZIPT's
        // backward (right-to-left) direction.
--- a/src/smt/seq/seq_nielsen_pp.cpp
+++ b/src/smt/seq/seq_nielsen_pp.cpp
@ -471,7 +471,6 @@ namespace seq {
                }
            } else if (tok->is_unit()) {
                // seq.unit with non-literal character: show the character expression
-                std::cout << mk_pp(e, m) << std::endl;
                expr* ch = to_app(e)->get_arg(0);
                if (is_app(ch) && to_app(ch)->get_num_args() == 0)
                    result += "[" + dot_html_escape(to_app(ch)->get_decl()->get_name().str(), html_escape) + "]";
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -287,13 +287,13 @@ namespace smt {
    void theory_nseq::assign_eh(bool_var v, bool is_true) {
        try {
            expr* e = ctx.bool_var2expr(v);
-            //std::cout << "assigned [" << sat::literal(v, !is_true) << "] " << mk_pp(e, m) << " = " << is_true << std::endl;
+            const literal lit(v, !is_true);
+            //std::cout << "assigned [" << lit << "] " << mk_pp(e, m) << " = " << is_true << std::endl;
            expr *s = nullptr, *re = nullptr, *a = nullptr, *b = nullptr;
            TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";);
            if (m_seq.str.is_in_re(e, s, re)) {
                euf::snode* sn_str = get_snode(s);
                euf::snode* sn_re  = get_snode(re);
-                const literal lit(v, !is_true);
                const seq::dep_tracker dep = nullptr;
                if (is_true) {
                    ctx.push_trail(restore_vector(m_prop_queue));
@ -314,18 +314,63 @@ namespace smt {
                }
            }
            else if (m_seq.str.is_prefix(e)) {
+                zstring str;
+                if (m_seq.str.is_string(to_app(e)->get_arg(0), str)) {
+                    // prefix(u, v) with u const => v \in u \Sigma^*
+                    const expr_ref pre(m_seq.re.mk_in_re(to_app(e)->get_arg(1), m_seq.re.mk_concat(
+                        m_seq.re.mk_to_re(to_app(e)->get_arg(0)),
+                        m_seq.re.mk_full_seq(m_seq.re.mk_re(to_app(e)->get_arg(0)->get_sort()))
+                        )), m);
+                    ctx.internalize(pre, false);
+                    literal l = ctx.get_literal(pre);
+                    if (!is_true)
+                        l = ~l;
+                    ctx.mk_th_axiom(get_id(), ~lit, l);
+                    return;
+                }
                if (is_true)
                    m_axioms.prefix_true_axiom(e);
                else
                    m_axioms.prefix_axiom(e);
            }
            else if (m_seq.str.is_suffix(e)) {
+                zstring str;
+                if (m_seq.str.is_string(to_app(e)->get_arg(0), str)) {
+                    // suffix(u, v) with u const => v \in \Sigma* u
+                    const expr_ref suff(m_seq.re.mk_in_re(to_app(e)->get_arg(1), m_seq.re.mk_concat(
+                        m_seq.re.mk_full_seq(m_seq.re.mk_re(to_app(e)->get_arg(0)->get_sort())),
+                        m_seq.re.mk_to_re(to_app(e)->get_arg(0))
+                        )), m);
+                    ctx.internalize(suff, false);
+                    literal l = ctx.get_literal(suff);
+                    if (!is_true)
+                        l = ~l;
+                    ctx.mk_th_axiom(get_id(), ~lit, l);
+                    return;
+                }
                if (is_true)
                    m_axioms.suffix_true_axiom(e);
                else
                    m_axioms.suffix_axiom(e);
            }
            else if (m_seq.str.is_contains(e)) {
+                zstring str;
+                if (m_seq.str.is_string(to_app(e)->get_arg(1), str)) {
+                    // contains(u, v) with v const => u \in \Sigma* v \Sigma^*
+                    sort* re_sort = m_seq.re.mk_re(to_app(e)->get_arg(0)->get_sort());
+                    expr* all = m_seq.re.mk_full_seq(re_sort);
+                    const expr_ref con(m_seq.re.mk_in_re(to_app(e)->get_arg(0), m_seq.re.mk_concat(
+                        all,
+                        m_seq.re.mk_concat(
+                            m_seq.re.mk_to_re(to_app(e)->get_arg(1)), all)
+                    )), m);
+                    ctx.internalize(con, false);
+                    literal l = ctx.get_literal(con);
+                    if (!is_true)
+                        l = ~l;
+                    ctx.mk_th_axiom(get_id(), ~lit, l);
+                    return;
+                }
                if (is_true)
                    m_axioms.contains_true_axiom(e);
                else
@ -457,7 +502,7 @@ namespace smt {

        expr* const re = mem.m_regex->get_expr();
        expr* const s = mem.m_str->get_expr();
-        // std::cout << "Propagating: " << seq::mem_pp(mem, m) << std::endl;
+        std::cout << "Propagating: " << seq::mem_pp(mem, m) << std::endl;

        if (!mem.m_str->is_empty()) {
            if (mem.m_str->first()->is_char()) {
@ -721,7 +766,6 @@ namespace smt {
            }
            else if (std::holds_alternative<mem_item>(item)) {
                auto const& mem = std::get<mem_item>(item);
-                std::cout << "Adding " << seq::mem_pp(mem, m) << std::endl;
                int triv = m_regex.check_trivial(mem);
                if (triv > 0) {
                    ++num_mems;