/*++ Copyright (c) 2026 Microsoft Corporation Module Name: theory_seq_len.cpp Abstract: Theory solver for sequence length constraints using partial axiom instantiation. See theory_seq_len.h for details. Author: Nikolaj Bjorner (nbjorner) 2026-03-10 --*/ #include "smt/theory_seq_len.h" #include "smt/smt_context.h" #include "ast/ast_pp.h" namespace smt { theory_seq_len::theory_seq_len(context& ctx) : theory(ctx, ctx.get_manager().mk_family_id("seq")), m_util(ctx.get_manager()), m_autil(ctx.get_manager()) {} // ----------------------------------------------------------------------- // Internalization // ----------------------------------------------------------------------- bool theory_seq_len::internalize_atom(app* atom, bool /*gate_ctx*/) { expr* s = nullptr, *r = nullptr; // Handle (str.in_re s r): register a bool_var so assign_eh fires. if (m_util.str.is_in_re(atom, s, r)) { // Ensure the string argument has an enode. if (!ctx.e_internalized(s)) ctx.internalize(s, false); if (!ctx.b_internalized(atom)) { bool_var bv = ctx.mk_bool_var(atom); ctx.set_var_theory(bv, get_id()); ctx.mark_as_relevant(bv); } // Ensure the string has a theory variable so length is accessible. mk_var(ctx.get_enode(s)); return true; } return internalize_term(atom); } bool theory_seq_len::internalize_term(app* term) { // Internalize all children first. for (expr* arg : *term) { if (!ctx.e_internalized(arg)) ctx.internalize(arg, false); } if (!ctx.e_internalized(term)) ctx.mk_enode(term, false, false, true); enode* n = ctx.get_enode(term); if (!is_attached_to_var(n)) mk_var(n); // If this is a seq.len term, assert non-negativity immediately. expr* inner = nullptr; if (m_util.str.is_length(term, inner)) add_length_non_neg(inner); return true; } // ----------------------------------------------------------------------- // assign_eh: fires when a bool_var (e.g. str.in_re) is assigned. // ----------------------------------------------------------------------- void theory_seq_len::assign_eh(bool_var v, bool is_true) { if (!is_true) return; // only axiomatize positive memberships expr* e = ctx.bool_var2expr(v); expr* s = nullptr, *r = nullptr; if (!m_util.str.is_in_re(e, s, r)) return; literal lit(v, false); // positive literal for the membership add_regex_length_axioms(s, r, lit); } // ----------------------------------------------------------------------- // Model building // ----------------------------------------------------------------------- void theory_seq_len::init_model(model_generator& mg) { mg.register_factory(alloc(seq_factory, m, get_family_id(), mg.get_model())); } // ----------------------------------------------------------------------- // mk_fresh: create a copy of this theory for a fresh context. // ----------------------------------------------------------------------- theory* theory_seq_len::mk_fresh(context* new_ctx) { return alloc(theory_seq_len, *new_ctx); } // ----------------------------------------------------------------------- // display // ----------------------------------------------------------------------- void theory_seq_len::display(std::ostream& out) const { out << "theory_seq_len\n"; } // ----------------------------------------------------------------------- // add_length_non_neg: assert (seq.len e) >= 0 // ----------------------------------------------------------------------- void theory_seq_len::add_length_non_neg(expr* e) { expr_ref len(m_util.str.mk_length(e), m); expr_ref ge0(m_autil.mk_ge(len, m_autil.mk_int(0)), m); if (!ctx.b_internalized(ge0)) ctx.internalize(ge0, true); literal lit = ctx.get_literal(ge0); ctx.mark_as_relevant(lit); literal_vector lits; lits.push_back(lit); assert_axiom(lits); } // ----------------------------------------------------------------------- // get_length_constraint: extract a semi-linear length constraint from r. // // On success, sets: // period == 0: lengths in r are exactly {base} // period > 0: lengths in r are {base + k*period : k >= 0} // // Returns false if no useful constraint can be extracted. // ----------------------------------------------------------------------- bool theory_seq_len::has_fixed_length(expr* r, unsigned& len) const { unsigned mn = m_util.re.min_length(r); unsigned mx = m_util.re.max_length(r); if (mn == mx && mx != UINT_MAX) { len = mn; return true; } return false; } bool theory_seq_len::get_length_constraint(expr* r, unsigned& base, unsigned& period) const { expr* r1 = nullptr, *r2 = nullptr; unsigned lo = 0, hi = 0; // Empty regex: no string can match -> handled by conflict, not length. if (m_util.re.is_empty(r)) return false; // Full sequence: any length is possible. if (m_util.re.is_full_seq(r)) return false; // Fixed-length regex (min == max). { unsigned flen = 0; if (has_fixed_length(r, flen)) { base = flen; period = 0; return true; } } // (R1)* where R1 has fixed length n: lengths = {k*n : k >= 0} if (m_util.re.is_star(r, r1)) { unsigned n = 0; if (has_fixed_length(r1, n) && n > 0) { base = 0; period = n; return true; } return false; } // (R1)+ where R1 has fixed length n: lengths = {k*n : k >= 1} = {n + k*n : k >= 0} if (m_util.re.is_plus(r, r1)) { unsigned n = 0; if (has_fixed_length(r1, n) && n > 0) { base = n; period = n; return true; } return false; } // Loop {lo, hi} where R1 has fixed length n: // lengths = {lo*n, (lo+1)*n, ..., hi*n} (a range, not necessarily simple period) // For now, just assert the lower bound as "base" and give min length as constraint. if (m_util.re.is_loop(r, r1, lo, hi)) { unsigned n = 0; if (has_fixed_length(r1, n)) { // Lengths are exactly multiples of n between lo*n and hi*n. // We express this as a range: the constraint len = lo*n + k*n for 0<=k<=hi-lo. // Since our schema only handles one arithmetic progression, just use // base=lo*n, period=n (and an upper bound will be added separately). base = lo * n; period = (hi > lo) ? n : 0; return true; } return false; } // Loop with lower bound only (no upper bound). if (m_util.re.is_loop(r, r1, lo)) { unsigned n = 0; if (has_fixed_length(r1, n) && n > 0) { base = lo * n; period = n; return true; } return false; } // Concatenation R1 ++ R2: if both parts have constraints, combine. if (m_util.re.is_concat(r, r1, r2)) { unsigned b1 = 0, p1 = 0, b2 = 0, p2 = 0; bool ok1 = get_length_constraint(r1, b1, p1); bool ok2 = get_length_constraint(r2, b2, p2); if (ok1 && ok2) { // If both are fixed length: if (p1 == 0 && p2 == 0) { base = b1 + b2; period = 0; return true; } // If one is fixed and the other periodic: if (p1 == 0) { base = b1 + b2; period = p2; return true; } if (p2 == 0) { base = b1 + b2; period = p1; return true; } // Both periodic: only combine if same period. if (p1 == p2) { base = b1 + b2; period = p1; return true; } // Incompatible periods: fall through. } // Partial: at least assert minimum length. return false; } // Option (R1)?: lengths = {0, n} where n is fixed length of R1. if (m_util.re.is_opt(r, r1)) { unsigned n = 0; if (has_fixed_length(r1, n)) { // lengths are 0 or n base = 0; period = (n > 0) ? n : 0; return true; } return false; } return false; } // ----------------------------------------------------------------------- // add_regex_length_axioms // ----------------------------------------------------------------------- void theory_seq_len::add_regex_length_axioms(expr* s, expr* r, literal lit) { expr_ref len(m_util.str.mk_length(s), m); // Always assert the minimum length constraint: // (s in R) => |s| >= min_length(R) unsigned min_len = m_util.re.min_length(r); if (min_len > 0) { expr_ref ge_min(m_autil.mk_ge(len, m_autil.mk_int(min_len)), m); if (!ctx.b_internalized(ge_min)) ctx.internalize(ge_min, true); literal ge_min_lit = ctx.get_literal(ge_min); ctx.mark_as_relevant(ge_min_lit); literal_vector lits; lits.push_back(~lit); lits.push_back(ge_min_lit); assert_axiom(lits); } else { // At minimum assert non-negativity. add_length_non_neg(s); } // Assert maximum length constraint if finite: // (s in R) => |s| <= max_length(R) unsigned max_len = m_util.re.max_length(r); if (max_len != UINT_MAX) { expr_ref le_max(m_autil.mk_le(len, m_autil.mk_int(max_len)), m); if (!ctx.b_internalized(le_max)) ctx.internalize(le_max, true); literal le_max_lit = ctx.get_literal(le_max); ctx.mark_as_relevant(le_max_lit); literal_vector lits; lits.push_back(~lit); lits.push_back(le_max_lit); assert_axiom(lits); } // Extract semi-linear length constraint. unsigned base = 0, period = 0; if (!get_length_constraint(r, base, period)) return; if (period == 0) { // Fixed length: (s in R) => |s| = base expr_ref eq_base(m.mk_eq(len, m_autil.mk_int(base)), m); if (!ctx.b_internalized(eq_base)) ctx.internalize(eq_base, true); literal eq_lit = ctx.get_literal(eq_base); ctx.mark_as_relevant(eq_lit); literal_vector lits; lits.push_back(~lit); lits.push_back(eq_lit); assert_axiom(lits); } else { // Semi-linear: lengths are {base + k * period : k >= 0} // Assert: // (s in R) => |s| >= base // (s in R) => |s| mod period = base mod period (only when period >= 2) // |s| >= base expr_ref ge_base(m_autil.mk_ge(len, m_autil.mk_int(base)), m); if (!ctx.b_internalized(ge_base)) ctx.internalize(ge_base, true); literal ge_base_lit = ctx.get_literal(ge_base); ctx.mark_as_relevant(ge_base_lit); { literal_vector lits; lits.push_back(~lit); lits.push_back(ge_base_lit); assert_axiom(lits); } // |s| mod period = base mod period // Skip when period <= 1: period == 0 is already handled above; // period == 1 makes the constraint trivially true (n mod 1 = 0). if (period >= 2) { expr_ref mod_len(m_autil.mk_mod(len, m_autil.mk_int(period)), m); expr_ref eq_mod(m.mk_eq(mod_len, m_autil.mk_int(base % period)), m); if (!ctx.b_internalized(eq_mod)) ctx.internalize(eq_mod, true); literal eq_mod_lit = ctx.get_literal(eq_mod); ctx.mark_as_relevant(eq_mod_lit); { literal_vector lits; lits.push_back(~lit); lits.push_back(eq_mod_lit); assert_axiom(lits); } } TRACE(seq, tout << "seq_len: (s in R) => |s| >= " << base << (period >= 2 ? " && |s| mod " + std::to_string(period) + " = " + std::to_string(base % period) : "") << " for s=" << mk_pp(s, m) << " r=" << mk_pp(r, m) << "\n";); } } // ----------------------------------------------------------------------- // Helper: mk_literal // ----------------------------------------------------------------------- literal theory_seq_len::mk_literal(expr* e) { if (!ctx.b_internalized(e)) ctx.internalize(e, true); literal lit = ctx.get_literal(e); ctx.mark_as_relevant(lit); return lit; } // ----------------------------------------------------------------------- // Helper: assert_axiom (th_axiom in the SMT context) // ----------------------------------------------------------------------- void theory_seq_len::assert_axiom(literal_vector& lits) { // Skip if already satisfied. for (literal l : lits) if (ctx.get_assignment(l) == l_true) return; for (literal l : lits) ctx.mark_as_relevant(l); ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); } }