From 54d52d882f07f219a2102540c4273a42ed33f9f6 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Mon, 30 Mar 2026 20:00:02 -0700 Subject: [PATCH 1/3] remove indirect references to ast_manager Signed-off-by: Nikolaj Bjorner --- src/smt/seq/seq_nielsen.cpp | 46 ++++++++++++++----------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 113b0c093..ae2b8ee3f 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -3212,9 +3212,8 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::apply_power_split(nielsen_node* node) { - ast_manager& m = m_sg.get_manager(); arith_util arith(m); - seq_util& seq = m_sg.get_seq_util(); + seq_util &seq = m_seq; euf::snode* power = nullptr; euf::snode* var_head = nullptr; @@ -3325,7 +3324,6 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::apply_var_num_unwinding_eq(nielsen_node* node) { - ast_manager& m = m_sg.get_manager(); arith_util arith(m); euf::snode* power = nullptr; @@ -3371,7 +3369,6 @@ namespace seq { } bool nielsen_graph::apply_var_num_unwinding_mem(nielsen_node* node) { - ast_manager& m = m_sg.get_manager(); arith_util arith(m); euf::snode* power = nullptr; @@ -3456,8 +3453,7 @@ namespace seq { // ----------------------------------------------------------------------- expr_ref nielsen_graph::compute_length_expr(euf::snode* n) { - ast_manager& m = m_sg.get_manager(); - seq_util& seq = m_sg.get_seq_util(); + seq_util &seq = m_seq; arith_util arith(m); if (n->is_empty()) @@ -3489,7 +3485,6 @@ namespace seq { void nielsen_graph::generate_length_constraints(vector& constraints) { SASSERT(m_root); - ast_manager& m = m_sg.get_manager(); uint_set seen_vars; seq_util& seq = m_sg.get_seq_util(); @@ -3580,7 +3575,6 @@ namespace seq { // ----------------------------------------------------------------------- expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) { - ast_manager& m = m_sg.get_manager(); SASSERT(var && var->is_var()); SASSERT(mod_count > 0); @@ -3706,7 +3700,6 @@ namespace seq { if (node == m_root) return; - ast_manager& m = m_sg.get_manager(); arith_util arith(m); uint_set seen_vars; @@ -3775,7 +3768,6 @@ namespace seq { // fall through - ask the solver [expensive] // TODO: Maybe cache the result? - ast_manager& m = m_sg.get_manager(); arith_util arith(m); // The solver already holds all path constraints incrementally. @@ -3805,7 +3797,6 @@ namespace seq { } expr_ref nielsen_graph::mk_fresh_int_var() { - ast_manager& m = m_sg.get_manager(); arith_util arith(m); std::string name = "n!" + std::to_string(m_fresh_cnt++); return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); @@ -3862,7 +3853,6 @@ namespace seq { if (result == l_true) { m_solver.get_model(mdl); IF_VERBOSE(1, if (mdl) { - ast_manager& m = m_sg.get_manager(); verbose_stream() << " raw_model:\n"; for (unsigned i = 0; i < mdl->get_num_constants(); ++i) { func_decl* fd = mdl->get_constant(i); @@ -3887,8 +3877,6 @@ namespace seq { if (!regex->is_ground()) return false; - seq_util& seq = m_sg.get_seq_util(); - ast_manager& mgr = m_sg.get_manager(); // Build the overapproximation regex for the string. // Variables → intersection of their primitive regex constraints (or Σ*) @@ -3907,7 +3895,7 @@ namespace seq { // Concrete character → to_re(unit(c)) expr* te = tok->get_expr(); SASSERT(te); - expr_ref tre(seq.re.mk_to_re(te), mgr); + expr_ref tre(m_seq.re.mk_to_re(te), m); tok_re = m_sg.mk(tre); } else if (tok->is_var()) { @@ -3917,8 +3905,8 @@ namespace seq { tok_re = x_range; else { // Unconstrained variable: approximate as Σ* - sort* str_sort = seq.str.mk_string_sort(); - expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); + sort* str_sort = m_seq.str.mk_string_sort(); + expr_ref all_re(m_seq.re.mk_full_seq(m_seq.re.mk_re(str_sort)), m); tok_re = m_sg.mk(all_re); } } @@ -3930,16 +3918,16 @@ namespace seq { // Build union of re.range for each interval euf::snode* range_re = nullptr; for (auto const& r : cs.ranges()) { - expr_ref lo(seq.mk_char(r.m_lo), mgr); - expr_ref hi(seq.mk_char(r.m_hi - 1), mgr); - expr_ref rng(seq.re.mk_range( - seq.str.mk_string(zstring(r.m_lo)), - seq.str.mk_string(zstring(r.m_hi - 1))), mgr); + expr_ref lo(m_seq.mk_char(r.m_lo), m); + expr_ref hi(m_seq.mk_char(r.m_hi - 1), m); + expr_ref rng(m_seq.re.mk_range( + m_seq.str.mk_string(zstring(r.m_lo)), + m_seq.str.mk_string(zstring(r.m_hi - 1))), m); euf::snode* rng_sn = m_sg.mk(rng); if (!range_re) range_re = rng_sn; else { - expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr); + expr_ref u(m_seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), m); range_re = m_sg.mk(u); } } @@ -3948,15 +3936,15 @@ namespace seq { } if (!tok_re) { // Unconstrained symbolic char: approximate as full_char (single char, any value) - sort* str_sort = seq.str.mk_string_sort(); - expr_ref fc(seq.re.mk_full_char(seq.re.mk_re(str_sort)), mgr); + sort* str_sort = m_seq.str.mk_string_sort(); + expr_ref fc(m_seq.re.mk_full_char(m_seq.re.mk_re(str_sort)), m); tok_re = m_sg.mk(fc); } } else { // Unknown token type — approximate as Σ* - sort* str_sort = seq.str.mk_string_sort(); - expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); + sort* str_sort = m_seq.str.mk_string_sort(); + expr_ref all_re(m_seq.re.mk_full_seq(m_seq.re.mk_re(str_sort)), m); tok_re = m_sg.mk(all_re); } @@ -3968,7 +3956,7 @@ namespace seq { expr* ae = approx->get_expr(); expr* te = tok_re->get_expr(); SASSERT(ae && te); - expr_ref cat(seq.re.mk_concat(ae, te), mgr); + expr_ref cat(m_seq.re.mk_concat(ae, te), m); approx = m_sg.mk(cat); } } @@ -3981,7 +3969,7 @@ namespace seq { expr* ae = approx->get_expr(); expr* re = regex->get_expr(); SASSERT(ae && re); - expr_ref inter(seq.re.mk_inter(ae, re), mgr); + expr_ref inter(m_seq.re.mk_inter(ae, re), m); euf::snode* inter_sn = m_sg.mk(inter); SASSERT(inter_sn); // std::cout << "HTML: " << snode_label_html(inter_sn, m()) << std::endl; From 14f71ea852634bae0761fc39cd3477a99e3ee7a1 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Tue, 31 Mar 2026 16:36:10 +0200 Subject: [PATCH 2/3] Reuse power variables and symbolic characters --- src/smt/seq/seq_nielsen.cpp | 78 ++++++++++++++++++++++++++++++++---- src/smt/seq/seq_nielsen.h | 19 +++++++++ tests/ostrich.zip | Bin 115740 -> 115751 bytes 3 files changed, 89 insertions(+), 8 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index ae2b8ee3f..635833601 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -394,7 +394,8 @@ namespace seq { m_solver(solver), m_parikh(alloc(seq_parikh, sg)), m_seq_regex(alloc(seq::seq_regex, sg)), - m_len_vars(sg.get_manager()) { + m_len_vars(sg.get_manager()), + m_gpower_vars(sg.get_manager()) { } nielsen_graph::~nielsen_graph() { @@ -489,6 +490,10 @@ namespace seq { m_mod_cnt.reset(); m_len_var_cache.clear(); m_len_vars.reset(); + m_char_var_cache.clear(); + m_gpower_n_var_cache.clear(); + m_gpower_m_var_cache.clear(); + m_gpower_vars.reset(); m_dep_mgr.reset(); m_solver.reset(); } @@ -2886,8 +2891,11 @@ namespace seq { base_str = seq.str.mk_concat(tok_expr, base_str); } + unsigned mc = 0; + m_mod_cnt.find(var->id(), mc); + // Create fresh exponent variable and power expression: base^n - expr_ref fresh_n = mk_fresh_int_var(); + expr_ref fresh_n = get_or_create_gpower_n_var(var, mc); expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m); euf::snode* power_snode = m_sg.mk(power_expr); if (!power_snode) @@ -2922,7 +2930,7 @@ namespace seq { expr* inner_exp = get_power_exponent(tok); expr* inner_base = get_power_base_expr(tok, seq); if (inner_exp && inner_base) { - fresh_m = mk_fresh_int_var(); + fresh_m = get_or_create_gpower_m_var(var, mc); expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_m), m); euf::snode* partial_sn = m_sg.mk(partial_pow); euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn; @@ -3178,8 +3186,11 @@ namespace seq { expr_ref char_expr(m_sg.get_seq_util().str.mk_string(zstring(cs.first_char())), m_sg.get_manager()); fresh_char = m_sg.mk(char_expr); } - else - fresh_char = mk_fresh_char_var(); + else { + unsigned mc = 0; + m_mod_cnt.find(first->id(), mc); + fresh_char = get_or_create_char_var(first, mc); + } euf::snode* replacement = m_sg.mk_concat(fresh_char, first); nielsen_node* child = mk_child(node); @@ -3224,7 +3235,6 @@ namespace seq { SASSERT(power->is_power() && power->num_args() >= 1); euf::snode* base = power->arg(0); - expr* exp_n = get_power_exponent(power); expr* zero = arith.mk_int(0); // Branch 1: enumerate all decompositions of the base. @@ -3238,7 +3248,10 @@ namespace seq { if (!base_expr || base_len == 0) return false; - expr_ref fresh_m = mk_fresh_int_var(); + unsigned mc = 0; + m_mod_cnt.find(var_head->id(), mc); + + expr_ref fresh_m = get_or_create_gpower_n_var(var_head, mc); expr_ref power_m_expr(seq.str.mk_power(base_expr, fresh_m), m); euf::snode* power_m_sn = m_sg.mk(power_m_expr); if (!power_m_sn) @@ -3265,7 +3278,7 @@ namespace seq { expr* inner_exp = get_power_exponent(tok); expr* inner_base = get_power_base_expr(tok, seq); if (inner_exp && inner_base) { - fresh_inner_m = mk_fresh_int_var(); + fresh_inner_m = get_or_create_gpower_m_var(var_head, mc); expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_inner_m), m); euf::snode* partial_sn = m_sg.mk(partial_pow); euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn; @@ -3592,6 +3605,55 @@ namespace seq { return fresh; } + euf::snode* nielsen_graph::get_or_create_char_var(euf::snode* var, unsigned mod_count) { + SASSERT(var && var->is_var()); + + auto key = std::make_pair(var->id(), mod_count); + auto it = m_char_var_cache.find(key); + if (it != m_char_var_cache.end()) + return it->second; + + std::string name = "c!" + std::to_string(var->id()) + "!" + std::to_string(mod_count); + sort* char_sort = m_seq.mk_char_sort(); + expr_ref fresh_const(m.mk_fresh_const(name.c_str(), char_sort), m); + expr_ref unit(m_seq.str.mk_unit(fresh_const), m); + euf::snode* fresh = m_sg.mk(unit); + m_char_var_cache.insert({key, fresh}); + return fresh; + } + + expr_ref nielsen_graph::get_or_create_gpower_n_var(euf::snode* var, unsigned mod_count) { + SASSERT(var && var->is_var()); + + auto key = std::make_pair(var->id(), mod_count); + auto it = m_gpower_n_var_cache.find(key); + if (it != m_gpower_n_var_cache.end()) + return expr_ref(it->second, m); + + arith_util arith(m); + std::string name = "gpn!" + std::to_string(var->id()) + "!" + std::to_string(mod_count); + expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); + m_gpower_vars.push_back(fresh); + m_gpower_n_var_cache.insert({key, fresh.get()}); + return fresh; + } + + expr_ref nielsen_graph::get_or_create_gpower_m_var(euf::snode* var, unsigned mod_count) { + SASSERT(var && var->is_var()); + + auto key = std::make_pair(var->id(), mod_count); + auto it = m_gpower_m_var_cache.find(key); + if (it != m_gpower_m_var_cache.end()) + return expr_ref(it->second, m); + + arith_util arith(m); + std::string name = "gpm!" + std::to_string(var->id()) + "!" + std::to_string(mod_count); + expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); + m_gpower_vars.push_back(fresh); + m_gpower_m_var_cache.insert({key, fresh.get()}); + return fresh; + } + void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) { auto const& substs = e->subst(); bool has_non_elim = false; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index cd53d4f2e..93ed889e4 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -775,6 +775,16 @@ namespace seq { // Pins the fresh length variable expressions so they aren't garbage collected. expr_ref_vector m_len_vars; + // Cache: (var snode id, modification count) → fresh character variable + std::map, euf::snode*> m_char_var_cache; + + // Cache: (var snode id, modification count) → fresh integer variable + std::map, expr*> m_gpower_n_var_cache; + std::map, expr*> m_gpower_m_var_cache; + + // Pins the fresh gpower variable expressions so they aren't garbage collected. + expr_ref_vector m_gpower_vars; + // Arena for dep_tracker nodes. Declared mutable so that const methods // (e.g., explain_conflict) can call mk_join / linearize. mutable dep_manager m_dep_mgr; @@ -1140,6 +1150,15 @@ namespace seq { // modification count. Returns str.len(var_expr) when mod_count == 0. expr_ref get_or_create_len_var(euf::snode* var, unsigned mod_count); + // Get or create a fresh character variable for a variable at a given modification count. + euf::snode* get_or_create_char_var(euf::snode* var, unsigned mod_count); + + // Get or create a fresh integer variable for gpower n (full exponent) at a given modification count. + expr_ref get_or_create_gpower_n_var(euf::snode* var, unsigned mod_count); + + // Get or create a fresh integer variable for gpower m (partial exponent) at a given modification count. + expr_ref get_or_create_gpower_m_var(euf::snode* var, unsigned mod_count); + // Compute and add |x| = |u| length constraints to an edge for all // its non-eliminating substitutions. Uses current m_mod_cnt. // Temporarily bumps m_mod_cnt for RHS computation, then restores. diff --git a/tests/ostrich.zip b/tests/ostrich.zip index 2b02bb194ba8005f7dd6791ca9399b937db1f21b..54131706e07f74177afd8f22f60079d9c5a2c7be 100644 GIT binary patch delta 7962 zcmZX330#j^)co_l}%d*4?dpYOTfv)t|6bMI5@7isTUq@88Ko|x*dFte4Pc%5So zW+n3QxoKGJ1J?LyI5bPky;mBugZ^iGorsy0RjPN!#i;+MIp19(SLOEG>#R2?L|S`;?xHm&yJ+-Z|6<|f#AynGsR zz__dT%9X7TMea4+8qelN`_JyTvzx)bk`}>lf?|ri8t!@jpF3ViT4HKe;ZrlK+vK~A z`}aEd%HwSNyvu<(WfMG7e6pn6bG=8rkIZYjdag^)HOCgOjV>5m=H$2SSv}vu+Bz3^ zn5ni(k~9(|$#<}<_?G{1Pv<8xPYC%cN*85^=R|7A_KX5TR(d=Ds zgPAS=+3g#9VetL;!8sRNHvN0JuU?#f;>Es^BWvFFSQ$AXF)|}QeR#UojN4uG7kZf9 zdF1|Ph1st&ztr@(u%&9zFaQ2mJnT(en3jvRsa4Y2MV4^|o8tq=1V5f1f3sw9X7lZ) zL*24wq*;6I*tFgMeG|VgcB%mmd6&#n+sz-B;cMUhlI_D+I<^r$HVXJ z*D+ZZlomPC!gYN7Os{{u=lvzGX=!;;+uEwU!tdq^lhtA8)6!E?vhDNIvmUqmP~FHc zqEpwH&!z@;cit>NacJ|`18T32^Ug0&r}SKVwQ@{q^Y{f9T=OF)&J8)X<9?G@twW0w z9ZFKWem=Je_{RhjvFE?Y)1qXGrHbgNY|< z-aSihx>e`?w&?7jsMv>@OXd{(d2_&!=t|?fO{-599`~MjC(E*C>XLiL5yu{O-t4qs$jYTp`$o?% zxzO+Kh6A4(T!*(``fjUycDwv+yV||Q@tu3jTzu_|o_XhxL6o|>p6>56{c zWO_YkI?GjCpgDu4dX!8Fd(Xt&q7#&*ZHmq#pgB8#FnNW!oqW^loZ8WRpo$Ag%{mBS zm!&ZmaEu|CvXcjO+5HyziV4fKYt8msFXwVUn`ayyT0Z2c+v+aIGFuO3*E->+T;4iG zBmcryw)yp^>}NYmBOjHOv1QtwmLJe~hS|RYF1R}$6~ec+`#^q5s(hznAxv(63gp|w zbaFia@_p{@!pu8n{glf)W^y^M(^U;;%|^MY>*$-au+B+8+x5Gwx&PIY~$ z5NvO^=Nwmbf6p(Xb6&tkrGy0din`r-JlLa_}#0X5bh4{!Ufi7$afyw)Tgb` z^d0IfG*f(E^4MeiUdoTD56$NHVw;9Va-zQfCk0mwf6AK`!_RRH54L9cBO9=L0rN#W zaHj^_vmQaI8rg|e2Bq+_93Ff_!#T1sBT@wysK^ajlaW(3HcHIpas`dUBjiFI&XDy8 z&D6AeGTl*IxST)gjI!f=%)br>6PB)H$aam+=gKl$8#Xg6Q?8_KXfURaid*`Qz05Ix zlo@*!;l?IJMAvDrIKX8i$asP3g9e;(RZ^G{!TTOb&(J1kmg6a>6l*>wl15@@3Au72*2EEN~z z?OHM~0eZV?IM&}?%@wt~y?7yFV#jsZ>NAB(6!owYUtzNC>=Ge#I&Uq6pbPVa)1!<3 z@gDD8`jg|C%Xhi&hEg5T&(5pbLO5_;S0zvMIj25ZrpFmt_)hHm&0P5p+ORFRo^f1$ z+k~zAqgs*G%WqTVQIt7%z6!kif#d3XHQZq3eGmq#AAIAAHRZL!;Nb^e6SE$@;O#8! z<}CYh3712kJmuKu>0cb%KKq+v)8|Jy-gKFgeiYPC^s@He9!Id`;cu@|k z>ZpGBHcea?VO5l;%1vTgP1l8TOWb+CQ!sNrZWm13|Aumb&8m6FBTf4JFVCo`FRwWU z)PCgX^YtyqKHo(9=T?V(-_I3w2)9*nu;TxaI0|69!eV_Dd{t8E{~ye-?`{|_L!!ds zA2PgAgu?S$kOExlTOay?l8>074d1x1SO@NLVVo{hDng-;9?)iz^uNzzOWdjt$OH8< z0BsS{vjH^bl5)G>yt5D<~^Nl1UV`tJqn{6Lwzx@ zNloA&r|g;nU0cd9ipDj=p=PjIq4B*L)X4SkmEt>dc*+fLSwJnnvKss*&? zTU*%41_J8wO)ayv#RIl*KxtsK9o*r9yFJZcVJscsiV}~CgDp04q_Ha9opz*NNL{$C zEzlWIq=W5%-ZP4H(HTsH^vDI83+ZbIFcXrQ8$LS*w4AJI2QoLU z=sG$*_E77GZy?)UFG{0xXa4nn@L|kBJ5s}!{PbO{^P`X6%v1L_4R6KP=@~ICy{XDl z(hQaM49#LFvvp+C^vB9K`I0H^wThoQ&%jEh7^$xrHs)twg-K)f#pnYsdDX^rsHo%} zFwxuuX`!;2Fs@-1_Tr8^12CYC(08TO2YAXTs>X2(LX%A^Ipi-lqG z?=2)Yv!gBdoD-)QTI0qzQiR(`tbvtgfQttORl^YlC|esN+IgtT%~ppB;`SlsDT;%;y(j_MMNbnk$76%6Z$u1SQkop3RsT5MR4UY}a5x z+#o{Lr5nCUrV!n_AUr)2Myob@q0KDHjoE`KZ^K&i z_yx1bpo2GFokc@Q_K}#)STV+kUnx}op%SYaD~?lsI+ z67Pscb1Ateg-EPXdZZ~EJx$Wd2~4Mh2j2r5TIHT zj@t7H-6L@Hd@3=IBPI6Rbg=|)=TnrK<4}JA1Q?o)m!zc0;qg-cG?9#G!JoLp3@>I=`n9*|Q>`_z<9T$T|{Sz}KDLj(7+BfCgbmFwz{i<+D zXq*Yoc!3ysO&V?GDqC)1^BZ-zCLMeJMnfAhM`F>dUAW=_Ddd*(B<8ZV4JSG+AAhxg`!xqD0?Y{s^(|l=%p}H9=lj#lX9AK$2Ut!KlL}%%cT3Z?-Ggm zZnor#tV~k$Sc-cU#pz`fahoGo^j=1Nm#vW4RvYbDHyP#r+rpGCX3Yc*;whd|ZNH5f*ycW&*E5{-Uy>G!|y!oU=(my?I% zdTh7?{2Tnao?9e^$HY%@66>~M1$D7ygTxBdHt51ArABR(SocF>c34_m~xVG;t!F=;p>PWszt^4bR9&f7Mxbl9K+Uww<_Zd z=B=lEn^!``=)M@Y*9NfAUwBrMV&WsCrYDSJRgFdOJ4qogIVZ80a*_P1jTATIJX&p} zQjl>0Q#aCtrd*`XAB#1qzllNxT>2R*y9nCjNFoioUH(ZrVfZw#5P7tZbaLxbiNP}w zL6t*Y7+*oF9Pm?Jxq^u~;IEy1Rgz+(;xT;|SPH*dGLzG;Ni4KdETvyA`FUQ)xLgR* z)h&~xxP-{4*g78H6oY+bXt5cBbQax^r1e+x~#RZ{7AzL%s);ghGJ?_97!qdahy9X?3to(EyF+eZnD zjiCt^<-tJN`;(^C+mMR0%RZ_YHveg;)O|2Wu2-XJmG1*jx#~x&XFlnkf95(TKH01J zG^W#EB;42tL_xLOPfWx&3Hvk#QAy*8(Vp;~1~)khUDLoyWYuZKu%!g}y$Oi(W>P?# z;0)Nl0Q}^)0C>erqdB5zDph!^00QN+GGML+h^&b_KvB-rS{46zDr!@o&wFO%&R)ec5zzD0=bbmP;1M1sDJ3g)KgJh@Q2=Fg^Xv-P$G%zyy zAKwSh8=>(bx;qbTL8LohjU3gXnyI6^aB+zRmsMd;QBs}!uX#lb}sd6hkGFQRs!WA;&MHfe_; zM=4z_od7?3fViG&j?&PrXcfQo14rz0jP|x&dqCX*npD|$j5?@okC%^8*Ed}N_YVYd zXu^(@=YChfrJkD1J*F_rIsnEFBIX3G*0he;@dS-Mq!TVvu)7=nLnzyH2K?p?{EWED zI$Z!Ohk!U*&rVX1YGQu((S+@FiWuky_-LqxS*tK|cfcpU8s?+IydkF45A2jn%?yVX z)5-hL1LKOR<1fG9rDEufk*8@odc83HG?7hvWBF-Xn8tmu%NaUZTJC^V^wG_C@a!3~ zyylKi&QNwO>kBw@q$VpSmXIdc0~eOiE;{tXCnd^6BES&OhSDf1`T;%-1z&kZe^_2B zf=w7j3cmrs7HguPPzd@32kWD1K`Uj3#-r)Ue4dE!L?~~dh|y#*%M&ntH250y90bZU z-6=LIQFH6+hLQf?LBQJ96A?y*QG|Riz?3i=ar9tdIrT*vYmFgOCvQNTF{G*X2G&kT zX!a_a?L&Y)7DRS9wI})jT82}}F&aw2!XZSrXsD+5pRY0fvGkyy;j5uF53*52%Dwym zQ^(Q?3G)ZMuLx5EfaM!G@oA<-&>&|A092E(p<=rCyPe#(S qp%19y_Mo2}Sctp7oGBWCel96PS zak=J2S=kxarhd=+ocHHm?)TR}ujjm;ah|iE_ow`28rjP7x)W1ybt7KC@K$i$jYR!ISwpHjng0uA1%n2X*}qUm5bjB= z!Nh}1{DrrKa*zK6WTs1hv0{`CbQ|>lnOcL|;QypA_jc}o`G_I^$(5OEgWXWFLe-@M zJRM4+Rjn+Qz~-tijPWMpf1&nDt>Sba(n4fBN=@@m`x4eFJ~x6iB+TN5pGtb)1(SxnBJbKr(4)bVbX_I(6kEuj7Dm7hI$)C{+%-AdUbb^ z36C{8a>2A=V^Oi~P8rSpWC(Tri6-@Iw3iX}1}&+TPCAqK>pWn%PWLXud3rY`gbRA4 zHFa!kPS@&3{!Fa-xnV@ro5cM}9B;DVXTqE=F_X$NBz^DW0NV# zO5K(gJfLH%=S*qd`XNI_o4*-;ZS$5HzHaNz43*v6YjF=3+wV*>z)p{AM%Xvvnh6dM zn8u@nHP=Kq+Hg&#Q!R_C+tO0GwWG+jJ3HZKkdi6s?8AjdT|082UpI9wcy#Z^1iGk4 zJ+ocX)0S(Ly&SmaQExtiD}A1`Er%!BkXEUg(bvBvNd%qa{E6X=zSRsz^gAhGhPyrW z?7xJI#G&tsnV;d3+l6nc=(97Z{e0xy(E>$3AD6IPL*+3=6>hEQPWO+RKIO z!G&B%4$0<1yU=x9I2D$~h4GVQrhf7zk-1+9&(~yuV`o2P7#v+rdx@sBuZ>zf#-F^# z^T^~%zd@0Cj`~O4&&3-*esn9m{kqL_x2+7>(r5R)MBmVD>vJpnFS(oi{hPVMY31UC ztDfUpdUxy)-|2o#cNcwOXcfG#Jn3Ad z|H088Y=;dw=zq?mVvFH&FH=}j8k^TpL$1((`fEvzbyZ=q~| zpmoXW1>1FkH6EI!>FDgNds@@1$a`tYl&v8ZH9l9? zmek0%Ev?A9`0ndUtvyF3++V*z{a)>oyjrIJt zAZzM}WRFp`R<8Tryt56^FwL~PnOt?rW#)p&It}kZ8k&Jam;Vq%K`7FuKI&#ve~u&D zG6OIMZk4>2F>IIpbI7RY+;T~w)Sc%>*mL0@yO|4h^QUnkB5pPl=#%&_(#@@WkZ_(b zVei62T!>hV*RrItNvdIjsslqjhnlp{vK{Qof4;1S;kD%#ne~?BCd`_at~kpSNhu$h zB0hB$d#aR8R;ekN#)PuIY17R2%tCv$Cx6T?3~EOcA%+=!hYt+@Ui+OHz0We`Y3Z*I zR+oM>@B7@Pt-~8HobN7JHwl& zxZ&l~Y?Y<{%;@zq6)Z~j*=mLf=bkd0dj1)6^1o2Z(6I6Z!#kCnKYj6>)K+F*I>>wT zzLL)jzFxV?@bVvz8A|J4eKn2A^#)e-)gVo}{@QY>t!(orPKK1C(~X-98{NFa@b}xD zl;*eMc}?;AZQQ()`S8Ewa--S3ZJarNe=8TJR(mml9(nMFMcDiB9m6e;UNKz#_#?x# zC$$U{YFPWJm1PfxV`xCzsij-XE0^cSFa^hHh4XkXSlG| zg=cv74PTaXZ*MWnt?$@GDbwEbbyH=aI-1t>(ci#g#eLA=v6g*o%46kz;*%Zyxr)Ua z_2oIk+)w@Kvabb9?)HtZl-l=PkyR%7by&Shv%a@}pG54RD3!Pp(cZT?tR!TU#6oA0 ze34jiRFGO7ex1ld);iS*$M{R_RN0xZG6&Fq7JTnQ zW~kgAGQqDKc_s)nfB^(k4o^d?xVN`QNIx^sijz<&g3H#0{Zg&RGU>QVyz*&pZo!7*q@X!JGoet)P`U>fUqyb40oxD zRfqBcWQWR%-F)l=Rt-+N5G!bPh_qE;WLzF;(nFYR^B^Es2^&1c-1` zS=k*%%0?8x<1q17JU}5^60#Hm)JnoAH33F@sH|g-ptY{L0D7J(>byjqN9uwner2yoqat%RS=Axi*9g-K2RE%yUz!)E5$9?U}k&4w7V7{-4 zN|C4xeF3(YlK@3dV*xh!kq+G4`#9=$H^J%hC$^m0BT>Z$IJDzve##Its~{s4l}!b> zIGVJBpe)?YXDf(_;s#@_eS|6 zJ-$JWBE(*xZ%t&zV*=W??f{_^h`(6l2uCO2>}~2OP)7?(?&BSb7J;20Jd}*oYTa27 zW`<4-jiKo}rd(f#dc~X00*$k3&6$y5xFb$=QCWKH$&reX7gu(Li(zD#!lxUK+eXeJ za3aQ-)m@;ZmCTe#Oi&M;bQRNh5_-G!1p7%isy@90dcKWpTOnzjdc%Fxh_QX3`((tW zzd^c$NzU*bQL(nKKwI0(9tl&>BflSHPeDyee}S5IkTo5rqNZ>F44sO;H(VfJ!n3Yx z!qM=xn?UDxk`0kDReW<7=$0!(fX&VZt;Fk}?R6NO)>67)2nS z83K_KejEyCBy8giR*{H?J}?haQ)?JIJ`NBZg!^A}ItDKrCeR_y@@$SpMlA7#nCZA9 z)(wZs>12c=Xavq!Ke_wh8EDbe4;IY83m ziTjR@5~!7%4PVO{v(VCYG^EZVBN~1g%~OhsgsK^&6IeuHQWawa>N8L_kCvE)V+E@3 zAv2Z8h-1gWvnUdvVIP12W{-zNeQB>shbwV5s`UdQXEyOulm`mbYp|SS$7s~3jfbJp znA^4SkRMHki^(DIBbtoRc9|dr3JaSYV-HRf@p4%)hnQ-7=PY;;(wv9ChqPE63SZ~o z1vV=TH<+&l*DQ`fP3MV_8-tqH6LDpS%NaY&Ma|tw;5nE0>7SS^2vMQ4qU>jeP7eT13RaUy??9oTpi5^!KJbq7Hs>C%`H zk49z7AT=HrB5%1souc`1HvjhnPY?_RV|>B*-+{y>$pW<#vw+?Fr{?^dTqvRwSFGqu{Jz&3__=yMh!V(Fa z7s1`-#JACnBIXbkJUL<(P4_Y8F{dVDFPTb#9?Ox_sv3vP(0u~ks3tSV&_*=h4}T@& zweog9IITcDa{!VgEG~vS67DJi#}qu4LrP(J3O*QlhlPx-@7qgDktr;*`W0VmLI`Yg2bS%+_FWg6PNItkCx z$S_g=H1t`Cn05xzSCTQ}^0N{e(9cE9_^>*x!a*dS!}(X5a%KTCqThMQT7`Z=mGEj6 z@ztJk3HykNni(7t1ws3WBgCeoRrD2rbiDNzUd8w5ur>E@oq-Vpt_k$ekrteZk(f?@ z3Ut*`dC%UKn4Z@qrYSU{WUx5!29!}^LXRAiS2d4f2W~edHdOSu1+6o2zq#DT*{*2E z!&YRXh5H@2pNTti@L$kpHSTVYy8x@nXl;*sf-pH^LTF56*uQINyGBanK6ZOXF6H8p z5t~-Q`ZYM%FI7;BsCZN@(0=D-i&#LPD-Q(fa6v9SFCrtRJcQ?ftLgU$oYxX>(fKi? zuf?|J6L_)~SNCiUIA>w3h^P42UXjnxFjugWlCg3_~eMt;F;i*MC@V3ddz6iYk^kXkf*#F88NUHeyqp)$Kb6% zr{9))%*aN~nRl=|8*^ImH)!M_cKrxJh>C}wu)L|3_wKtKwCwN&bT?qBW%U(Hl80^i zJa6BC8l!J;aswGHHuw%!za!RvhXsg=w?FXet&uHFaxt^#bpmyNDl@Z@5i9B;Jr`}2 z0-^fPWsUttv@IqCJU3#8#Ug>NlCVNS=+l?-Iqtm)Z5FB#djFMta8o7bojRdjwel`_ zE-`&I2+esT&qm)oiP0o5E{_aXG}j_D=3PrZlv+up){xNP_i`6*o3Y-~)F#4oyt6_f zbvfw_ImjuVX%i^jOac`j8j*}|Ms(6gBOW7g3u+tbGBW;?e2k7sWHl#;f0nNd>#azh z)FY6)6~DlGe31hzlITKxMh7bL8e=4T9@;G*HHl3Kz4y(GGixNqrYWHbb>`A7>!jV@ z2y>kos(QWomg%_-%^nz$)z0{%5&K3-k(k%b2n-^sFPZyDiF=IU+cw;4&k zlZ+Ezw}JCJNq~61E!Y;|! z>j1BJ;SO2pNZ_Rjk+(?lZp>zPM*`YrD(ZrfMXF3_@0Y| zhA`9tOD#5p%S9OOr7OHF!pHHj8vzqXB5$mCCFVul3A}MseI?zLsObX*s^e`U=7`OGJRVDw8- za&9;Q_i@CQ-9w2b81eQ9$SJ{yHGc4=1S3B7hv5hDSX~=Qz^b2WYgHda(+#8G$3fyF z295^rQZh`Z?HD3`*9JyK22PFu$EBnrRF|UNmof08l(aQeZy?n|w#1eUn Date: Tue, 31 Mar 2026 08:56:31 -0700 Subject: [PATCH 3/3] add split function for unit_regex Signed-off-by: Nikolaj Bjorner --- src/smt/seq/seq_nielsen.cpp | 108 +++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 635833601..73b1e3858 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -30,15 +30,15 @@ NSB review: #include "smt/seq/seq_regex.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" +#include "ast/ast_util.h" #include "ast/rewriter/seq_rewriter.h" #include "ast/rewriter/th_rewriter.h" #include "ast/rewriter/seq_skolem.h" +#include "ast/rewriter/var_subst.h" #include "sat/smt/arith_solver.h" -#include "util/hashtable.h" #include "util/statistics.h" #include #include -#include #include namespace seq { @@ -2239,7 +2239,13 @@ namespace seq { if (apply_const_nielsen(node)) return ++m_stats.m_mod_const_nielsen, true; - // Priority 9: SignatureSplit - heuristic string equation splitting + + // Priority 9: RegexUnitSplit - split str_mem c·s ∈ R by minterms of R + if (apply_regex_unit_split(node)) + return ++m_stats.m_mod_regex_unit_split, true; + + + // Priority 9b: SignatureSplit - heuristic string equation splitting if (m_signature_split && apply_signature_split(node)) return ++m_stats.m_mod_signature_split, true; @@ -3120,6 +3126,101 @@ namespace seq { } return false; } + + bool nielsen_graph::apply_regex_unit_split(nielsen_node *node) { + for (str_mem const &mem : node->str_mems()) { + SASSERT(mem.m_str && mem.m_regex); + if (mem.is_primitive()) + continue; + euf::snode *first = mem.m_str->first(); + if (!first || !first->is_char_or_unit()) + continue; + + // Take derivative of R w.r.t. :var 0 (canonical, cached), then + // substitute inner_char for :var 0. Resulting ite conditions are + // on inner_char. + seq_rewriter rw(m); + expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m); + if (!d) + continue; + + // Extract the inner char expression from seq.unit(?inner) + expr *unit_expr = first->get_expr(), *inner_char; + VERIFY(m_seq.str.is_unit(unit_expr, inner_char)); + + var_subst vs(m); + d = vs(d, inner_char); + + + euf::snode *rest = m_sg.drop_first(mem.m_str); + bool created = false; + + // Worklist: (regex_expr, accumulated_char_set). + // Call decompose_ite in a loop until no more ite sub-expressions, + // branching on c=true and c=false and accumulating char constraints. + vector> worklist; + worklist.push_back({d, expr_ref_vector(m)}); + + while (!worklist.empty()) { + auto [r, cs] = std::move(worklist.back()); + worklist.pop_back(); + + if (m_seq.re.is_empty(r)) + continue; + + expr_ref c(m), th(m), el(m); + if (!bool_rewriter(m).decompose_ite(r, c, th, el)) { + // No ite remaining: leaf → create child node + euf::snode *deriv_snode = m_sg.mk(r); + nielsen_node *child = mk_child(node); + for (auto f : cs) + child->add_constraint(constraint(f, mem.m_dep, m)); + mk_edge(node, child, true); + for (str_mem &cm : child->str_mems()) + if (cm.m_id == mem.m_id) { + cm.m_str = rest; + cm.m_regex = deriv_snode; + break; + } + created = true; + continue; + } + + // Substitute inner_char into condition and simplify + th_rewriter tr(m); + expr_ref c_simp(c, m); + tr(c_simp); + + if (m.is_true(c_simp)) { + // Condition is always satisfied: only then-branch + if (!m_seq.re.is_empty(th)) + worklist.push_back({th, std::move(cs)}); + } + else if (m.is_false(c_simp)) { + // Condition is never satisfied: only else-branch + if (!m_seq.re.is_empty(el)) + worklist.push_back({el, std::move(cs)}); + } + else { + // Branch on c=true and c=false, accumulate char constraints + if (!m_seq.re.is_empty(th)) { + expr_ref_vector cs_true(cs); + cs_true.push_back(c); + worklist.push_back({th, std::move(cs_true)}); + } + if (!m_seq.re.is_empty(el)) { + expr_ref_vector cs_false(cs); + cs_false.push_back(mk_not(c)); + worklist.push_back({el, std::move(cs_false)}); + } + } + } + + if (created) + return true; + } + return false; + } // ----------------------------------------------------------------------- // Modifier: apply_regex_var_split // For str_mem x·s ∈ R where x is a variable, split using minterms: @@ -4102,6 +4203,7 @@ namespace seq { st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen); st.update("nseq mod signature split", m_stats.m_mod_signature_split); st.update("nseq mod regex var", m_stats.m_mod_regex_var_split); + st.update("nseq mod regex unit", m_stats.m_mod_regex_unit_split); st.update("nseq mod power split", m_stats.m_mod_power_split); st.update("nseq mod var nielsen", m_stats.m_mod_var_nielsen); st.update("nseq mod var num unwind (eq)", m_stats.m_mod_var_num_unwinding_eq);