diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index a6a61c3c0..8d3112d4f 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -57,7 +57,7 @@ void smt_params::updt_local_params(params_ref const & _p) { m_nseq_max_nodes = p.nseq_max_nodes(); m_nseq_parikh = p.nseq_parikh(); m_nseq_regex_precheck = p.nseq_regex_precheck(); - m_nseq_regex_factorization = p.nseq_regex_factorization(); + m_nseq_regex_factorization_threshold = p.nseq_regex_factorization_threshold(); m_nseq_signature = p.nseq_signature(); m_up_persist_clauses = p.up_persist_clauses(); validate_string_solver(m_string_solver); @@ -172,7 +172,7 @@ void smt_params::display(std::ostream & out) const { DISPLAY_PARAM(m_string_solver); DISPLAY_PARAM(m_nseq_parikh); DISPLAY_PARAM(m_nseq_regex_precheck); - DISPLAY_PARAM(m_nseq_regex_factorization); + DISPLAY_PARAM(m_nseq_regex_factorization_threshold); DISPLAY_PARAM(m_profile_res_sub); DISPLAY_PARAM(m_display_bool_var2expr); diff --git a/src/params/smt_params.h b/src/params/smt_params.h index da8daa405..6c3806a02 100644 --- a/src/params/smt_params.h +++ b/src/params/smt_params.h @@ -252,7 +252,7 @@ struct smt_params : public preprocessor_params, unsigned m_nseq_max_nodes = 0; bool m_nseq_parikh = false; bool m_nseq_regex_precheck = true; - bool m_nseq_regex_factorization = true; + unsigned m_nseq_regex_factorization_threshold = 1; bool m_nseq_signature = false; smt_params(params_ref const & p = params_ref()): diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index 5b71e76d7..01042deed 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -129,7 +129,7 @@ def_module_params(module_name='smt', ('nseq.max_nodes', UINT, 0, 'maximum number of DFS nodes explored by theory_nseq per solve() call (0 = unlimited)'), ('nseq.parikh', BOOL, False, 'enable Parikh image checks in nseq solver'), ('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'), - ('nseq.regex_factorization', BOOL, True, 'enable syntactic regex factorization in theory_nseq: decomposes Boolean closure of regular expressions into primitive membership constraints'), + ('nseq.regex_factorization_threshold', UINT, 1, 'maximum number of cases to factor a classical regex into in a single step (gives completeness on classical regexes)'), ('nseq.signature', BOOL, False, 'enable heuristic signature-based string equation splitting in Nielsen solver'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index e95da54a1..75247762a 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -3281,7 +3281,7 @@ namespace seq { } bool nielsen_graph::apply_regex_factorization(nielsen_node* node) { - if (!m_regex_factorization) + if (m_regex_factorization_threshold == 0) return false; struct rf_split { @@ -3328,6 +3328,8 @@ namespace seq { } feasible.push_back({ sn_p, sn_q, m_dep_mgr.mk_join(mem.m_dep, first_filter_dep) }); + if (feasible.size() > m_regex_factorization_threshold) + break; } if (feasible.empty()) { @@ -3336,25 +3338,26 @@ namespace seq { return true; } - if (feasible.size() > 1) + if (feasible.size() > m_regex_factorization_threshold) continue; - nielsen_node* child = mk_child(node); - mk_edge(node, child, true); - - // remove the original mem from child - auto& child_mems = child->str_mems(); - for (unsigned k = 0; k < child_mems.size(); ++k) { - if (child_mems[k] == mem) { - child_mems[k] = child_mems.back(); - child_mems.pop_back(); - break; - } - } + for (auto& [m_p, m_q, m_dep] : feasible) { + nielsen_node* child = mk_child(node); + mk_edge(node, child, true); - child->add_str_mem(str_mem(first, feasible[0].m_p, feasible[0].m_dep)); - child->add_str_mem(str_mem(tail, feasible[0].m_q, feasible[0].m_dep)); - + // remove the original mem from child + auto& child_mems = child->str_mems(); + for (unsigned k = 0; k < child_mems.size(); ++k) { + if (child_mems[k] == mem) { + child_mems[k] = child_mems.back(); + child_mems.pop_back(); + break; + } + } + + child->add_str_mem(str_mem(first, m_p, m_dep)); + child->add_str_mem(str_mem(tail, m_q, m_dep)); + } return true; } return false; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index f197e6dfa..88d94c397 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -839,7 +839,7 @@ namespace seq { unsigned m_max_nodes = 0; // 0 = unlimited bool m_parikh_enabled = true; bool m_signature_split = false; - bool m_regex_factorization = true; + unsigned m_regex_factorization_threshold = 1; unsigned m_fresh_cnt = 0; nielsen_stats m_stats; @@ -968,7 +968,7 @@ namespace seq { void set_signature_split(bool e) { m_signature_split = e; } - void set_regex_factorization(bool e) { m_regex_factorization = e; } + void set_regex_factorization_threshold(unsigned max) { m_regex_factorization_threshold = max; } // display for debugging std::ostream& display(std::ostream& out) const; diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index ccde98519..d596dd506 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -468,7 +468,7 @@ namespace smt { if (s_expr) ensure_length_var(s_expr); - if (!get_fparams().m_nseq_regex_factorization) + if (!get_fparams().m_nseq_regex_factorization_threshold) return; // Boolean Closure Propagations @@ -709,7 +709,7 @@ namespace smt { m_nielsen.set_max_nodes(get_fparams().m_nseq_max_nodes); m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh); m_nielsen.set_signature_split(get_fparams().m_nseq_signature); - m_nielsen.set_regex_factorization(get_fparams().m_nseq_regex_factorization); + m_nielsen.set_regex_factorization_threshold(get_fparams().m_nseq_regex_factorization_threshold); SASSERT(!m_nielsen.root()->is_currently_conflict());