// -------------------------------------------------------
// Written by Mohamed Gaber in 2025
// Based on kernel/hashlib.h by Claire Xenia Wolf
// -------------------------------------------------------
// This header is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <https://unlicense.org/>
// -------------------------------------------------------
//
// pybind11 bridging headers for hashlib template
//
// These are various binding functions that expose hashlib templates as opaque
// types (https://pybind11.readthedocs.io/en/latest/advanced/cast/stl.html#making-opaque-types).
//
// Opaque types cross language barriers by reference, not value. This allows
// things like mutating containers that are class properties.
// // All methods should be vaguely in the same order as the python reference // https://docs.python.org/3.13/library/stdtypes.html // #include // optional maps cleanest to methods that accept None in Python #include // std::optional #include // base #include // easier operator binding #include // vector #include "kernel/hashlib.h" namespace pybind11 { namespace hashlib { // "traits" template struct is_pointer: std::false_type {}; template struct is_pointer: std::true_type {}; template struct is_optional: std::false_type {}; template struct is_optional< std::optional >: std::true_type {}; bool is_mapping(object obj) { object mapping = module_::import("collections.abc").attr("Mapping"); return isinstance(obj, mapping); } // Set Operations bool is_subset(const iterable &lhs, const iterable &rhs, bool strict = false) { for (auto &element: lhs) { if (!rhs.contains(element)) { return false; } } if (strict) { return len(rhs) > len(lhs); } return true; } template void unionize(C &lhs, const iterable &rhs) { for (auto &element: rhs) { lhs.insert(cast(element)); } } template void difference(C &lhs, const iterable &rhs) { for (auto &element: rhs) { auto element_cxx = cast(element); if (lhs.count(element_cxx)) { lhs.erase(element_cxx); } } } template void intersect(C &lhs, const iterable &rhs) { // Doing it in-place is a lot slower // TODO?: Leave modifying lhs to caller (saves a copy in some cases) // but complicates chaining intersections. 
C storage(lhs); for (auto &element_cxx: lhs) { if (!rhs.contains(cast(element_cxx))) { storage.erase(element_cxx); } } lhs = std::move(storage); } template void symmetric_difference(C &lhs, const iterable &rhs) { C storage(lhs); for (auto &element: rhs) { auto element_cxx = cast(element); if (lhs.count(element_cxx)) { storage.erase(element_cxx); } else { storage.insert(element_cxx); } } for (auto &element_cxx: lhs) { if (rhs.contains(cast(element_cxx))) { storage.erase(element_cxx); } } lhs = std::move(storage); } // shim template void bind_vector(module &m, const char *name_cstr) { pybind11::bind_vector(m, name_cstr); } // also used for hashlib pool because the semantics are close enough template void bind_set(module &m, const char *name_cstr) { class_(m, name_cstr) .def(init<>()) .def(init()) // copy constructor .def(init([](const iterable &other){ // copy instructor from arbitrary iterables auto s = new C(); unionize(*s, other); return s; })) .def("__len__", [](const C &s){ return (size_t)s.size(); }) .def("__contains__", [](const C &s, const T &v){ return s.count(v); }) .def("__delitem__", [](C &s, const T &v) { auto n = s.erase(v); if (n == 0) throw key_error(str(cast(v))); }) .def("disjoint", [](const C &s, const iterable &other) { for (const auto &element: other) { if (s.count(cast(element))) { return false; } } return true; }) .def("issubset", [](const iterable &s, const iterable &other) { return is_subset(s, other); }) .def("__eq__", [](const iterable &s, const iterable &other) { return is_subset(s, other) && len(s) == len(other); }) .def("__le__", [](const iterable &s, const iterable &other) { return is_subset(s, other); }) .def("__lt__", [](const iterable &s, const iterable &other) { return is_subset(s, other, true); }) .def("issuperset", [](const iterable &s, const iterable &other) { return is_subset(other, s); }) .def("__ge__", [](const iterable &s, const iterable &other) { return is_subset(other, s); }) .def("__gt__", [](const iterable &s, const 
iterable &other) { return is_subset(other, s, true); }) .def("union", [](const C &s, const args &others) { auto result = new C(s); for (const auto &arg: others) { auto arg_iterable = reinterpret_borrow(arg); unionize(*result, arg_iterable); } return result; }) .def("__or__", [](const C &s, const iterable &other) { auto result = new C(s); unionize(*result, other); return result; }) .def("__ior__", [](C &s, const iterable &other) { unionize(s, other); return s; }) .def("intersection", [](const C &s, const args &others) { auto result = new C(s); for (const auto &arg: others) { auto arg_iterable = reinterpret_borrow(arg); intersect(*result, arg_iterable); } return result; }) .def("__and__", [](const C &s, const iterable &other) { auto result = new C(s); intersect(*result, other); return result; }) .def("__iand__", [](C &s, const iterable &other) { intersect(s, other); return s; }) .def("difference", [](const C &s, const args &others) { auto result = new C(s); for (const auto &arg: others) { auto arg_iterable = reinterpret_borrow(arg); difference(*result, arg_iterable); } return result; }) .def("__sub__", [](const C &s, const iterable &other) { auto result = new C(s); difference(*result, other); return result; }) .def("__isub__", [](C &s, const iterable &other) { difference(s, other); return s; }) .def("symmetric_difference", [](const C &s, const iterable &other) { auto result = new C(s); symmetric_difference(*result, other); return result; }) .def("__xor__", [](const C &s, const iterable &other) { auto result = new C(s); symmetric_difference(*result, other); return result; }) .def("__ixor__", [](C &s, const iterable &other) { symmetric_difference(s, other); return s; }) .def("copy", [](const C &s) { return new C(s); }) .def("update", [](C &s, iterable iterable) { for (auto item: iterable) { s.insert(item.cast()); } }) .def("add", [](C &s, const T &v){ s.insert(v); }) .def("remove", [](C &s, const T &v){ auto n = s.erase(v); if (n == 0) throw key_error(str(cast(v))); }) 
.def("discard", [](C &s, const T &v){ s.erase(v); }) .def("clear", [](C &s){ s.clear(); }) .def("pop", [](C &s){ if (s.size() == 0) { throw key_error("empty pool"); } auto result = *s.begin(); s.erase(result); return result; }) .def("__bool__", [](const C &s) { return s.size() != 0; }) .def("__iter__", [](const C &s){ return make_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("__eq__", [](const C &s, const C &other) { return s == other; }) .def("__eq__", [](const C &s, const iterable &other) { C other_cast; unionize(other_cast, other); return s == other_cast; }) .def("__repr__", [name_cstr](const iterable &s){ // repr(set(s)) where s is iterable would be more terse/robust // but are there concerns with copying? str representation = str(name_cstr) + str("({"); str comma(", "); for (const auto &element: s) { representation += repr(element); representation += comma; // python supports trailing commas } representation += str("})"); return representation; }); } // shim template void bind_pool(module &m, const char *name_cstr) { bind_set(m, name_cstr); } template void update_dict(C &target, const iterable &iterable_or_mapping) { if (is_mapping(iterable_or_mapping)) { for (const auto &key: iterable_or_mapping) { target[cast(key)] = cast(iterable_or_mapping[key]); } } else { for (const auto &pair: iterable_or_mapping) { if (len(pair) != 2) { throw value_error(str("iterable element %s has more than two elements").format(str(pair))); } target[cast(pair[cast(0)])] = cast(pair[cast(1)]); } } } template void bind_dict(module &m, const char *name_cstr) { auto cls = class_(m, name_cstr) .def(init<>()) .def(init()) // copy constructor .def(init([](const iterable &other){ // copy instructor from arbitrary iterables and mappings auto s = new C(); update_dict(*s, other); return s; })) .def("__len__", [](const C &s){ return (size_t)s.size(); }) .def("__getitem__", [](const C &s, const K &k) { return s.at(k); }) .def("__setitem__", [](C &s, const K &k, const V &v) { s[k] = v; 
}) .def("__delitem__", [](C &s, const K &k) { auto n = s.erase(k); if (n == 0) throw key_error("remove: key not found"); }) .def("__contains__", [](const C &s, const K &k) { return s.count(k) != 0; }) .def("__iter__", [](const C &s){ return make_key_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("clear", [](C &s){ s.clear(); }) .def("copy", [](const C &s) { return new C(s); }) .def("get", [](const C &s, const K& k, std::optional &default_) { if (default_.has_value()) { return s.at(k, *default_); } else { return s.at(k); } }, arg("key"), arg("default") = std::nullopt) .def("items", [](const C &s){ return make_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("keys", [](const C &s){ return make_key_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("pop", [](const C &s, const K& k, std::optional &default_) { if (default_.has_value()) { return s.at(k, *default_); } else { return s.at(k); } }, arg("key"), arg("default") = std::nullopt) .def("popitem", [](C &s) { auto it = s.begin(); if (it == s.end()) { throw key_error("dict is empty"); } auto copy = *it; s.erase(it); return copy; }) .def("setdefault", [name_cstr](C &s, const K& k, std::optional &default_) { auto it = s.find(k); if (it != s.end()) { return it->second; } if (default_.has_value()) { s[k] = *default_; return *default_; } // if pointer, nullptr can be our default if constexpr (is_pointer::value) { s[k] = nullptr; return (V)nullptr; } if constexpr (is_optional::value) { s[k] = std::nullopt; return std::nullopt; } throw type_error(std::string("the value type of ") + name_cstr + " is not nullable"); }, arg("key"), arg("default") = std::nullopt) .def("update", [](C &s, iterable iterable_or_mapping) { update_dict(s, iterable_or_mapping); }, arg("iterable_or_mapping")) .def("values", [](const C &s){ return make_value_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("__or__", [](const C &s, iterable iterable_or_mapping) { auto result = new C(s); update_dict(*result, 
iterable_or_mapping); return result; }) .def("__ior__", [](C &s, iterable iterable_or_mapping) { update_dict(s, iterable_or_mapping); return s; }) .def("__bool__", [](const C &s) { return s.size() != 0; }) .def("__repr__", [name_cstr](const C &s) { // repr(dict(s)) where s is iterable would be more terse/robust // but are there concerns with copying? str representation = str(name_cstr) + str("({"); str colon(": "); str comma(", "); for (const auto &item: s) { representation += repr(cast(item.first)); representation += colon; representation += repr(cast(item.second)); representation += comma; // python supports trailing commas } representation += str("})"); return representation; }); // K is always comparable // Python implements `is` as a fallback to check if it's the same object if constexpr (detail::is_comparable::value) { cls.def("__eq__", [](const C &s, const C &other) { return s == other; }); cls.def("__eq__", [](const C &s, const iterable &other) { C other_cast; update_dict(other_cast, other); return s == other_cast; }); } // Inherit from collections.abc.Mapping so update operators (and a bunch // of other things) work. 
auto collections_abc = module_::import("collections.abc"); auto mapping = getattr(collections_abc, "Mapping"); auto current_bases = list(getattr(cls, "__bases__")); current_bases.append(mapping); setattr(cls, "__bases__", tuple(current_bases)); } // idict is a special bijection and doesn't map cleanly to dict // // it's cleanest, despite the inconsistency with __getitem__, to just think of // the hashable as key and the integer as value template void bind_idict(module &m, const char *name_cstr) { auto cls = class_(m, name_cstr) .def(init<>()) .def(init()) // copy constructor .def(init([](const iterable &other){ // copy instructor from arbitrary iterables auto s = new C(); for (auto &e: other) { (*s)(cast(e)); } return s; })) .def("__len__", [](const C &s){ return (size_t)s.size(); }) .def("__getitem__", [](const C &s, int v) { return s[v]; }) .def("__call__", [](C &s, const K &k) { return s(k); }) .def("__contains__", [](const C &s, const K &k) { return s.count(k) != 0; }) .def("__iter__", [](const C &s){ return make_iterator(s.begin(), s.end()); }, keep_alive<0,1>()) .def("clear", [](C &s) { s.clear(); }) .def("copy", [](const C &s) { return new C(s); }) .def("get", [](const C &s, const K& k, std::optional &default_) { if (default_.has_value()) { return s.at(k, *default_); } else { return s.at(k); } }, arg("key"), arg("default") = std::nullopt) .def("keys", [](const C &s){ return make_iterator(s.begin(), s.end()); }) .def("values", [](args _){ throw type_error("idicts do not support iteration on the integers"); }) .def("items", [](args _){ throw type_error("idicts do not support pairwise iteration"); }) .def("update", [](C &s, iterable other) { for (auto &e: other) { s(cast(e)); } }) .def("__or__", [](const C &s, iterable other) { auto result = new C(s); for (auto &e: other) { (*result)(cast(e)); } return result; }) .def("__ior__", [](C &s, iterable other) { for (auto &e: other) { s(cast(e)); } return s; }) .def("__bool__", [](const C &s) { return s.size() != 0; 
}) .def("__repr__", [name_cstr](const C &s){ // repr(dict(s)) where s is iterable would be more terse/robust // but are there concerns with copying? str representation = str(name_cstr) + str("() | {"); str comma(", "); for (const auto &item: s) { representation += repr(cast(item)); representation += comma; // python supports trailing commas } representation += str("}"); return representation; }); for (const char *mutator: {"__setitem__", "__delitem__", "pop", "popitem", "setdefault"}) { cls.def(mutator, [](args _) { throw type_error("idicts do not support arbitrary element mutation"); }); } } }; // namespace hashlib }; // namespace pybind11