3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-10-09 09:21:58 +00:00
yosys/pyosys/hashlib.h
2025-10-03 11:54:44 +03:00

522 lines
16 KiB
C++

// -------------------------------------------------------
// Written by Mohamed Gaber in 2025 <me@donn.website>
// Based on kernel/hashlib.h by Claire Xenia Wolf <claire@yosyshq.com>
// -------------------------------------------------------
// This header is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <https://unlicense.org/>
// -------------------------------------------------------
//
// pybind11 bridging headers for hashlib template
//
// These are various binding functions that expose hashlib templates as opaque
// types (https://pybind11.readthedocs.io/en/latest/advanced/cast/stl.html#making-opaque-types).
//
// Opaque types cross language barriers by reference, not value. This allows
// things like mutating containers that are class properties.
//
// All methods should be vaguely in the same order as the python reference
// https://docs.python.org/3.13/library/stdtypes.html
//
#include <optional> // optional maps cleanest to methods that accept None in Python
#include <pybind11/stl.h> // std::optional
#include <pybind11/pybind11.h> // base
#include <pybind11/operators.h> // easier operator binding
#include <pybind11/stl_bind.h> // vector
#include "kernel/hashlib.h"
namespace pybind11 {
namespace hashlib {
// "traits"
template <typename T> struct is_pointer: std::false_type {};
template <typename T> struct is_pointer<T*>: std::true_type {};
template <typename T> struct is_optional: std::false_type {};
template <typename T> struct is_optional< std::optional<T> >: std::true_type {};
bool is_mapping(object obj) {
object mapping = module_::import("collections.abc").attr("Mapping");
return isinstance(obj, mapping);
}
// Set Operations
bool is_subset(const iterable &lhs, const iterable &rhs, bool strict = false) {
for (auto &element: lhs) {
if (!rhs.contains(element)) {
return false;
}
}
if (strict) {
return len(rhs) > len(lhs);
}
return true;
}
template <typename C, typename T>
void unionize(C &lhs, const iterable &rhs) {
for (auto &element: rhs) {
lhs.insert(cast<T>(element));
}
}
template <typename C, typename T>
void difference(C &lhs, const iterable &rhs) {
for (auto &element: rhs) {
auto element_cxx = cast<T>(element);
if (lhs.count(element_cxx)) {
lhs.erase(element_cxx);
}
}
}
template <typename C, typename T>
void intersect(C &lhs, const iterable &rhs) {
// Doing it in-place is a lot slower
// TODO?: Leave modifying lhs to caller (saves a copy) but complicates
// chaining intersections.
C storage(lhs);
for (auto &element_cxx: lhs) {
if (!rhs.contains(cast(element_cxx))) {
storage.erase(element_cxx);
}
}
lhs = std::move(storage);
}
template <typename C, typename T>
void symmetric_difference(C &lhs, const iterable &rhs) {
C storage(lhs);
for (auto &element: rhs) {
auto element_cxx = cast<T>(element);
if (lhs.count(element_cxx)) {
storage.erase(element_cxx);
} else {
storage.insert(element_cxx);
}
}
for (auto &element_cxx: lhs) {
if (rhs.contains(cast(element_cxx))) {
storage.erase(element_cxx);
}
}
lhs = std::move(storage);
}
// shim
template <typename C, typename V>
void bind_vector(module &m, const char *name_cstr) {
pybind11::bind_vector<C>(m, name_cstr);
}
// also used for hashlib pool because the semantics are close enough
template <typename C, typename T>
void bind_set(module &m, const char *name_cstr) {
class_<C>(m, name_cstr)
.def(init<>())
.def(init<const C &>()) // copy constructor
.def(init([](const iterable &other){ // copy instructor from arbitrary iterables
auto s = new C();
unionize<C, T>(*s, other);
return s;
}))
.def("__len__", [](const C &s){ return (size_t)s.size(); })
.def("__contains__", [](const C &s, const T &v){ return s.count(v); })
.def("__delitem__", [](C &s, const T &v) {
auto n = s.erase(v);
if (n == 0) throw key_error(str(cast(v)));
})
.def("disjoint", [](const C &s, const iterable &other) {
for (const auto &element: other) {
if (s.count(cast<T>(element))) {
return false;
}
}
return true;
})
.def("issubset", [](const iterable &s, const iterable &other) {
return is_subset(s, other);
})
.def("__eq__", [](const iterable &s, const iterable &other) {
return is_subset(s, other) && len(s) == len(other);
})
.def("__le__", [](const iterable &s, const iterable &other) {
return is_subset(s, other);
})
.def("__lt__", [](const iterable &s, const iterable &other) {
return is_subset(s, other, true);
})
.def("issuperset", [](const iterable &s, const iterable &other) {
return is_subset(other, s);
})
.def("__ge__", [](const iterable &s, const iterable &other) {
return is_subset(other, s);
})
.def("__gt__", [](const iterable &s, const iterable &other) {
return is_subset(other, s, true);
})
.def("union", [](const C &s, const args &others) {
auto result = new C(s);
for (const auto &arg: others) {
auto arg_iterable = reinterpret_borrow<iterable>(arg);
unionize<C, T>(*result, arg_iterable);
}
return result;
})
.def("__or__", [](const C &s, const iterable &other) {
auto result = new C(s);
unionize<C, T>(*result, other);
return result;
})
.def("__ior__", [](C &s, const iterable &other) {
unionize<C, T>(s, other);
return s;
})
.def("intersection", [](const C &s, const args &others) {
auto result = new C(s);
for (const auto &arg: others) {
auto arg_iterable = reinterpret_borrow<iterable>(arg);
intersect<C, T>(*result, arg_iterable);
}
return result;
})
.def("__and__", [](const C &s, const iterable &other) {
auto result = new C(s);
intersect<C, T>(*result, other);
return result;
})
.def("__iand__", [](C &s, const iterable &other) {
intersect<C, T>(s, other);
return s;
})
.def("difference", [](const C &s, const args &others) {
auto result = new C(s);
for (const auto &arg: others) {
auto arg_iterable = reinterpret_borrow<iterable>(arg);
difference<C, T>(*result, arg_iterable);
}
return result;
})
.def("__sub__", [](const C &s, const iterable &other) {
auto result = new C(s);
difference<C, T>(*result, other);
return result;
})
.def("__isub__", [](C &s, const iterable &other) {
difference<C, T>(s, other);
return s;
})
.def("symmetric_difference", [](const C &s, const iterable &other) {
auto result = new C(s);
symmetric_difference<C, T>(*result, other);
return result;
})
.def("__xor__", [](const C &s, const iterable &other) {
auto result = new C(s);
symmetric_difference<C, T>(*result, other);
return result;
})
.def("__ixor__", [](C &s, const iterable &other) {
symmetric_difference<C, T>(s, other);
return s;
})
.def("copy", [](const C &s) {
return new C(s);
})
.def("update", [](C &s, iterable iterable) {
for (auto item: iterable) {
s.insert(item.cast<T>());
}
})
.def("add", [](C &s, const T &v){ s.insert(v); })
.def("remove", [](C &s, const T &v){
auto n = s.erase(v);
if (n == 0) throw key_error(str(cast(v)));
})
.def("discard", [](C &s, const T &v){ s.erase(v); })
.def("clear", [](C &s){ s.clear(); })
.def("pop", [](C &s){
if (s.size() == 0) {
throw key_error("empty pool");
}
auto result = *s.begin();
s.erase(result);
return result;
})
.def("__bool__", [](const C &s) { return s.size() != 0; })
.def("__iter__", [](const C &s){
return make_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("__eq__", [](const C &s, const C &other) { return s == other; })
.def("__eq__", [](const C &s, const iterable &other) {
C other_cast;
unionize<C, T>(other_cast, other);
return s == other_cast;
})
.def("__repr__", [name_cstr](const iterable &s){
// repr(set(s)) where s is iterable would be more terse/robust
// but are there concerns with copying?
str representation = str(name_cstr) + str("({");
str comma(", ");
for (const auto &element: s) {
representation += repr(element);
representation += comma; // python supports trailing commas
}
representation += str("})");
return representation;
});
}
// shim
template <typename C, typename T>
void bind_pool(module &m, const char *name_cstr) {
bind_set<C, T>(m, name_cstr);
}
template <typename C, typename K, typename V>
void update_dict(C &target, const iterable &iterable_or_mapping) {
if (is_mapping(iterable_or_mapping)) {
for (const auto &key: iterable_or_mapping) {
target[cast<K>(key)] = cast<V>(iterable_or_mapping[key]);
}
} else {
for (const auto &pair: iterable_or_mapping) {
if (len(pair) != 2) {
throw value_error(str("iterable element %s has more than two elements").format(str(pair)));
}
target[cast<K>(pair[cast(0)])] = cast<V>(pair[cast(1)]);
}
}
}
template <typename C, typename K, typename V>
void bind_dict(module &m, const char *name_cstr) {
auto cls = class_<C>(m, name_cstr)
.def(init<>())
.def(init<const C &>()) // copy constructor
.def(init([](const iterable &other){ // copy instructor from arbitrary iterables and mappings
auto s = new C();
update_dict<C, K, V>(*s, other);
return s;
}))
.def("__len__", [](const C &s){ return (size_t)s.size(); })
.def("__getitem__", [](const C &s, const K &k) { return s.at(k); })
.def("__setitem__", [](C &s, const K &k, const V &v) { s[k] = v; })
.def("__delitem__", [](C &s, const K &k) {
auto n = s.erase(k);
if (n == 0) throw key_error("remove: key not found");
})
.def("__contains__", [](const C &s, const K &k) { return s.count(k) != 0; })
.def("__iter__", [](const C &s){
return make_key_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("clear", [](C &s){ s.clear(); })
.def("copy", [](const C &s) {
return new C(s);
})
.def("get", [](const C &s, const K& k, std::optional<const V> &default_) {
if (default_.has_value()) {
return s.at(k, *default_);
} else {
return s.at(k);
}
}, arg("key"), arg("default") = std::nullopt)
.def("items", [](const C &s){
return make_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("keys", [](const C &s){
return make_key_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("pop", [](const C &s, const K& k, std::optional<const V> &default_) {
if (default_.has_value()) {
return s.at(k, *default_);
} else {
return s.at(k);
}
}, arg("key"), arg("default") = std::nullopt)
.def("popitem", [](C &s) {
auto it = s.begin();
if (it == s.end()) {
throw key_error("dict is empty");
}
auto copy = *it;
s.erase(it);
return copy;
})
.def("setdefault", [name_cstr](C &s, const K& k, std::optional<const V> &default_) {
auto it = s.find(k);
if (it != s.end()) {
return it->second;
}
if (default_.has_value()) {
s[k] = *default_;
return *default_;
}
// if pointer, nullptr can be our default
if constexpr (is_pointer<V>::value) {
s[k] = nullptr;
return (V)nullptr;
}
if constexpr (is_optional<V>::value) {
s[k] = std::nullopt;
return std::nullopt;
}
throw type_error(std::string("the value type of ") + name_cstr + " is not nullable");
}, arg("key"), arg("default") = std::nullopt)
.def("update", [](C &s, iterable iterable_or_mapping) {
update_dict<C, K, V>(s, iterable_or_mapping);
}, arg("iterable_or_mapping"))
.def("values", [](const C &s){
return make_value_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("__or__", [](const C &s, iterable iterable_or_mapping) {
auto result = new C(s);
update_dict<C, K, V>(*result, iterable_or_mapping);
return result;
})
.def("__ior__", [](C &s, iterable iterable_or_mapping) {
update_dict<C, K, V>(s, iterable_or_mapping);
return s;
})
.def("__bool__", [](const C &s) { return s.size() != 0; })
.def("__repr__", [name_cstr](const C &s) {
// repr(dict(s)) where s is iterable would be more terse/robust
// but are there concerns with copying?
str representation = str(name_cstr) + str("({");
str colon(": ");
str comma(", ");
for (const auto &item: s) {
representation += repr(cast(item.first));
representation += colon;
representation += repr(cast(item.second));
representation += comma; // python supports trailing commas
}
representation += str("})");
return representation;
});
// K is always comparable
// Python implements `is` as a fallback to check if it's the same object
if constexpr (detail::is_comparable<V>::value) {
cls.def("__eq__", [](const C &s, const C &other) { return s == other; });
cls.def("__eq__", [](const C &s, const iterable &other) {
C other_cast;
update_dict<C, K, V>(other_cast, other);
return s == other_cast;
});
}
// Inherit from collections.abc.Mapping so update operators (and a bunch
// of other things) work.
auto collections_abc = module_::import("collections.abc");
auto mapping = getattr(collections_abc, "Mapping");
auto current_bases = list(getattr(cls, "__bases__"));
current_bases.append(mapping);
setattr(cls, "__bases__", tuple(current_bases));
}
// idict is a special bijection and doesn't map cleanly to dict
//
// it's cleanest, despite the inconsistency with __getitem__, to just think of
// the hashable as key and the integer as value
template <typename C, typename K>
void bind_idict(module &m, const char *name_cstr) {
auto cls = class_<C>(m, name_cstr)
.def(init<>())
.def(init<const C &>()) // copy constructor
.def("__len__", [](const C &s){ return (size_t)s.size(); })
.def("__getitem__", [](const C &s, int v) { return s[v]; })
.def("__call__", [](C &s, const K &k) { return s(k); })
.def("__contains__", [](const C &s, const K &k) {
return s.count(k) != 0;
})
.def("__iter__", [](const C &s){
return make_iterator(s.begin(), s.end());
}, keep_alive<0,1>())
.def("clear", [](C &s) {
s.clear();
})
.def("copy", [](const C &s) {
return new C(s);
})
.def("get", [](const C &s, const K& k, std::optional<int> &default_) {
if (default_.has_value()) {
return s.at(k, *default_);
} else {
return s.at(k);
}
}, arg("key"), arg("default") = std::nullopt)
.def("keys", [](const C &s){
return make_iterator(s.begin(), s.end());
})
.def("values", [](args _){
throw type_error("idicts do not support iteration on the integers");
})
.def("items", [](args _){
throw type_error("idicts do not support pairwise iteration");
})
.def("update", [](C &s, iterable iterable) {
for (auto &e: iterable) {
s(cast<K>(e));
}
})
.def("__or__", [](const C &s, iterable iterable) {
auto result = new C(s);
for (auto &e: iterable) {
(*result)(cast<K>(e));
}
return result;
})
.def("__ior__", [](C &s, iterable iterable) {
for (auto &e: iterable) {
s(cast<K>(e));
}
return s;
})
.def("__bool__", [](const C &s) { return s.size() != 0; })
.def("__repr__", [name_cstr](const C &s){
// repr(dict(s)) where s is iterable would be more terse/robust
// but are there concerns with copying?
str representation = str(name_cstr) + str("() | {");
str comma(", ");
for (const auto &item: s) {
representation += repr(cast(item));
representation += comma; // python supports trailing commas
}
representation += str("}");
return representation;
});
for (const char *mutator: {"__setitem__", "__delitem__", "pop", "popitem", "setdefault"}) {
cls.def(mutator, [](args _) {
throw type_error("idicts do not support arbitrary element mutation");
});
}
}
}; // namespace hashlib
}; // namespace pybind11