Skip to content
Snippets Groups Projects
Commit 8286e4c3 authored by Wlad
Browse files

wip: base ribbon ref implementation

parent 5f33088c
No related branches found
No related tags found
No related merge requests found
......@@ -17,110 +17,115 @@ struct Filter<FilterType::Ribbon, FilterParameter, _k, OptimizationParameter> {
// Capability flag; presumably read by generic code that drives the filters — confirm against callers.
static constexpr bool supports_add_partition = false;
// Short aliases for the two policy template parameters of this specialization.
using OP = OptimizationParameter;
using FP = FilterParameter;
// static_assert(OP::simd == parameter::SIMD::AVX2, "only AVX2 is supported!");
// Configurations with partitioning enabled are rejected at compile time.
static_assert(OP::partitioning == parameter::Partitioning::Disabled,
"partitioning must be disabled!");
// Scalar wrapper for the configured register size; its nested T is the element type used below.
using Scalar = simd::Vector<OP::registerSize, parameter::SIMD::Scalar>;
// NOTE(review): AVX2Vector is only referenced from commented-out code in the visible part of
// this struct, and a commented-out copy follows on the next line — confirm whether it is still needed.
using AVX2Vector = simd::Vector<OP::registerSize, parameter::SIMD::AVX2>;
// using AVX2Vector = simd::Vector<OP::registerSize, parameter::SIMD::AVX2>;
// Element type handled by the filter; DefaultTypesAndSettings uses it as the ribbon Key type.
using T = typename Scalar::T;
// Type of the `addresser` member declared further down.
using Addresser = addresser::Addresser<OP::addressingMode, Scalar>;
// NOTE(review): `Hasher` is aliased a second time further down in this struct, to
// ROCKSDB_NAMESPACE::ribbon::StandardHasher<DefaultTypesAndSettings>. Two different aliases
// with the same name cannot coexist in one scope, so one of them looks like leftover
// diff residue — confirm which one is intended.
using Hasher = hash::Hasher<OP::hashingMode, Scalar, 0>;
// Partition count with an in-class default of 1.
const size_t n_partitions = 1;
// Compile-time types and switches handed to the ROCKSDB_NAMESPACE::ribbon templates
// (StandardHasher, StandardBanding, InMemSimpleSolution, SerializableInterleavedSolution)
// that this filter instantiates.
struct DefaultTypesAndSettings {
// Row type for banding coefficients (128-bit unsigned).
using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128;
// Row type for stored results (one byte).
using ResultRow = uint8_t;
// Integer type used for slot indices/counts.
using Index = uint32_t;
// Keys are the filter's element type T.
using Key = T;
using Hash = uint64_t;
using Seed = uint32_t;
// Feature switches read by the ribbon templates; see the ribbon headers for their exact meaning.
static constexpr bool kIsFilter = true;
static constexpr bool kHomogeneous = false;
static constexpr bool kFirstCoeffAlwaysOne = true;
static constexpr bool kUseSmash = false;
static constexpr bool kAllowZeroStarts = false;
// Returns the key itself, converted to Hash; raw_seed is ignored.
static Hash HashFn(const T &input, uint64_t raw_seed) {
// NOTE(review): the previous comment justified skipping re-seeding for the homogeneous
// variant, but kHomogeneous is false in this struct — confirm that ignoring raw_seed
// is intended here.
return input;
}
};
// Ribbon base types, re-exported from DefaultTypesAndSettings.
using CoeffRow = typename DefaultTypesAndSettings::CoeffRow;
using ResultRow = typename DefaultTypesAndSettings::ResultRow;
using Index = typename DefaultTypesAndSettings::Index;
using Hash = typename DefaultTypesAndSettings::Hash;
using Key = typename DefaultTypesAndSettings::Key;
using Seed = typename DefaultTypesAndSettings::Seed;
// FP is a template parameter, so FP::coeffType is a dependent name and needs `typename`
// (mandatory before C++20, and consistent with the aliases above).
using CoeffType = typename FP::coeffType;
// Ribbon implementation/usage types, all instantiated with DefaultTypesAndSettings.
using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher<DefaultTypesAndSettings>;
using Banding = ROCKSDB_NAMESPACE::ribbon::StandardBanding<DefaultTypesAndSettings>;
using SimpleSoln = ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<DefaultTypesAndSettings>;
using InterleavedSoln =
    ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution<DefaultTypesAndSettings>;
// Solution columns per slot used for sizing: FP::kNumColumns when it is non-zero,
// otherwise FP::kMilliBitsPerKey / 1000 (which may be fractional).
static constexpr double kFractionalCols =
    FP::kNumColumns == 0 ? FP::kMilliBitsPerKey / 1000.0 : FP::kNumColumns;
// Returns the multiplicative space factor applied to the key count when sizing the filter:
//   1 + (4 + 0.25 * kFractionalCols) / (8 * sizeof(CoeffType)).
// NOTE(review): the divisor uses FP::coeffType, whereas the banding itself is instantiated
// with DefaultTypesAndSettings::CoeffRow (Unsigned128) — confirm which width is intended.
static double GetBestOverheadFactor() {
    constexpr double coeff_bits = 8.0 * sizeof(CoeffType);
    return 1.0 + (4.0 + kFractionalCols * 0.25) / coeff_bits;
}
// Sizing parameter passed to the constructor. Not read by the active code in the visible
// part of the file; the commented-out legacy code in init() used it as `*histogram * s / 100`,
// so it is presumably a percentage-style scale factor — confirm.
size_t s;
// Number of ribbon slots; set in init() from InterleavedSoln::RoundUpNumSlots.
size_t num_slots;
// Size in bytes of the buffer owned by `ptr`; set in init().
size_t n_bytes;
// NOTE(review): only referenced from commented-out code in the visible part of the file.
Addresser addresser;
// Task queue; in the visible code it is only used by the commented-out parallel path in count().
task::TaskQueue<OP::multiThreading> queue;
// Owns the raw byte buffer that `soln` is constructed over in init().
unique_ptr<char[]> ptr;
// Queryable interleaved solution built over `ptr`; null until init() runs.
unique_ptr<InterleavedSoln> soln;
// Banding that keys are added to before back-substitution into `soln`; null until init() runs.
unique_ptr<Banding> banding;
// Hasher instance passed to soln->FilterQuery in contains().
Hasher hasher;
// Constructs an empty filter. init() or construct() must run before add()/contains(),
// because `soln` and `banding` stay null until then.
//   s                 - sizing parameter, stored as-is
//   (unnamed size_t)  - ignored
//   n_threads, n_tasks_per_level - forwarded to the task queue
// There is exactly one constructor with this signature. n_partitions keeps its in-class
// default instead of being initialized from itself, and the size fields start at zero.
Filter(size_t s, size_t, size_t n_threads, size_t n_tasks_per_level)
    : s(s), num_slots(0), n_bytes(0), queue(n_threads, n_tasks_per_level) {}
// Sizes and allocates the ribbon storage for the expected number of keys, replacing any
// previously allocated buffer, solution and banding.
//   histogram - pointer to per-partition key counts; only histogram[0] is read, since
//               partitioning is disabled for this filter.
forceinline void init(const T *histogram) {
    // Only one partition is assumed in the histogram.
    // The +1 presumably keeps the filter from being sized for zero keys — confirm.
    const size_t item_count = histogram[0] + 1;

    // Apply the overhead factor *before* converting to an integer. Casting the factor on
    // its own truncates it to 1 and silently drops all of the extra slots.
    num_slots = InterleavedSoln::RoundUpNumSlots(
        static_cast<size_t>(GetBestOverheadFactor() * item_count));

    // Bytes needed for num_slots * kFractionalCols bits, rounded up to whole bytes.
    n_bytes = static_cast<size_t>((num_slots * kFractionalCols + 7) / 8);

    // make_unique<char[]> value-initializes, so the buffer starts out zeroed.
    ptr = std::make_unique<char[]>(n_bytes);
    soln = std::make_unique<InterleavedSoln>(ptr.get(), n_bytes);
    banding = std::make_unique<Banding>(num_slots);
}
// Queries the filter for `value` by forwarding to soln->FilterQuery(value, hasher) and
// returning its result. The unnamed size_t argument is ignored.
// Precondition: init() or construct() has run; `soln` is null before that.
// NOTE(review): the query uses the standalone `hasher` member rather than `*banding`;
// this presumably relies on both carrying the same seed — confirm.
forceinline bool contains(const T &value, const size_t = 0) {
    return soln->FilterQuery(value, hasher);
}
// Adds `value` to the banding and, if the banding accepts it, rebuilds the queryable
// solution from the banding via soln->BackSubstFrom(*banding).
// Returns false, leaving the solution untouched, when banding->Add rejects the value.
// The unnamed size_t argument is ignored.
// NOTE(review): back-substitution runs on every successful call, whereas construct() runs
// it once after adding the whole range — confirm that a per-key rebuild is intended.
forceinline bool add(const T &value, const size_t = 0) {
    const bool banded = banding->Add(value);
    if (banded) {
        soln->BackSubstFrom(*banding);
    }
    return banded;
}
// Builds the filter from scratch for the keys values[0 .. length).
// Calls init() sized for `length` keys, adds the whole range to the banding and then
// back-substitutes it into the queryable solution.
// Returns false if banding->AddRange rejects the range (the solution is not built then),
// true otherwise.
bool construct(T *values, size_t length) {
    // init() takes a `const T *` histogram, so the single-entry histogram must be a T.
    const T histogram = static_cast<T>(length);
    init(&histogram);

    // Pointer arithmetic is already scaled by sizeof(T): the end iterator is
    // values + length, not values + sizeof(T) * length.
    if (!banding->AddRange(values, values + length)) {
        return false;
    }
    soln->BackSubstFrom(*banding);
    return true;
}
// Returns how many of values[0 .. length) are reported as present by contains().
//
// TODO: the task-queue based parallel path has not been ported to the ribbon backend yet.
// Until it is, every configuration (including OP::multiThreading enabled) is counted
// sequentially, so the result is always the real count rather than a hard-coded 0.
size_t count(T *values, size_t length) {
    size_t counter = 0;
    for (size_t i = 0; i < length; i++) {
        counter += contains(values[i]);
    }
    return counter;
}
size_t size() {
......@@ -139,9 +144,8 @@ struct Filter<FilterType::Ribbon, FilterParameter, _k, OptimizationParameter> {
std::string to_string() {
std::string s = "\n{\n";
s += "\t\"k\": " + std::to_string(8) + ",\n";
s += "\t\"size\": " + std::to_string(size() * 8) + " bits,\n";
s += "\t\"filter_params\": " + ribbon::Standard<8>::to_string() + ",\n";
s += "\t\"filter_params\": " + FP::to_string() + ",\n";
s += "\t\"optimization_params\": " + OP::to_string() + "\n";
s += "}\n";
......
#pragma once
#include <ribbon/ribbon_alg.h>
#include <ribbon/util/ribbon_impl.h>
namespace filters::ribbon {

// Compile-time parameter bundle for the ribbon filter.
//   _CoeffType        - exposed as `coeffType`
//   _kNumColumns      - exposed as `kNumColumns`; the filter treats 0 as "derive the column
//                       count from kMilliBitsPerKey / 1000"
//   _kMilliBitsPerKey - exposed as `kMilliBitsPerKey` (default 7700)
template <typename _CoeffType, uint32_t _kNumColumns, uint32_t _kMilliBitsPerKey = 7700>
struct RibbonParameter {
    static constexpr uint32_t kMilliBitsPerKey = _kMilliBitsPerKey;
    static constexpr uint32_t kNumColumns = _kNumColumns;
    using coeffType = _CoeffType;

    // Renders the numeric parameters as a JSON-style object. Keys are quoted and the last
    // entry has no trailing comma, so the text can be embedded as the "filter_params"
    // value of the filter's own to_string() output.
    static std::string to_string() {
        std::string s = "\n{\n";
        s += "\t\"kMilliBitsPerKey\": " + std::to_string(kMilliBitsPerKey) + ",\n";
        s += "\t\"kNumColumns\": " + std::to_string(kNumColumns) + "\n";
        s += "}\n";
        return s;
    }
};

// Default parameter set. The size_t argument is accepted so that `Standard` can be passed
// where a `template <size_t>` parameter is expected, but it is currently unused.
template <size_t s = 0>
using Standard = RibbonParameter<uint32_t, 0>;

}  // namespace filters::ribbon
\ No newline at end of file
......@@ -35,6 +35,6 @@ using RibbonScalarSmall = FilterTestConfig<TestFilterType,
* Test Types
*/
// Single definition of the typed-test list. The alias was declared twice with different
// arguments, which is a redefinition. The numeric argument is forwarded to
// ribbon::Standard, which currently ignores it.
using Ribbon32TestTypes = ::testing::Types<RibbonScalarSmall<ribbon::Standard, 3>>;
} // namespace test::ribbon_test
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment