Skip to content
Snippets Groups Projects
Commit 0fa2deb1 authored by Wlad's avatar Wlad
Browse files

feat: add chained filter

parent 6c284e2a
No related branches found
No related tags found
No related merge requests found
......@@ -72,6 +72,7 @@ add_subdirectory(lib/amd_mortonfilter)
add_subdirectory(lib/efficient_cuckoofilter)
add_subdirectory(lib/vacuumfilter)
add_subdirectory(lib/ribbon)
add_subdirectory(lib/chained)
add_subdirectory(src)
......
......@@ -3,7 +3,7 @@ set(library_name filters)
# build library
add_library(${library_name} INTERFACE)
target_include_directories(${library_name} INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
target_link_libraries(${library_name} INTERFACE ribbon cityhash libdivide fastfilter impala efficient_cuckoofilter amd_mortonfilter vacuumfilter)
target_link_libraries(${library_name} INTERFACE chained ribbon cityhash libdivide fastfilter impala efficient_cuckoofilter amd_mortonfilter vacuumfilter)
if (NUMA_FOUND)
target_link_libraries(${library_name} INTERFACE ${NUMA_LIBRARY})
......
......@@ -14,6 +14,7 @@
* Reference Implementations
*/
#include <reference/amd_mortonfilter/amd_mortonfilter_filter.hpp>
#include <reference/chained/chained_filter.hpp>
#include <reference/efficient_cuckoofilter/efficient_cuckoofilter_filter.hpp>
#include <reference/fastfilter/fastfilter_filter.hpp>
#include <reference/impala/impala_bloom_filter.hpp>
......
......@@ -20,6 +20,7 @@ enum class FilterType : size_t {
VacuumFilter,
LookupBloom,
Ribbon,
Chained
};
template <FilterType filter, typename FilterParameter, size_t k, typename OptimizationParameter>
......
#pragma once
#include <stdio.h>

#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <chained/self-adaptive-hashing/include/CuckooHash.h>
#include <chained/src/Algorithm2.h>

#include <address/addresser.hpp>
#include <filter_base.hpp>
#include <parameter/parameter.hpp>
#include <simd/vector.hpp>
namespace filters {
template <typename FilterParameter, size_t _k, typename OptimizationParameter>
struct Filter<FilterType::Chained, FilterParameter, _k, OptimizationParameter> {
static constexpr bool supports_add = false;
static constexpr bool supports_add_partition = false;
using OP = OptimizationParameter;
using FP = FilterParameter;
using Scalar = simd::Vector<OP::registerSize, parameter::SIMD::Scalar>;
using T = Scalar::T;
std::unique_ptr<ChainedFilter> filter;
std::unique_ptr<Cuckoo> chained_filter;
size_t n = 1 << 19;
size_t s = 100;
static_assert(OP::partitioning == parameter::Partitioning::Disabled,
"partitioning must be disabled!");
Filter(size_t s, size_t n, size_t n_threads, size_t n_tasks_per_level) : s(s) {}
forceinline void init(const T *histogram) {
// size_t item_count = histogram[0];
}
forceinline bool contains(const T &value, const size_t = 0) {
if (filter->query(value)) {
return true;
}
return chained_filter->query_with_pre(value, false);
}
forceinline bool add(const T &value, const size_t = 0) {
int re = chained_filter->insert(make_pair(value, value));
filter->insert(value, re);
return true;
}
bool construct(T *values, size_t length) {
size_t n = length * s / 100;
auto construct_filter = std::unique_ptr<Cuckoo>(new Cuckoo(n));
for (size_t i = 0; i < length; i++) {
auto value = values[i];
if (construct_filter->insert(make_pair(value, value)) == -1) {
return false;
}
}
pair<int, int> re = construct_filter->check();
chained_filter = std::unique_ptr<Cuckoo>(new Cuckoo(n));
filter = std::unique_ptr<ChainedFilter>(new ChainedFilter(re.first, re.second));
for (size_t i = 0; i < length; i++) {
auto value = values[i];
chained_filter->insert(make_pair(value, value));
}
for (size_t i = 0; i < length; i++) {
auto value = values[i];
int re = chained_filter->getpos(value);
filter->insert(value, re);
}
size_t last_value = 0;
for (int tt = 0; tt <= 10; tt++) {
size_t sum = 0;
for (size_t i = 0; i < length; i++) {
int tr = filter->query(i);
if (chained_filter->query_with_pre(i, tr)) {
sum++;
filter->change(i, !tr);
}
}
if (last_value == sum) {
printf("stabalized after %d iterations at %d mistakes", tt, sum);
break;
}
last_value = sum;
}
return true;
}
size_t count(T *values, size_t length) {
if constexpr (OP::multiThreading == parameter::MultiThreading::Disabled) {
size_t counter = 0;
for (size_t i = 0; i < length; i++) {
counter += contains(values[i]);
}
return counter;
} else {
return 0;
}
}
size_t size() {
return filter->size();
}
size_t avg_size() {}
size_t retries() {
// cannot get the number of retries needed for building from the implementation
return 0;
}
std::string to_string() {
std::string s = "\n{\n";
s += "\t\"size\": " + std::to_string(size() * 8) + " bits,\n";
s += "\t\"filter_params\": " + FP::to_string() + ",\n";
s += "\t\"optimization_params\": " + OP::to_string() + "\n";
s += "}\n";
return s;
}
};
} // namespace filters
\ No newline at end of file
#pragma once
#include <string>
namespace filters::chained {

/// Parameter type for the chained filter; it has no data members.
struct ChainedFilterParameter {
    /// Textual form of the parameter set, which is always the literal "{}".
    static std::string to_string() {
        return std::string("{}");
    }
};

/// `Standard<N>` is ChainedFilterParameter for every N; the argument is ignored.
template <size_t>
using Standard = ChainedFilterParameter;

}  // namespace filters::chained
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment