1#ifndef STORAGE_SSTABLE_H
2#define STORAGE_SSTABLE_H
4#include <vkdb/bloom_filter.h>
5#include <vkdb/concepts.h>
6#include <vkdb/data_range.h>
7#include <vkdb/mem_table.h>
8#include <vkdb/string.h>
9#include <vkdb/time_series_key.h>
21using FilePath = std::filesystem::path;
28template <ArithmeticNoCVRefQuals TValue>
32 using mapped_type = std::optional<TValue>;
33 using value_type = std::pair<const key_type, mapped_type>;
34 using size_type = uint64_t;
56 : file_path_{file_path}
58 MemTable<TValue>::C0_LAYER_SSTABLE_MAX_ENTRIES,
62 if (!std::filesystem::exists(file_path_)) {
81 : file_path_{file_path}
120 [[nodiscard]]
bool operator==(const
SSTable& other) const noexcept {
121 return file_path_ == other.file_path_;
133 save_memtable(std::move(mem_table));
145 return may_contain(key) && in_range(key) && in_index(key);
162 std::ifstream file{file_path_};
163 if (!file.is_open()) {
164 throw std::runtime_error{
165 "SSTable::get(): Unable to open file '"
166 + std::string(file_path_) +
"'."
170 file.seekg(index_.at(key));
172 throw std::runtime_error{
173 "SSTable::get(): Unable to seek to position "
174 + std::to_string(index_.at(key)) +
" in file '"
175 + std::string(file_path_) +
"'."
179 std::string entry_str;
180 std::getline(file, entry_str,
'[');
181 std::getline(file, entry_str,
'[');
182 auto [entry_key, entry_value] = entryFromString<TValue>(std::move(entry_str));
200 if (!overlaps_with(start, end)) {
204 auto start_it{index_.lower_bound(start)};
205 auto end_it{index_.upper_bound(end)};
206 if (start_it == end_it) {
210 std::vector<value_type>
entries;
211 entries.reserve(std::distance(start_it, end_it));
213 std::ifstream file{file_path_, std::ios::binary};
214 for (
auto it{start_it}; it != end_it; ++it) {
215 const auto& [key, pos] = *it;
217 std::string entry_str;
218 std::getline(file, entry_str,
'[');
219 std::getline(file, entry_str,
'[');
220 auto [entry_key, entry_value]
221 = entryFromString<TValue>(std::move(entry_str));
222 entries.emplace_back(entry_key, entry_value);
233 [[nodiscard]] std::vector<value_type>
entries() const noexcept {
234 return getRange(MIN_TIME_SERIES_KEY, MAX_TIME_SERIES_KEY);
242 [[nodiscard]] FilePath
path() const noexcept {
252 auto file_path{file_path_};
253 file_path.replace_extension(
".metadata");
280 using Index = std::map<const key_type, std::streampos>;
290 void update_metadata(
const key_type& key, std::streampos pos) {
293 bloom_filter_.
insert(key);
294 index_.emplace(key, pos);
305 void save_memtable(MemTable<TValue>&& mem_table) {
306 std::ofstream file{file_path_};
307 if (!file.is_open()) {
308 throw std::runtime_error{
309 "SSTable::save_memtable(): Unable to open file '"
310 + std::string(file_path_) +
"'."
314 file << mem_table.size();
315 for (
const auto& [key, value] : mem_table.table()) {
316 auto pos{file.tellp()};
318 throw std::runtime_error{
319 "SSTable::save_memtable(): Unable to get current position "
320 " of filestream for '" + std::string(file_path_) +
"'."
323 update_metadata(key, pos);
324 file << entryToString<TValue>(value_type{key, value});
335 void save_metadata() {
337 if (!file.is_open()) {
338 throw std::runtime_error{
339 "SSTable::save_metadata(): Unable to open file '"
344 file << time_range_.
str() <<
"\n";
345 file << key_range_.
str() <<
"\n";
346 file << bloom_filter_.
str() <<
"\n";
347 file << index_.size() <<
"\n";
348 for (
const auto& [key, pos] : index_) {
349 file << key.str() <<
"^" << pos <<
"\n";
361 void load_metadata() {
363 if (!file.is_open()) {
364 throw std::runtime_error{
365 "SSTable::load_metadata(): Unable to open file '"
371 std::getline(file, line);
372 time_range_ = TimeRange{std::move(line)};
373 std::getline(file, line);
374 key_range_ = KeyRange{std::move(line)};
375 std::getline(file, line);
376 bloom_filter_ = BloomFilter{std::move(line)};
377 std::getline(file, line);
378 auto no_of_entries{std::stoull(line)};
379 for (
auto i{0}; i < no_of_entries; ++i) {
380 std::getline(file, line);
381 auto caret_pos{line.find(
'^')};
382 if (caret_pos == std::string::npos) {
383 throw std::runtime_error{
384 "SSTable::load_metadata(): Invalid index entry '" + line +
"'."
387 key_type key{std::move(line.substr(0, caret_pos))};
388 auto pos{std::stoull(line.substr(caret_pos + 1))};
389 index_.emplace(key, pos);
402 [[nodiscard]]
bool may_contain(
const key_type& key)
const noexcept {
413 [[nodiscard]]
bool in_range(
const key_type& key)
const noexcept {
414 return time_range_.
inRange(key.timestamp()) && key_range_.
inRange(key);
424 [[nodiscard]]
bool in_index(
const key_type& key)
const noexcept {
425 return index_.count(key) > 0;
436 [[nodiscard]]
bool overlaps_with(
437 const key_type& start,
440 return time_range_.
overlapsWith(start.timestamp(), end.timestamp())
448 BloomFilter bloom_filter_;
454 TimeRange time_range_;
std::string str() const noexcept
Get the string representation of the Bloom filter.
bool mayContain(const key_type &key) const noexcept
Check if the Bloom filter may contain a key.
void insert(const key_type &key) noexcept
Insert a key into the Bloom filter.
std::string str() const noexcept
Convert the range to a string.
bool overlapsWith(const data_type &start, const data_type &end) const noexcept
Check if the range overlaps with the given range.
bool inRange(const data_type &data) const noexcept
Check if the data is in the range.
void updateRange(const data_type &data) noexcept
Update the range with the given data.
In-memory table for storing key-value pairs.
Sorted string table for storing key-value pairs.
KeyRange keyRange() const noexcept
Get the key range of the SSTable.
FilePath metadataPath() const noexcept
Get the path of the metadata file.
std::vector< value_type > getRange(const key_type &start, const key_type &end) const noexcept
Get the entries whose keys lie between start and end, both bounds inclusive.
void writeDataToDisk(MemTable< TValue > &&mem_table)
Write data to disk.
static constexpr double BLOOM_FILTER_FALSE_POSITIVE_RATE
False positive rate for the Bloom filters.
SSTable(FilePath file_path)
Construct a new SSTable object given a file path.
TimeRange timeRange() const noexcept
Get the time range of the SSTable.
mapped_type get(const key_type &key) const
Get the value associated with a key.
FilePath path() const noexcept
Get the path of the SSTable.
bool contains(const key_type &key) const noexcept
Check if the SSTable contains the given key; the Bloom filter, range, and index checks must all pass.
SSTable(SSTable &&) noexcept=default
Move-construct an SSTable object.
std::vector< value_type > entries() const noexcept
Get the entries of the SSTable.
SSTable(FilePath file_path, MemTable< TValue > &&mem_table, size_type expected_entries=MemTable< TValue >::C0_LAYER_SSTABLE_MAX_ENTRIES)
Construct a new SSTable object given a file path and a memtable.
SSTable()=delete
Deleted default constructor.
Represents a key in vkdb.