vkdb
A time series database engine in C++.
Loading...
Searching...
No Matches
sstable.h
1#ifndef STORAGE_SSTABLE_H
2#define STORAGE_SSTABLE_H
3
4#include <vkdb/bloom_filter.h>
5#include <vkdb/concepts.h>
6#include <vkdb/data_range.h>
7#include <vkdb/mem_table.h>
8#include <vkdb/string.h>
9#include <vkdb/time_series_key.h>
10
11#include <filesystem>
12#include <fstream>
13#include <string>
14#include <unistd.h>
15
16namespace vkdb {
21using FilePath = std::filesystem::path;
22
28template <ArithmeticNoCVRefQuals TValue>
29class SSTable {
30public:
31 using key_type = TimeSeriesKey;
32 using mapped_type = std::optional<TValue>;
33 using value_type = std::pair<const key_type, mapped_type>;
34 using size_type = uint64_t;
35
40 static constexpr double BLOOM_FILTER_FALSE_POSITIVE_RATE{0.01};
41
46 SSTable() = delete;
47
55 explicit SSTable(FilePath file_path)
56 : file_path_{file_path}
57 , bloom_filter_{
58 MemTable<TValue>::C0_LAYER_SSTABLE_MAX_ENTRIES,
60 }
61 {
62 if (!std::filesystem::exists(file_path_)) {
63 return;
64 }
65 load_metadata();
66 }
67
76 explicit SSTable(
77 FilePath file_path,
78 MemTable<TValue>&& mem_table,
79 size_type expected_entries = MemTable<TValue>::C0_LAYER_SSTABLE_MAX_ENTRIES
80 )
81 : file_path_{file_path}
82 , bloom_filter_{
83 expected_entries,
85 }
86 {
87 writeDataToDisk(std::move(mem_table));
88 }
89
94 SSTable(SSTable&&) noexcept = default;
95
100 SSTable& operator=(SSTable&&) noexcept = default;
101
106 SSTable(const SSTable&) = delete;
107
112 SSTable& operator=(const SSTable&) = delete;
113
118 ~SSTable() noexcept = default;
119
120 [[nodiscard]] bool operator==(const SSTable& other) const noexcept {
121 return file_path_ == other.file_path_;
122 }
123
133 save_memtable(std::move(mem_table));
134 save_metadata();
135 }
136
144 [[nodiscard]] bool contains(const key_type& key) const noexcept {
145 return may_contain(key) && in_range(key) && in_index(key);
146 }
147
157 [[nodiscard]] mapped_type get(const key_type& key) const {
158 if (!contains(key)) {
159 return std::nullopt;
160 }
161
162 std::ifstream file{file_path_};
163 if (!file.is_open()) {
164 throw std::runtime_error{
165 "SSTable::get(): Unable to open file '"
166 + std::string(file_path_) + "'."
167 };
168 }
169
170 file.seekg(index_.at(key));
171 if (!file) {
172 throw std::runtime_error{
173 "SSTable::get(): Unable to seek to position "
174 + std::to_string(index_.at(key)) + " in file '"
175 + std::string(file_path_) + "'."
176 };
177 }
178
179 std::string entry_str;
180 std::getline(file, entry_str, '[');
181 std::getline(file, entry_str, '[');
182 auto [entry_key, entry_value] = entryFromString<TValue>(std::move(entry_str));
183
184 file.close();
185
186 return entry_value;
187 }
188
196 [[nodiscard]] std::vector<value_type> getRange(
197 const key_type& start,
198 const key_type& end
199 ) const noexcept {
200 if (!overlaps_with(start, end)) {
201 return {};
202 }
203
204 auto start_it{index_.lower_bound(start)};
205 auto end_it{index_.upper_bound(end)};
206 if (start_it == end_it) {
207 return {};
208 }
209
210 std::vector<value_type> entries;
211 entries.reserve(std::distance(start_it, end_it));
212
213 std::ifstream file{file_path_, std::ios::binary};
214 for (auto it{start_it}; it != end_it; ++it) {
215 const auto& [key, pos] = *it;
216 file.seekg(pos);
217 std::string entry_str;
218 std::getline(file, entry_str, '[');
219 std::getline(file, entry_str, '[');
220 auto [entry_key, entry_value]
221 = entryFromString<TValue>(std::move(entry_str));
222 entries.emplace_back(entry_key, entry_value);
223 }
224
225 return entries;
226 }
227
233 [[nodiscard]] std::vector<value_type> entries() const noexcept {
234 return getRange(MIN_TIME_SERIES_KEY, MAX_TIME_SERIES_KEY);
235 }
236
242 [[nodiscard]] FilePath path() const noexcept {
243 return file_path_;
244 }
245
251 [[nodiscard]] FilePath metadataPath() const noexcept {
252 auto file_path{file_path_};
253 file_path.replace_extension(".metadata");
254 return file_path;
255 }
256
262 [[nodiscard]] KeyRange keyRange() const noexcept {
263 return key_range_;
264 }
265
271 [[nodiscard]] TimeRange timeRange() const noexcept {
272 return time_range_;
273 }
274
275private:
280 using Index = std::map<const key_type, std::streampos>;
281
290 void update_metadata(const key_type& key, std::streampos pos) {
291 time_range_.updateRange(key.timestamp());
292 key_range_.updateRange(key);
293 bloom_filter_.insert(key);
294 index_.emplace(key, pos);
295 }
296
305 void save_memtable(MemTable<TValue>&& mem_table) {
306 std::ofstream file{file_path_};
307 if (!file.is_open()) {
308 throw std::runtime_error{
309 "SSTable::save_memtable(): Unable to open file '"
310 + std::string(file_path_) + "'."
311 };
312 }
313
314 file << mem_table.size();
315 for (const auto& [key, value] : mem_table.table()) {
316 auto pos{file.tellp()};
317 if (pos == -1) {
318 throw std::runtime_error{
319 "SSTable::save_memtable(): Unable to get current position "
320 " of filestream for '" + std::string(file_path_) + "'."
321 };
322 }
323 update_metadata(key, pos);
324 file << entryToString<TValue>(value_type{key, value});
325 }
326
327 file.close();
328 }
329
335 void save_metadata() {
336 std::ofstream file{metadataPath()};
337 if (!file.is_open()) {
338 throw std::runtime_error{
339 "SSTable::save_metadata(): Unable to open file '"
340 + std::string(metadataPath()) + "'."
341 };
342 }
343
344 file << time_range_.str() << "\n";
345 file << key_range_.str() << "\n";
346 file << bloom_filter_.str() << "\n";
347 file << index_.size() << "\n";
348 for (const auto& [key, pos] : index_) {
349 file << key.str() << "^" << pos << "\n";
350 }
351
352 file.close();
353 }
354
361 void load_metadata() {
362 std::ifstream file{metadataPath()};
363 if (!file.is_open()) {
364 throw std::runtime_error{
365 "SSTable::load_metadata(): Unable to open file '"
366 + std::string(metadataPath()) + "'."
367 };
368 }
369
370 std::string line;
371 std::getline(file, line);
372 time_range_ = TimeRange{std::move(line)};
373 std::getline(file, line);
374 key_range_ = KeyRange{std::move(line)};
375 std::getline(file, line);
376 bloom_filter_ = BloomFilter{std::move(line)};
377 std::getline(file, line);
378 auto no_of_entries{std::stoull(line)};
379 for (auto i{0}; i < no_of_entries; ++i) {
380 std::getline(file, line);
381 auto caret_pos{line.find('^')};
382 if (caret_pos == std::string::npos) {
383 throw std::runtime_error{
384 "SSTable::load_metadata(): Invalid index entry '" + line + "'."
385 };
386 }
387 key_type key{std::move(line.substr(0, caret_pos))};
388 auto pos{std::stoull(line.substr(caret_pos + 1))};
389 index_.emplace(key, pos);
390 }
391
392 file.close();
393 }
394
402 [[nodiscard]] bool may_contain(const key_type& key) const noexcept {
403 return bloom_filter_.mayContain(key);
404 }
405
413 [[nodiscard]] bool in_range(const key_type& key) const noexcept {
414 return time_range_.inRange(key.timestamp()) && key_range_.inRange(key);
415 }
416
424 [[nodiscard]] bool in_index(const key_type& key) const noexcept {
425 return index_.count(key) > 0;
426 }
427
436 [[nodiscard]] bool overlaps_with(
437 const key_type& start,
438 const key_type& end
439 ) const noexcept {
440 return time_range_.overlapsWith(start.timestamp(), end.timestamp())
441 || key_range_.overlapsWith(start, end);
442 }
443
448 BloomFilter bloom_filter_;
449
454 TimeRange time_range_;
455
460 KeyRange key_range_;
461
466 Index index_;
467
472 FilePath file_path_;
473};
474} // namespace vkdb
475
476#endif // STORAGE_SSTABLE_H
std::string str() const noexcept
Get the string representation of the Bloom filter.
bool mayContain(const key_type &key) const noexcept
Check if the Bloom filter may contain a key.
void insert(const key_type &key) noexcept
Insert a key into the Bloom filter.
std::string str() const noexcept
Convert the range to a string.
Definition data_range.h:192
bool overlapsWith(const data_type &start, const data_type &end) const noexcept
Check if the range overlaps with the given range.
Definition data_range.h:144
bool inRange(const data_type &data) const noexcept
Check if the data is in the range.
Definition data_range.h:132
void updateRange(const data_type &data) noexcept
Update the range with the given data.
Definition data_range.h:113
In-memory table for storing key-value pairs.
Definition mem_table.h:17
Sorted string table for storing key-value pairs.
Definition sstable.h:29
KeyRange keyRange() const noexcept
Get the key range of the SSTable.
Definition sstable.h:262
FilePath metadataPath() const noexcept
Get the path of the metadata file.
Definition sstable.h:251
std::vector< value_type > getRange(const key_type &start, const key_type &end) const noexcept
Get a filtered set of entries in a timestamp range.
Definition sstable.h:196
void writeDataToDisk(MemTable< TValue > &&mem_table)
Write data to disk.
Definition sstable.h:132
static constexpr double BLOOM_FILTER_FALSE_POSITIVE_RATE
False positive rate for the Bloom filters.
Definition sstable.h:40
SSTable(FilePath file_path)
Construct a new SSTable object given a file path.
Definition sstable.h:55
TimeRange timeRange() const noexcept
Get the time range of the SSTable.
Definition sstable.h:271
mapped_type get(const key_type &key) const
Get the value associated with a key.
Definition sstable.h:157
FilePath path() const noexcept
Get the path of the SSTable.
Definition sstable.h:242
bool contains(const key_type &key) const noexcept
Check if the SSTable may contain the given key.
Definition sstable.h:144
SSTable(SSTable &&) noexcept=default
Move-construct a SSTable object.
std::vector< value_type > entries() const noexcept
Get the entries of the SSTable.
Definition sstable.h:233
SSTable(FilePath file_path, MemTable< TValue > &&mem_table, size_type expected_entries=MemTable< TValue >::C0_LAYER_SSTABLE_MAX_ENTRIES)
Construct a new SSTable object given a file path and a memtable.
Definition sstable.h:76
SSTable()=delete
Deleted default constructor.
Represents a key in vkdb.