vkdb
A time series database engine in C++.
Loading...
Searching...
No Matches
lexer.cpp
1#include <vkdb/lexer.h>
2
3namespace vkdb {
4Lexer::Lexer(const std::string& input) noexcept
5 : input_{input}, position_{0}, line_{1}, column_{1} {}
6
7std::vector<Token> Lexer::tokenize() {
8 std::vector<Token> tokens;
9 while (chars_remaining()) {
10 if (is_whitespace(peek())) {
11 lex_whitespace();
12 } else if (is_alpha(peek())) {
13 tokens.push_back(lex_word());
14 } else if (
15 is_digit(peek()) ||
16 peek() == '-' && is_digit(peek_next())
17 ) {
18 tokens.push_back(lex_number());
19 } else if (peek() == '/' && peek_next() == '/') {
20 advance_while([this](auto ch) { return ch != '\n'; });
21 } else if (peek() == '=') {
22 tokens.push_back(lex_equal());
23 } else if (peek() == ',') {
24 tokens.push_back(lex_comma());
25 } else if (peek() == ';') {
26 tokens.push_back(lex_semicolon());
27 } else {
28 tokens.push_back(lex_unknown());
29 }
30 }
31 tokens.push_back(lex_end_of_file());
32 return tokens;
33}
34
35bool Lexer::is_whitespace(char ch) const noexcept {
36 return std::isspace(ch);
37}
38
39bool Lexer::is_alpha(char ch) const noexcept {
40 return std::isalpha(ch) || ch == '_';
41}
42
43bool Lexer::is_digit(char ch) const noexcept {
44 return std::isdigit(ch);
45}
46
47bool Lexer::is_alnum(char ch) const noexcept {
48 return is_alpha(ch) || is_digit(ch);
49}
50
51bool Lexer::chars_remaining() const noexcept {
52 return position_ < input_.length();
53}
54
55char Lexer::peek() const {
56 return input_[position_];
57}
58
59char Lexer::peek_next() const {
60 if (position_ + 1 >= input_.length()) {
61 return '\0';
62 }
63 return input_[position_ + 1];
64}
65
66char Lexer::advance() {
67 auto current{input_[position_++]};
68 ++column_;
69 if (current == '\n') {
70 ++line_;
71 column_ = 1;
72 }
73 return current;
74}
75
76template <std::predicate<char> Pred>
77void Lexer::advance_while(const Pred& pred) noexcept {
78 while (chars_remaining() && pred(peek())) {
79 advance();
80 }
81}
82
83void Lexer::lex_whitespace() noexcept {
84 advance_while([this](auto ch) { return is_whitespace(ch); });
85}
86
87Token Lexer::lex_word() noexcept {
88 auto start{position_};
89 advance_while([this](auto ch) { return is_alnum(ch); });
90 auto lexeme{make_lexeme_from(start)};
91 if (WORD_TO_TOKEN_TYPE.contains(lexeme)) {
92 return make_token(WORD_TO_TOKEN_TYPE.at(lexeme), lexeme);
93 }
94 return make_token(TokenType::IDENTIFIER, lexeme);
95}
96
97Token Lexer::lex_number() noexcept {
98 auto start{position_};
99 if (peek() == '-') {
100 advance();
101 }
102 advance_while([this](auto ch) { return is_digit(ch); });
103 if (peek() == '.' && is_digit(peek_next())) {
104 advance();
105 advance_while([this](auto ch) { return is_digit(ch); });
106 }
107 return make_token(TokenType::NUMBER, make_lexeme_from(start));
108}
109
110void Lexer::lex_comment() noexcept {
111 advance_while([this](auto ch) { return ch != '\n'; });
112}
113
114Token Lexer::lex_equal() noexcept {
115 advance();
116 return make_token(TokenType::EQUAL, "=");
117}
118
119Token Lexer::lex_comma() noexcept {
120 advance();
121 return make_token(TokenType::COMMA, ",");
122}
123
124Token Lexer::lex_semicolon() noexcept {
125 advance();
126 return make_token(TokenType::SEMICOLON, ";");
127}
128
129Token Lexer::lex_unknown() noexcept {
130 auto unknown{peek()};
131 advance();
132 return make_token(TokenType::UNKNOWN, {unknown});
133}
134
135Token Lexer::lex_end_of_file() noexcept {
136 return make_token(TokenType::END_OF_FILE, "");
137}
138
139Lexeme Lexer::make_lexeme_from(size_type start) const {
140 return input_.substr(start, position_ - start);
141}
142
143Token Lexer::make_token(TokenType type, const Lexeme& lexeme) noexcept {
144 return Token{type, lexeme, line_, column_ - lexeme.length()};
145}
146
147} // namespace vkdb
std::vector< Token > tokenize()
Tokenize the input string.
Definition lexer.cpp:7
Lexer()=delete
Deleted default constructor.