// Member initializers: keep the given input and start scanning at offset 0, line 1, column 1.
5 : input_{input}, position_{0}, line_{1}, column_{1} {}
// Walk the input and append one token per recognized lexeme to a vector.
// NOTE(review): this listing is missing several original lines (the body of
// the whitespace branch, some branch headers, closing braces and the final
// return), so only the visible dispatch is annotated here.
8 std::vector<Token> tokens;
9 while (chars_remaining()) {
10 if (is_whitespace(peek())) {
12 }
else if (is_alpha(peek())) {
// A letter or underscore starts a keyword or identifier.
13 tokens.push_back(lex_word());
// A minus sign only starts a number when a digit follows it.
16 peek() ==
'-' && is_digit(peek_next())
18 tokens.push_back(lex_number());
19 }
else if (peek() ==
'/' && peek_next() ==
'/') {
// Two slashes open a line comment: skip characters until a newline is seen; no token is emitted.
20 advance_while([
this](
auto ch) {
return ch !=
'\n'; });
21 }
else if (peek() ==
'=') {
22 tokens.push_back(lex_equal());
23 }
else if (peek() ==
',') {
24 tokens.push_back(lex_comma());
25 }
else if (peek() ==
';') {
26 tokens.push_back(lex_semicolon());
// Anything unrecognized is still recorded, as an UNKNOWN token.
28 tokens.push_back(lex_unknown());
// Append the END_OF_FILE marker token.
31 tokens.push_back(lex_end_of_file());
35bool Lexer::is_whitespace(
char ch)
const noexcept {
36 return std::isspace(ch);
39bool Lexer::is_alpha(
char ch)
const noexcept {
40 return std::isalpha(ch) || ch ==
'_';
43bool Lexer::is_digit(
char ch)
const noexcept {
44 return std::isdigit(ch);
47bool Lexer::is_alnum(
char ch)
const noexcept {
48 return is_alpha(ch) || is_digit(ch);
51bool Lexer::chars_remaining() const noexcept {
52 return position_ < input_.length();
55char Lexer::peek()
const {
56 return input_[position_];
// Return the character one past the cursor without consuming anything.
59char Lexer::peek_next()
const {
// Guard against indexing past the end of the input.
// NOTE(review): the value returned from this branch is not visible in this listing.
60 if (position_ + 1 >= input_.length()) {
63 return input_[position_ + 1];
// Consume the character under the cursor and move the cursor forward by one.
// NOTE(review): the rest of this function (the newline branch body and the
// return statement) is not visible in this listing.
66char Lexer::advance() {
67 auto current{input_[position_++]};
// Newlines are special-cased; presumably this is where line and column are updated - confirm.
69 if (current ==
'\n') {
// Loop for as long as input remains and pred accepts the character under the cursor.
// Pred must satisfy std::predicate for a char argument.
// NOTE(review): the loop body is not visible in this listing; presumably it calls advance() - confirm.
76template <std::predicate<
char> Pred>
77void Lexer::advance_while(
const Pred& pred)
noexcept {
78 while (chars_remaining() && pred(peek())) {
83void Lexer::lex_whitespace() noexcept {
84 advance_while([
this](
auto ch) {
return is_whitespace(ch); });
87Token Lexer::lex_word() noexcept {
88 auto start{position_};
89 advance_while([
this](
auto ch) {
return is_alnum(ch); });
90 auto lexeme{make_lexeme_from(start)};
91 if (WORD_TO_TOKEN_TYPE.contains(lexeme)) {
92 return make_token(WORD_TO_TOKEN_TYPE.at(lexeme), lexeme);
94 return make_token(TokenType::IDENTIFIER, lexeme);
// Lex a numeric literal starting at the cursor and return it as a NUMBER token.
// NOTE(review): some original lines are missing from this listing; presumably
// they handle the leading sign and consume the decimal point - confirm.
97Token Lexer::lex_number() noexcept {
// Remember where the lexeme begins so its text can be sliced out at the end.
98 auto start{position_};
// Integer part: a run of digits.
102 advance_while([
this](
auto ch) {
return is_digit(ch); });
// A dot belongs to the number only when a digit follows it.
103 if (peek() ==
'.' && is_digit(peek_next())) {
// Fractional part: another run of digits.
105 advance_while([
this](
auto ch) {
return is_digit(ch); });
107 return make_token(TokenType::NUMBER, make_lexeme_from(start));
110void Lexer::lex_comment() noexcept {
111 advance_while([
this](
auto ch) {
return ch !=
'\n'; });
// Build the EQUAL token for an equals sign.
// NOTE(review): one original line between the signature and the return is
// missing from this listing; presumably it consumes the character - confirm.
114Token Lexer::lex_equal() noexcept {
116 return make_token(TokenType::EQUAL,
"=");
// Build the COMMA token for a comma.
// NOTE(review): one original line between the signature and the return is
// missing from this listing; presumably it consumes the character - confirm.
119Token Lexer::lex_comma() noexcept {
121 return make_token(TokenType::COMMA,
",");
// Build the SEMICOLON token for a semicolon.
// NOTE(review): one original line between the signature and the return is
// missing from this listing; presumably it consumes the character - confirm.
124Token Lexer::lex_semicolon() noexcept {
126 return make_token(TokenType::SEMICOLON,
";");
// Wrap the unrecognized character under the cursor in an UNKNOWN token.
129Token Lexer::lex_unknown() noexcept {
// Capture the character first; it becomes the one-character lexeme of the token.
130 auto unknown{peek()};
// NOTE(review): one original line is missing here; presumably it consumes the character - confirm.
132 return make_token(TokenType::UNKNOWN, {unknown});
135Token Lexer::lex_end_of_file() noexcept {
136 return make_token(TokenType::END_OF_FILE,
"");
139Lexeme Lexer::make_lexeme_from(size_type start)
const {
140 return input_.substr(start, position_ - start);
143Token Lexer::make_token(TokenType type,
const Lexeme& lexeme)
noexcept {
144 return Token{type, lexeme, line_, column_ - lexeme.length()};
std::vector< Token > tokenize()
Tokenize the input string.
Lexer()=delete
Deleted default constructor.