Created
May 25, 2023 22:59
-
-
Save sleexyz/755a26d5d6d7342cdd0a5e55cc22633f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "tree_sitter/parser.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <wchar.h> | |
/*
 * Token kinds produced by this external scanner.
 *
 * The enumerators are used both as indices into the `valid_symbols` array
 * handed to the scan function and as the value stored in
 * `lexer->result_symbol`.
 * NOTE(review): presumably the order must mirror the `externals` list of the
 * grammar definition -- confirm against the grammar before reordering.
 */
enum TokenType {
  INDENT,  /* the new line is indented deeper than the enclosing block */
  DEDENT,  /* the new line is indented less than the enclosing block */
  NEWLINE, /* a line ended without a change of indentation level */
};
/*
 * Persistent state of the external scanner: a fixed-capacity stack of
 * indentation widths, one entry per currently open indentation level.
 *
 * Widths are measured the way the scan function counts them (a space counts
 * as 1 column, a tab as 8).
 */
typedef struct {
  /* Indentation width of each open level; slot 0 is the base level (0). */
  uint16_t indent_length_stack[1024];
  /* Number of slots of `indent_length_stack` currently in use. */
  size_t indent_length_stack_size;
} Scanner;
void *tree_sitter_puddlejumper_external_scanner_create() { | |
Scanner *scanner = calloc(1, sizeof(Scanner)); | |
return scanner; | |
} | |
/*
 * Releases a scanner state previously returned by the create function.
 *
 * `payload` may be NULL, in which case nothing happens.
 */
void tree_sitter_puddlejumper_external_scanner_destroy(void *payload) {
  // The state is a single heap block with no nested allocations.
  free(payload);
}
unsigned tree_sitter_puddlejumper_external_scanner_serialize(void *payload, | |
char *buffer) { | |
Scanner *scanner = (Scanner *)payload; | |
size_t i = 0; | |
// The first element of the stack is always 0. | |
for (size_t j = 1; j < scanner->indent_length_stack_size && | |
i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; | |
++j) { | |
buffer[i++] = scanner->indent_length_stack[j]; | |
} | |
return i; | |
} | |
void tree_sitter_puddlejumper_external_scanner_deserialize(void *payload, | |
const char *buffer, | |
unsigned length) { | |
Scanner *scanner = (Scanner *)payload; | |
scanner->indent_length_stack_size = 1; | |
if (length > 0) { | |
for (size_t i = 0; i < length; i++) { | |
scanner->indent_length_stack_size++; | |
scanner->indent_length_stack[scanner->indent_length_stack_size - 1] = | |
buffer[i]; | |
} | |
} | |
} | |
bool tree_sitter_puddlejumper_external_scanner_scan(void *payload, | |
TSLexer *lexer, | |
const bool *valid_symbols) { | |
Scanner *scanner = (Scanner *)payload; | |
lexer->mark_end(lexer); | |
bool found_end_of_line = false; | |
uint32_t indent_length = 0; | |
for (;;) { | |
if (lexer->lookahead == '\n') { | |
found_end_of_line = true; | |
indent_length = 0; | |
lexer->advance(lexer, false); | |
} else if (lexer->lookahead == ' ') { | |
indent_length++; | |
lexer->advance(lexer, false); | |
} else if (lexer->lookahead == '\t') { | |
indent_length += 8; | |
lexer->advance(lexer, false); | |
} else if (lexer->lookahead == '\r') { | |
indent_length = 0; | |
lexer->advance(lexer, false); | |
} else if (lexer->lookahead == '\f') { | |
indent_length = 0; | |
lexer->advance(lexer, false); | |
} else if (lexer->lookahead == 0) { | |
indent_length = 0; | |
found_end_of_line = true; | |
break; | |
} else { | |
break; | |
} | |
} | |
if (found_end_of_line) { | |
uint16_t current_indent_length = | |
scanner->indent_length_stack_size == 0 | |
? 0 | |
: scanner | |
->indent_length_stack[scanner->indent_length_stack_size - 1]; | |
if (!(scanner->indent_length_stack_size == 0)) { | |
if (valid_symbols[INDENT] && indent_length > current_indent_length) { | |
scanner->indent_length_stack_size++; | |
scanner->indent_length_stack[scanner->indent_length_stack_size - 1] = | |
indent_length; | |
lexer->result_symbol = INDENT; | |
return true; | |
} | |
if ((valid_symbols[DEDENT] || !valid_symbols[NEWLINE]) && | |
indent_length < current_indent_length) { | |
scanner->indent_length_stack_size--; | |
lexer->result_symbol = DEDENT; | |
return true; | |
} | |
} | |
if (valid_symbols[NEWLINE] && | |
(current_indent_length == indent_length || | |
(!valid_symbols[INDENT] && !valid_symbols[DEDENT]))) { | |
lexer->result_symbol = NEWLINE; | |
return true; | |
} | |
} | |
return false; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment