Setup a test-grammars local repository for testing

Previously we had a dependency on the grammar repository that will be
used for language support in Helix. Instead this commit switches us to
a local repository inline here in the test-grammars directory.

We don't need Helix's full language support for testing and using that
repository means that we could need to update tests to accommodate for
updated grammars and queries in that repository. Instead we can
duplicate part of that repository here.

This also makes testing a bit simpler - previously we were downloading
a (currently private) repository when running `cargo test` which is a
bit icky and complicates the CI setup. (CI is currently failing because
we haven't given the proper credentials to the runner.) Testing with
locals files is a much smoother experience.
This commit is contained in:
Michael Davis
2025-02-12 19:29:20 -05:00
parent 6871cf5810
commit cecc621e06
54 changed files with 6446 additions and 7 deletions

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
test-grammars/*/*.scm linguist-vendored
test-grammars/*/src/** linguist-vendored
test-grammars/*/src/{parser.c,grammar.json,scanner.*} binary

3
.gitignore vendored
View File

@@ -1,4 +1,5 @@
target
result
.direnv
test-grammars/
/test-grammars/*/*.so
/test-grammars/*/.BUILD_COOKIE

View File

@@ -15,7 +15,6 @@ use crate::injections_query::{InjectionLanguageMarker, InjectionsQuery};
use crate::Language;
static GRAMMARS: Lazy<Vec<PathBuf>> = Lazy::new(|| {
fs::create_dir_all("../test-grammars").unwrap();
let skidder_config = skidder_config();
skidder::fetch(&skidder_config, false).unwrap();
skidder::build_all_grammars(&skidder_config, false, None).unwrap();
@@ -26,12 +25,11 @@ static GRAMMARS: Lazy<Vec<PathBuf>> = Lazy::new(|| {
fn skidder_config() -> skidder::Config {
skidder::Config {
repos: vec![Repo::Git {
name: "helix-language-support".to_owned(),
remote: "git@github.com:helix-editor/tree-sitter-grammars.git".into(),
branch: "reversed".into(),
repos: vec![Repo::Local {
// `./test-grammars` in the root of the repo.
path: Path::new("../test-grammars").canonicalize().unwrap(),
}],
index: Path::new("../test-grammars").canonicalize().unwrap(),
index: PathBuf::new(),
verbose: true,
}
}

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Santos Gallegos
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

41
test-grammars/comment/highlights.scm vendored Normal file
View File

@@ -0,0 +1,41 @@
(tag
(name) @ui.text
(user)? @constant)
; Hint level tags
((tag (name) @hint)
(#any-of? @hint "HINT" "MARK" "PASSED" "STUB" "MOCK"))
("text" @hint
(#any-of? @hint "HINT" "MARK" "PASSED" "STUB" "MOCK"))
; Info level tags
((tag (name) @info)
(#any-of? @info "INFO" "NOTE" "TODO" "PERF" "OPTIMIZE" "PERFORMANCE" "QUESTION" "ASK"))
("text" @info
(#any-of? @info "INFO" "NOTE" "TODO" "PERF" "OPTIMIZE" "PERFORMANCE" "QUESTION" "ASK"))
; Warning level tags
((tag (name) @warning)
(#any-of? @warning "HACK" "WARN" "WARNING" "TEST" "TEMP"))
("text" @warning
(#any-of? @warning "HACK" "WARN" "WARNING" "TEST" "TEMP"))
; Error level tags
((tag (name) @error)
(#any-of? @error "BUG" "FIXME" "ISSUE" "XXX" "FIX" "SAFETY" "FIXIT" "FAILED" "DEBUG" "INVARIANT" "COMPLIANCE"))
("text" @error
(#any-of? @error "BUG" "FIXME" "ISSUE" "XXX" "FIX" "SAFETY" "FIXIT" "FAILED" "DEBUG" "INVARIANT" "COMPLIANCE"))
; Issue number (#123)
("text" @constant.numeric
(#match? @constant.numeric "^#[0-9]+$"))
; User mention (@user)
("text" @tag
(#match? @tag "^[@][a-zA-Z0-9_-]+$"))
(uri) @markup.link.url

View File

@@ -0,0 +1,6 @@
{
"repo": "https://github.com/stsewd/tree-sitter-comment",
"rev": "aefcc2813392eb6ffe509aa0fc8b4e9b57413ee1",
"license": "MIT",
"compressed": true
}

BIN
test-grammars/comment/src/grammar.json vendored Normal file

Binary file not shown.

BIN
test-grammars/comment/src/parser.c vendored Normal file

Binary file not shown.

35
test-grammars/comment/src/scanner.c vendored Normal file
View File

@@ -0,0 +1,35 @@
#include <tree_sitter/parser.h>
#include "tree_sitter_comment/parser.c"
#include "tree_sitter_comment/tokens.h"
void* tree_sitter_comment_external_scanner_create()
{
return NULL;
}
void tree_sitter_comment_external_scanner_destroy(void* payload)
{
}
unsigned tree_sitter_comment_external_scanner_serialize(
void* payload,
char* buffer)
{
return 0;
}
void tree_sitter_comment_external_scanner_deserialize(
void* payload,
const char* buffer,
unsigned length)
{
}
bool tree_sitter_comment_external_scanner_scan(
void* payload,
TSLexer* lexer,
const bool* valid_symbols)
{
return parse(lexer, valid_symbols);
}

View File

@@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,66 @@
#include "chars.h"
bool is_upper(int32_t c)
{
const int32_t upper = 65;
const int32_t lower = 90;
return c >= upper && c <= lower;
}
bool is_digit(int32_t c)
{
const int32_t upper = 48;
const int32_t lower = 57;
return c >= upper && c <= lower;
}
bool is_newline(int32_t c)
{
const int32_t newline_chars[] = {
CHAR_EOF,
CHAR_NEWLINE,
CHAR_CARRIAGE_RETURN,
};
const int length = sizeof(newline_chars) / sizeof(int32_t);
for (int i = 0; i < length; i++) {
if (c == newline_chars[i]) {
return true;
}
}
return false;
}
bool is_space(int32_t c)
{
const int32_t space_chars[] = {
CHAR_SPACE,
CHAR_FORM_FEED,
CHAR_TAB,
CHAR_VERTICAL_TAB,
};
const int length = sizeof(space_chars) / sizeof(int32_t);
bool is_space_char = false;
for (int i = 0; i < length; i++) {
if (c == space_chars[i]) {
is_space_char = true;
break;
}
}
return is_space_char || is_newline(c);
}
/// Check if the character is allowed inside the name.
bool is_internal_char(int32_t c)
{
const int32_t valid_chars[] = {
'-',
'_',
};
const int length = sizeof(valid_chars) / sizeof(int32_t);
for (int i = 0; i < length; i++) {
if (c == valid_chars[i]) {
return true;
}
}
return false;
}

View File

@@ -0,0 +1,22 @@
#ifndef TREE_SITTER_COMMENT_CHARS_H
#define TREE_SITTER_COMMENT_CHARS_H
#include <stdbool.h>
#include <stdint.h>
#define CHAR_EOF 0
#define CHAR_NEWLINE 10
#define CHAR_CARRIAGE_RETURN 13
#define CHAR_SPACE ' '
#define CHAR_FORM_FEED '\f'
#define CHAR_TAB '\t'
#define CHAR_VERTICAL_TAB '\v'
bool is_internal_char(int32_t c);
bool is_newline(int32_t c);
bool is_space(int32_t c);
bool is_upper(int32_t c);
bool is_digit(int32_t c);
#endif /* ifndef TREE_SITTER_COMMENT_CHARS_H */

View File

@@ -0,0 +1,97 @@
#include "parser.h"
#include "chars.c"
#include "tokens.h"
#include <stdbool.h>
#include <stdio.h>
/// Parse the name of the tag.
///
/// They can be of the form:
/// - TODO:
/// - TODO: text
/// - TODO(stsewd):
/// - TODO(stsewd): text
/// - TODO (stsewd): text
bool parse_tagname(TSLexer* lexer, const bool* valid_symbols)
{
if (!is_upper(lexer->lookahead) || !valid_symbols[T_TAGNAME]) {
return false;
}
int32_t previous = lexer->lookahead;
lexer->advance(lexer, false);
while (is_upper(lexer->lookahead)
|| is_digit(lexer->lookahead)
|| is_internal_char(lexer->lookahead)) {
previous = lexer->lookahead;
lexer->advance(lexer, false);
}
// The tag name ends here.
// But we keep parsing to see if it's a valid tag name.
lexer->mark_end(lexer);
// It can't end with an internal char.
if (is_internal_char(previous)) {
return false;
}
// For the user component this is `\s*(`.
// We don't parse that part, we just need to be sure it ends with `:\s`.
if ((is_space(lexer->lookahead) && !is_newline(lexer->lookahead))
|| lexer->lookahead == '(') {
// Skip white spaces.
while (is_space(lexer->lookahead) && !is_newline(lexer->lookahead)) {
lexer->advance(lexer, false);
}
// Checking aperture.
if (lexer->lookahead != '(') {
return false;
}
lexer->advance(lexer, false);
// Checking closure.
int user_length = 0;
while (lexer->lookahead != ')') {
if (is_newline(lexer->lookahead)) {
return false;
}
lexer->advance(lexer, false);
user_length++;
}
if (user_length <= 0) {
return false;
}
lexer->advance(lexer, false);
}
// It should end with `:`...
if (lexer->lookahead != ':') {
return false;
}
// ... and be followed by one space.
lexer->advance(lexer, false);
if (!is_space(lexer->lookahead)) {
return false;
}
lexer->result_symbol = T_TAGNAME;
return true;
}
bool parse(TSLexer* lexer, const bool* valid_symbols)
{
// If all valid symbols are true, tree-sitter is in correction mode.
// We don't want to parse anything in that case.
if (valid_symbols[T_INVALID_TOKEN]) {
return false;
}
if (is_upper(lexer->lookahead) && valid_symbols[T_TAGNAME]) {
return parse_tagname(lexer, valid_symbols);
}
return false;
}

View File

@@ -0,0 +1,9 @@
#ifndef TREE_SITTER_COMMENT_PARSER_H
#define TREE_SITTER_COMMENT_PARSER_H
#include <tree_sitter/parser.h>
bool parse_tagname(TSLexer* lexer, const bool* valid_symbols);
bool parse(TSLexer* lexer, const bool* valid_symbols);
#endif /* ifndef TREE_SITTER_COMMENT_PARSER_H */

View File

@@ -0,0 +1,9 @@
#ifndef TREE_SITTER_COMMENT_TOKENS_H
#define TREE_SITTER_COMMENT_TOKENS_H
enum TokenType {
T_TAGNAME,
T_INVALID_TOKEN,
};
#endif /* ifndef TREE_SITTER_COMMENT_TOKENS_H */

View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2014 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

47
test-grammars/html/highlights.scm vendored Normal file
View File

@@ -0,0 +1,47 @@
(tag_name) @tag
(erroneous_end_tag_name) @error
(doctype) @constant
(attribute_name) @attribute
(attribute [(attribute_value) (quoted_attribute_value)] @string)
((attribute
(attribute_name) @attribute
(quoted_attribute_value (attribute_value) @markup.link.url))
(#any-of? @attribute "href" "src"))
((element
(start_tag
(tag_name) @_tag)
(text) @markup.link.label)
(#eq? @_tag "a"))
((element
(start_tag
(tag_name) @_tag)
(text) @markup.bold)
(#any-of? @_tag "strong" "b"))
((element
(start_tag
(tag_name) @_tag)
(text) @markup.italic)
(#any-of? @_tag "em" "i"))
((element
(start_tag
(tag_name) @_tag)
(text) @markup.strikethrough)
(#any-of? @_tag "s" "del"))
[
"<"
">"
"</"
"/>"
"<!"
] @punctuation.bracket
"=" @punctuation.delimiter
(comment) @comment

10
test-grammars/html/injections.scm vendored Normal file
View File

@@ -0,0 +1,10 @@
((comment) @injection.content
(#set! injection.language "comment"))
((script_element
(raw_text) @injection.content)
(#set! injection.language "javascript"))
((style_element
(raw_text) @injection.content)
(#set! injection.language "css"))

View File

@@ -0,0 +1,6 @@
{
"repo": "https://github.com/tree-sitter/tree-sitter-html",
"rev": "29f53d8f4f2335e61bf6418ab8958dac3282077a",
"license": "MIT",
"compressed": true
}

BIN
test-grammars/html/src/grammar.json vendored Normal file

Binary file not shown.

BIN
test-grammars/html/src/parser.c vendored Normal file

Binary file not shown.

310
test-grammars/html/src/scanner.cc vendored Normal file
View File

@@ -0,0 +1,310 @@
#include <tree_sitter/parser.h>
#include <algorithm>
#include <vector>
#include <string>
#include <cwctype>
#include <cstring>
#include "tag.h"
namespace {
using std::vector;
using std::string;
enum TokenType {
START_TAG_NAME,
SCRIPT_START_TAG_NAME,
STYLE_START_TAG_NAME,
END_TAG_NAME,
ERRONEOUS_END_TAG_NAME,
SELF_CLOSING_TAG_DELIMITER,
IMPLICIT_END_TAG,
RAW_TEXT,
COMMENT
};
struct Scanner {
Scanner() {}
unsigned serialize(char *buffer) {
uint16_t tag_count = tags.size() > UINT16_MAX ? UINT16_MAX : tags.size();
uint16_t serialized_tag_count = 0;
unsigned i = sizeof(tag_count);
std::memcpy(&buffer[i], &tag_count, sizeof(tag_count));
i += sizeof(tag_count);
for (; serialized_tag_count < tag_count; serialized_tag_count++) {
Tag &tag = tags[serialized_tag_count];
if (tag.type == CUSTOM) {
unsigned name_length = tag.custom_tag_name.size();
if (name_length > UINT8_MAX) name_length = UINT8_MAX;
if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
buffer[i++] = static_cast<char>(tag.type);
buffer[i++] = name_length;
tag.custom_tag_name.copy(&buffer[i], name_length);
i += name_length;
} else {
if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
buffer[i++] = static_cast<char>(tag.type);
}
}
std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count));
return i;
}
void deserialize(const char *buffer, unsigned length) {
tags.clear();
if (length > 0) {
unsigned i = 0;
uint16_t tag_count, serialized_tag_count;
std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count));
i += sizeof(serialized_tag_count);
std::memcpy(&tag_count, &buffer[i], sizeof(tag_count));
i += sizeof(tag_count);
tags.resize(tag_count);
for (unsigned j = 0; j < serialized_tag_count; j++) {
Tag &tag = tags[j];
tag.type = static_cast<TagType>(buffer[i++]);
if (tag.type == CUSTOM) {
uint16_t name_length = static_cast<uint8_t>(buffer[i++]);
tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]);
i += name_length;
}
}
}
}
string scan_tag_name(TSLexer *lexer) {
string tag_name;
while (iswalnum(lexer->lookahead) ||
lexer->lookahead == '-' ||
lexer->lookahead == ':') {
tag_name += towupper(lexer->lookahead);
lexer->advance(lexer, false);
}
return tag_name;
}
bool scan_comment(TSLexer *lexer) {
if (lexer->lookahead != '-') return false;
lexer->advance(lexer, false);
if (lexer->lookahead != '-') return false;
lexer->advance(lexer, false);
unsigned dashes = 0;
while (lexer->lookahead) {
switch (lexer->lookahead) {
case '-':
++dashes;
break;
case '>':
if (dashes >= 2) {
lexer->result_symbol = COMMENT;
lexer->advance(lexer, false);
lexer->mark_end(lexer);
return true;
}
default:
dashes = 0;
}
lexer->advance(lexer, false);
}
return false;
}
bool scan_raw_text(TSLexer *lexer) {
if (!tags.size()) return false;
lexer->mark_end(lexer);
const string &end_delimiter = tags.back().type == SCRIPT
? "</SCRIPT"
: "</STYLE";
unsigned delimiter_index = 0;
while (lexer->lookahead) {
if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) {
delimiter_index++;
if (delimiter_index == end_delimiter.size()) break;
lexer->advance(lexer, false);
} else {
delimiter_index = 0;
lexer->advance(lexer, false);
lexer->mark_end(lexer);
}
}
lexer->result_symbol = RAW_TEXT;
return true;
}
bool scan_implicit_end_tag(TSLexer *lexer) {
Tag *parent = tags.empty() ? NULL : &tags.back();
bool is_closing_tag = false;
if (lexer->lookahead == '/') {
is_closing_tag = true;
lexer->advance(lexer, false);
} else {
if (parent && parent->is_void()) {
tags.pop_back();
lexer->result_symbol = IMPLICIT_END_TAG;
return true;
}
}
string tag_name = scan_tag_name(lexer);
if (tag_name.empty()) return false;
Tag next_tag = Tag::for_name(tag_name);
if (is_closing_tag) {
// The tag correctly closes the topmost element on the stack
if (!tags.empty() && tags.back() == next_tag) return false;
// Otherwise, dig deeper and queue implicit end tags (to be nice in
// the case of malformed HTML)
if (std::find(tags.begin(), tags.end(), next_tag) != tags.end()) {
tags.pop_back();
lexer->result_symbol = IMPLICIT_END_TAG;
return true;
}
} else if (parent && !parent->can_contain(next_tag)) {
tags.pop_back();
lexer->result_symbol = IMPLICIT_END_TAG;
return true;
}
return false;
}
bool scan_start_tag_name(TSLexer *lexer) {
string tag_name = scan_tag_name(lexer);
if (tag_name.empty()) return false;
Tag tag = Tag::for_name(tag_name);
tags.push_back(tag);
switch (tag.type) {
case SCRIPT:
lexer->result_symbol = SCRIPT_START_TAG_NAME;
break;
case STYLE:
lexer->result_symbol = STYLE_START_TAG_NAME;
break;
default:
lexer->result_symbol = START_TAG_NAME;
break;
}
return true;
}
bool scan_end_tag_name(TSLexer *lexer) {
string tag_name = scan_tag_name(lexer);
if (tag_name.empty()) return false;
Tag tag = Tag::for_name(tag_name);
if (!tags.empty() && tags.back() == tag) {
tags.pop_back();
lexer->result_symbol = END_TAG_NAME;
} else {
lexer->result_symbol = ERRONEOUS_END_TAG_NAME;
}
return true;
}
bool scan_self_closing_tag_delimiter(TSLexer *lexer) {
lexer->advance(lexer, false);
if (lexer->lookahead == '>') {
lexer->advance(lexer, false);
if (!tags.empty()) {
tags.pop_back();
lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
}
return true;
}
return false;
}
bool scan(TSLexer *lexer, const bool *valid_symbols) {
while (iswspace(lexer->lookahead)) {
lexer->advance(lexer, true);
}
if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) {
return scan_raw_text(lexer);
}
switch (lexer->lookahead) {
case '<':
lexer->mark_end(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '!') {
lexer->advance(lexer, false);
return scan_comment(lexer);
}
if (valid_symbols[IMPLICIT_END_TAG]) {
return scan_implicit_end_tag(lexer);
}
break;
case '\0':
if (valid_symbols[IMPLICIT_END_TAG]) {
return scan_implicit_end_tag(lexer);
}
break;
case '/':
if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) {
return scan_self_closing_tag_delimiter(lexer);
}
break;
default:
if ((valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME]) && !valid_symbols[RAW_TEXT]) {
return valid_symbols[START_TAG_NAME]
? scan_start_tag_name(lexer)
: scan_end_tag_name(lexer);
}
}
return false;
}
vector<Tag> tags;
};
}
extern "C" {
void *tree_sitter_html_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
void tree_sitter_html_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
}
}

380
test-grammars/html/src/tag.h vendored Normal file
View File

@@ -0,0 +1,380 @@
#include <string>
#include <map>
using std::string;
using std::map;
enum TagType {
AREA,
BASE,
BASEFONT,
BGSOUND,
BR,
COL,
COMMAND,
EMBED,
FRAME,
HR,
IMAGE,
IMG,
INPUT,
ISINDEX,
KEYGEN,
LINK,
MENUITEM,
META,
NEXTID,
PARAM,
SOURCE,
TRACK,
WBR,
END_OF_VOID_TAGS,
A,
ABBR,
ADDRESS,
ARTICLE,
ASIDE,
AUDIO,
B,
BDI,
BDO,
BLOCKQUOTE,
BODY,
BUTTON,
CANVAS,
CAPTION,
CITE,
CODE,
COLGROUP,
DATA,
DATALIST,
DD,
DEL,
DETAILS,
DFN,
DIALOG,
DIV,
DL,
DT,
EM,
FIELDSET,
FIGCAPTION,
FIGURE,
FOOTER,
FORM,
H1,
H2,
H3,
H4,
H5,
H6,
HEAD,
HEADER,
HGROUP,
HTML,
I,
IFRAME,
INS,
KBD,
LABEL,
LEGEND,
LI,
MAIN,
MAP,
MARK,
MATH,
MENU,
METER,
NAV,
NOSCRIPT,
OBJECT,
OL,
OPTGROUP,
OPTION,
OUTPUT,
P,
PICTURE,
PRE,
PROGRESS,
Q,
RB,
RP,
RT,
RTC,
RUBY,
S,
SAMP,
SCRIPT,
SECTION,
SELECT,
SLOT,
SMALL,
SPAN,
STRONG,
STYLE,
SUB,
SUMMARY,
SUP,
SVG,
TABLE,
TBODY,
TD,
TEMPLATE,
TEXTAREA,
TFOOT,
TH,
THEAD,
TIME,
TITLE,
TR,
U,
UL,
VAR,
VIDEO,
CUSTOM,
};
static const map<string, TagType> get_tag_map() {
map<string, TagType> result;
#define TAG(name) result[#name] = name
TAG(AREA);
TAG(BASE);
TAG(BASEFONT);
TAG(BGSOUND);
TAG(BR);
TAG(COL);
TAG(COMMAND);
TAG(EMBED);
TAG(FRAME);
TAG(HR);
TAG(IMAGE);
TAG(IMG);
TAG(INPUT);
TAG(ISINDEX);
TAG(KEYGEN);
TAG(LINK);
TAG(MENUITEM);
TAG(META);
TAG(NEXTID);
TAG(PARAM);
TAG(SOURCE);
TAG(TRACK);
TAG(WBR);
TAG(A);
TAG(ABBR);
TAG(ADDRESS);
TAG(ARTICLE);
TAG(ASIDE);
TAG(AUDIO);
TAG(B);
TAG(BDI);
TAG(BDO);
TAG(BLOCKQUOTE);
TAG(BODY);
TAG(BUTTON);
TAG(CANVAS);
TAG(CAPTION);
TAG(CITE);
TAG(CODE);
TAG(COLGROUP);
TAG(DATA);
TAG(DATALIST);
TAG(DD);
TAG(DEL);
TAG(DETAILS);
TAG(DFN);
TAG(DIALOG);
TAG(DIV);
TAG(DL);
TAG(DT);
TAG(EM);
TAG(FIELDSET);
TAG(FIGCAPTION);
TAG(FIGURE);
TAG(FOOTER);
TAG(FORM);
TAG(H1);
TAG(H2);
TAG(H3);
TAG(H4);
TAG(H5);
TAG(H6);
TAG(HEAD);
TAG(HEADER);
TAG(HGROUP);
TAG(HTML);
TAG(I);
TAG(IFRAME);
TAG(INS);
TAG(KBD);
TAG(LABEL);
TAG(LEGEND);
TAG(LI);
TAG(MAIN);
TAG(MAP);
TAG(MARK);
TAG(MATH);
TAG(MENU);
TAG(METER);
TAG(NAV);
TAG(NOSCRIPT);
TAG(OBJECT);
TAG(OL);
TAG(OPTGROUP);
TAG(OPTION);
TAG(OUTPUT);
TAG(P);
TAG(PICTURE);
TAG(PRE);
TAG(PROGRESS);
TAG(Q);
TAG(RB);
TAG(RP);
TAG(RT);
TAG(RTC);
TAG(RUBY);
TAG(S);
TAG(SAMP);
TAG(SCRIPT);
TAG(SECTION);
TAG(SELECT);
TAG(SLOT);
TAG(SMALL);
TAG(SPAN);
TAG(STRONG);
TAG(STYLE);
TAG(SUB);
TAG(SUMMARY);
TAG(SUP);
TAG(SVG);
TAG(TABLE);
TAG(TBODY);
TAG(TD);
TAG(TEMPLATE);
TAG(TEXTAREA);
TAG(TFOOT);
TAG(TH);
TAG(THEAD);
TAG(TIME);
TAG(TITLE);
TAG(TR);
TAG(U);
TAG(UL);
TAG(VAR);
TAG(VIDEO);
#undef TAG
return result;
}
static const map<string, TagType> TAG_TYPES_BY_TAG_NAME = get_tag_map();
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
ADDRESS,
ARTICLE,
ASIDE,
BLOCKQUOTE,
DETAILS,
DIV,
DL,
FIELDSET,
FIGCAPTION,
FIGURE,
FOOTER,
FORM,
H1,
H2,
H3,
H4,
H5,
H6,
HEADER,
HR,
MAIN,
NAV,
OL,
P,
PRE,
SECTION,
};
static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = (
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
sizeof(TagType)
);
struct Tag {
TagType type;
string custom_tag_name;
// This default constructor is used in the case where there is not enough space
// in the serialization buffer to store all of the tags. In that case, tags
// that cannot be serialized will be treated as having an unknown type. These
// tags will be closed via implicit end tags regardless of the next closing
// tag is encountered.
Tag() : type(END_OF_VOID_TAGS) {}
Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {}
bool operator==(const Tag &other) const {
if (type != other.type) return false;
if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false;
return true;
}
inline bool is_void() const {
return type < END_OF_VOID_TAGS;
}
inline bool can_contain(const Tag &tag) {
TagType child = tag.type;
switch (type) {
case LI: return child != LI;
case DT:
case DD:
return child != DT && child != DD;
case P:
return std::find(
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
tag.type
) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;
case COLGROUP:
return child == COL;
case RB:
case RT:
case RP:
return child != RB && child != RT && child != RP;
case OPTGROUP:
return child != OPTGROUP;
case TR:
return child != TR;
case TD:
case TH:
return child != TD && child != TH && child != TR;
default:
return true;
}
}
static inline Tag for_name(const string &name) {
map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name);
if (type != TAG_TYPES_BY_TAG_NAME.end()) {
return Tag(type->second, string());
} else {
return Tag(CUSTOM, name);
}
}
};

View File

@@ -0,0 +1,223 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
};
/*
* Lexer Macros
*/
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Matthias Deiml
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,39 @@
;; From nvim-treesitter/nvim-treesitter
[
(code_span)
(link_title)
] @markup.raw.inline
[
(emphasis_delimiter)
(code_span_delimiter)
] @punctuation.bracket
(emphasis) @markup.italic
(strong_emphasis) @markup.bold
(strikethrough) @markup.strikethrough
[
(link_destination)
(uri_autolink)
] @markup.link.url
[
(link_text)
(image_description)
] @markup.link.text
(link_label) @markup.link.label
[
(backslash_escape)
(hard_line_break)
] @constant.character.escape
(image ["[" "]" "(" ")"] @punctuation.bracket)
(image "!" @punctuation.special)
(inline_link ["[" "]" "(" ")"] @punctuation.bracket)
(shortcut_link ["[" "]"] @punctuation.bracket)

View File

@@ -0,0 +1,7 @@
((html_tag) @injection.content
(#set! injection.language "html")
(#set! injection.include-unnamed-children)
(#set! injection.combined))
((latex_block) @injection.content (#set! injection.language "latex") (#set! injection.include-unnamed-children))

View File

@@ -0,0 +1,6 @@
{
"repo": "https://github.com/tree-sitter-grammars/tree-sitter-markdown",
"rev": "62516e8c78380e3b51d5b55727995d2c511436d8",
"license": "MIT",
"compressed": true
}

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,397 @@
#include "tree_sitter/parser.h"
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
// For explanation of the tokens see grammar.js
typedef enum {
ERROR,
TRIGGER_ERROR,
CODE_SPAN_START,
CODE_SPAN_CLOSE,
EMPHASIS_OPEN_STAR,
EMPHASIS_OPEN_UNDERSCORE,
EMPHASIS_CLOSE_STAR,
EMPHASIS_CLOSE_UNDERSCORE,
LAST_TOKEN_WHITESPACE,
LAST_TOKEN_PUNCTUATION,
STRIKETHROUGH_OPEN,
STRIKETHROUGH_CLOSE,
LATEX_SPAN_START,
LATEX_SPAN_CLOSE,
UNCLOSED_SPAN
} TokenType;
// Determines if a character is punctuation as defined by the markdown spec.
static bool is_punctuation(char chr) {
return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') ||
(chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~');
}
// State bitflags used with `Scanner.state`
// TODO
static UNUSED const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
// Current delimiter run is opening
static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;
// Convenience function to emit the error token. This is done to stop invalid
// parse branches. Specifically:
// 1. When encountering a newline after a line break that ended a paragraph, and
// no new block
// has been opened.
// 2. When encountering a new block after a soft line break.
// 3. When a `$._trigger_error` token is valid, which is used to stop parse
// branches through
// normal tree-sitter grammar rules.
//
// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
// grammar.js
static bool error(TSLexer *lexer) {
lexer->result_symbol = ERROR;
return true;
}
typedef struct {
// Parser state flags
uint8_t state;
uint8_t code_span_delimiter_length;
uint8_t latex_span_delimiter_length;
// The number of characters remaining in the currrent emphasis delimiter
// run.
uint8_t num_emphasis_delimiters_left;
} Scanner;
// Write the whole state of a Scanner to a byte buffer
static unsigned serialize(Scanner *s, char *buffer) {
unsigned size = 0;
buffer[size++] = (char)s->state;
buffer[size++] = (char)s->code_span_delimiter_length;
buffer[size++] = (char)s->latex_span_delimiter_length;
buffer[size++] = (char)s->num_emphasis_delimiters_left;
return size;
}
// Read the whole state of a Scanner from a byte buffer
// `serizalize` and `deserialize` should be fully symmetric.
static void deserialize(Scanner *s, const char *buffer, unsigned length) {
s->state = 0;
s->code_span_delimiter_length = 0;
s->latex_span_delimiter_length = 0;
s->num_emphasis_delimiters_left = 0;
if (length > 0) {
size_t size = 0;
s->state = (uint8_t)buffer[size++];
s->code_span_delimiter_length = (uint8_t)buffer[size++];
s->latex_span_delimiter_length = (uint8_t)buffer[size++];
s->num_emphasis_delimiters_left = (uint8_t)buffer[size++];
}
}
static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t *delimiter_length,
const bool *valid_symbols,
const char delimiter,
const TokenType open_token,
const TokenType close_token) {
uint8_t level = 0;
while (lexer->lookahead == delimiter) {
lexer->advance(lexer, false);
level++;
}
lexer->mark_end(lexer);
if (level == *delimiter_length && valid_symbols[close_token]) {
*delimiter_length = 0;
lexer->result_symbol = close_token;
return true;
}
if (valid_symbols[open_token]) {
// Parse ahead to check if there is a closing delimiter
size_t close_level = 0;
while (!lexer->eof(lexer)) {
if (lexer->lookahead == delimiter) {
close_level++;
} else {
if (close_level == level) {
// Found a matching delimiter
break;
}
close_level = 0;
}
lexer->advance(lexer, false);
}
if (close_level == level) {
*delimiter_length = level;
lexer->result_symbol = open_token;
return true;
}
if (valid_symbols[UNCLOSED_SPAN]) {
lexer->result_symbol = UNCLOSED_SPAN;
return true;
}
}
return false;
}
static bool parse_backtick(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
return parse_leaf_delimiter(lexer, &s->code_span_delimiter_length,
valid_symbols, '`', CODE_SPAN_START,
CODE_SPAN_CLOSE);
}
static bool parse_dollar(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
return parse_leaf_delimiter(lexer, &s->latex_span_delimiter_length,
valid_symbols, '$', LATEX_SPAN_START,
LATEX_SPAN_CLOSE);
}
static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[EMPHASIS_OPEN_STAR]) {
s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
lexer->result_symbol = EMPHASIS_OPEN_STAR;
s->num_emphasis_delimiters_left--;
return true;
}
if (valid_symbols[EMPHASIS_CLOSE_STAR]) {
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of stars
uint8_t star_count = 1;
while (lexer->lookahead == '*') {
star_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_STAR] ||
valid_symbols[EMPHASIS_CLOSE_STAR]) {
// The desicion made for the first star also counts for all the
// following stars in the delimiter run. Rembemer how many there are.
s->num_emphasis_delimiters_left = star_count - 1;
// Look ahead to the next symbol (after the last star) to find out if it
// is whitespace punctuation or other.
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js
// for these tokens for how this is done.
if (valid_symbols[EMPHASIS_CLOSE_STAR] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace)) {
// Closing delimiters take precedence
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
return true;
}
if (!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_OPEN_STAR;
return true;
}
}
return false;
}
static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[STRIKETHROUGH_OPEN]) {
s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
lexer->result_symbol = STRIKETHROUGH_OPEN;
s->num_emphasis_delimiters_left--;
return true;
}
if (valid_symbols[STRIKETHROUGH_CLOSE]) {
lexer->result_symbol = STRIKETHROUGH_CLOSE;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of tildes
uint8_t star_count = 1;
while (lexer->lookahead == '~') {
star_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[STRIKETHROUGH_OPEN] ||
valid_symbols[STRIKETHROUGH_CLOSE]) {
// The desicion made for the first star also counts for all the
// following stars in the delimiter run. Rembemer how many there are.
s->num_emphasis_delimiters_left = star_count - 1;
// Look ahead to the next symbol (after the last star) to find out if it
// is whitespace punctuation or other.
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js
// for these tokens for how this is done.
if (valid_symbols[STRIKETHROUGH_CLOSE] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace)) {
// Closing delimiters take precedence
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = STRIKETHROUGH_CLOSE;
return true;
}
if (!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = STRIKETHROUGH_OPEN;
return true;
}
}
return false;
}
static bool parse_underscore(Scanner *s, TSLexer *lexer,
const bool *valid_symbols) {
lexer->advance(lexer, false);
// If `num_emphasis_delimiters_left` is not zero then we already decided
// that this should be part of an emphasis delimiter run, so interpret it as
// such.
if (s->num_emphasis_delimiters_left > 0) {
// The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it
// should be open or close.
if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) {
s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
s->num_emphasis_delimiters_left--;
return true;
}
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
s->num_emphasis_delimiters_left--;
return true;
}
}
lexer->mark_end(lexer);
// Otherwise count the number of stars
uint8_t underscore_count = 1;
while (lexer->lookahead == '_') {
underscore_count++;
lexer->advance(lexer, false);
}
bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->eof(lexer);
if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] ||
valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
// The desicion made for the first underscore also counts for all the
// following underscores in the delimiter run. Rembemer how many there are.
s->num_emphasis_delimiters_left = underscore_count - 1;
// Look ahead to the next symbol (after the last underscore) to find out if it
// is whitespace punctuation or other.
bool next_symbol_whitespace =
line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
// Information about the last token is in valid_symbols. See grammar.js
// for these tokens for how this is done.
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] &&
!valid_symbols[LAST_TOKEN_WHITESPACE] &&
(!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
next_symbol_punctuation || next_symbol_whitespace)) {
// Closing delimiters take precedence
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
return true;
}
if (!next_symbol_whitespace && (!next_symbol_punctuation ||
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
valid_symbols[LAST_TOKEN_WHITESPACE])) {
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
return true;
}
}
return false;
}
static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
// A normal tree-sitter rule decided that the current branch is invalid and
// now "requests" an error to stop the branch
if (valid_symbols[TRIGGER_ERROR]) {
return error(lexer);
}
// Decide which tokens to consider based on the first non-whitespace
// character
switch (lexer->lookahead) {
case '`':
// A backtick could mark the beginning or ending of a code span or a
// fenced code block.
return parse_backtick(s, lexer, valid_symbols);
case '$':
return parse_dollar(s, lexer, valid_symbols);
case '*':
// A star could either mark the beginning or ending of emphasis, a
// list item or thematic break. This code is similar to the code for
// '_' and '+'.
return parse_star(s, lexer, valid_symbols);
case '_':
return parse_underscore(s, lexer, valid_symbols);
case '~':
return parse_tilde(s, lexer, valid_symbols);
}
return false;
}
void *tree_sitter_markdown_inline_external_scanner_create() {
Scanner *s = (Scanner *)malloc(sizeof(Scanner));
deserialize(s, NULL, 0);
return s;
}
bool tree_sitter_markdown_inline_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
return scan(scanner, lexer, valid_symbols);
}
unsigned tree_sitter_markdown_inline_external_scanner_serialize(void *payload,
char *buffer) {
Scanner *scanner = (Scanner *)payload;
return serialize(scanner, buffer);
}
void tree_sitter_markdown_inline_external_scanner_deserialize(void *payload,
char *buffer,
unsigned length) {
Scanner *scanner = (Scanner *)payload;
deserialize(scanner, buffer, length);
}
void tree_sitter_markdown_inline_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
free(scanner);
}

View File

@@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@@ -0,0 +1,287 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
(_array__grow((Array *)(self), count, array_elem_size(self)), \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \
(self)->size += (count))
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@@ -0,0 +1,230 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Matthias Deiml
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

62
test-grammars/markdown/highlights.scm vendored Normal file
View File

@@ -0,0 +1,62 @@
(setext_heading (paragraph) @markup.heading.1 (setext_h1_underline) @markup.heading.marker)
(setext_heading (paragraph) @markup.heading.2 (setext_h2_underline) @markup.heading.marker)
(atx_heading (atx_h1_marker) @markup.heading.marker) @markup.heading.1
(atx_heading (atx_h2_marker) @markup.heading.marker) @markup.heading.2
(atx_heading (atx_h3_marker) @markup.heading.marker) @markup.heading.3
(atx_heading (atx_h4_marker) @markup.heading.marker) @markup.heading.4
(atx_heading (atx_h5_marker) @markup.heading.marker) @markup.heading.5
(atx_heading (atx_h6_marker) @markup.heading.marker) @markup.heading.6
[
(indented_code_block)
(fenced_code_block)
] @markup.raw.block
(info_string) @label
[
(fenced_code_block_delimiter)
] @punctuation.bracket
[
(link_destination)
] @markup.link.url
[
(link_label)
] @markup.link.label
[
(list_marker_plus)
(list_marker_minus)
(list_marker_star)
] @markup.list.unnumbered
[
(list_marker_dot)
(list_marker_parenthesis)
] @markup.list.numbered
(task_list_marker_checked) @markup.list.checked
(task_list_marker_unchecked) @markup.list.unchecked
(thematic_break) @punctuation.special
[
(block_continuation)
(block_quote_marker)
] @punctuation.special
[
(backslash_escape)
] @string.escape
(block_quote) @markup.quote
(pipe_table_row
"|" @punctuation.special)
(pipe_table_header
"|" @punctuation.special)
(pipe_table_delimiter_row) @punctuation.special

22
test-grammars/markdown/injections.scm vendored Normal file
View File

@@ -0,0 +1,22 @@
; From nvim-treesitter/nvim-treesitter
(fenced_code_block
(code_fence_content) @injection.shebang @injection.content
(#set! injection.include-unnamed-children))
(fenced_code_block
(info_string
(language) @injection.language)
(code_fence_content) @injection.content (#set! injection.include-unnamed-children))
((html_block) @injection.content
(#set! injection.language "html")
(#set! injection.include-unnamed-children)
(#set! injection.combined))
((pipe_table_cell) @injection.content (#set! injection.language "markdown-inline") (#set! injection.include-unnamed-children))
((minus_metadata) @injection.content (#set! injection.language "yaml") (#set! injection.include-unnamed-children))
((plus_metadata) @injection.content (#set! injection.language "toml") (#set! injection.include-unnamed-children))
((inline) @injection.content (#set! injection.language "markdown-inline") (#set! injection.include-unnamed-children))

View File

@@ -0,0 +1,6 @@
{
"repo": "https://github.com/tree-sitter-grammars/tree-sitter-markdown",
"rev": "62516e8c78380e3b51d5b55727995d2c511436d8",
"license": "MIT",
"compressed": true
}

BIN
test-grammars/markdown/src/grammar.json vendored Normal file

Binary file not shown.

BIN
test-grammars/markdown/src/parser.c vendored Normal file

Binary file not shown.

1597
test-grammars/markdown/src/scanner.c vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@@ -0,0 +1,287 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
(_array__grow((Array *)(self), count, array_elem_size(self)), \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \
(self)->size += (count))
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@@ -0,0 +1,230 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2017 Maxim Sokolov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

479
test-grammars/rust/highlights.scm vendored Normal file
View File

@@ -0,0 +1,479 @@
; -------
; Basic identifiers
; -------
; We do not style ? as an operator on purpose as it allows styling ? differently, as many highlighters do. @operator.special might have been a better scope, but @special is already documented so the change would break themes (including the intent of the default theme)
"?" @special
(type_identifier) @type
(identifier) @variable
(field_identifier) @variable.other.member
; -------
; Operators
; -------
[
"*"
"'"
"->"
"=>"
"<="
"="
"=="
"!"
"!="
"%"
"%="
"&"
"&="
"&&"
"|"
"|="
"||"
"^"
"^="
"*"
"*="
"-"
"-="
"+"
"+="
"/"
"/="
">"
"<"
">="
">>"
"<<"
">>="
"<<="
"@"
".."
"..="
"'"
] @operator
; -------
; Paths
; -------
(use_declaration
argument: (identifier) @namespace)
(use_wildcard
(identifier) @namespace)
(extern_crate_declaration
name: (identifier) @namespace
alias: (identifier)? @namespace)
(mod_item
name: (identifier) @namespace)
(scoped_use_list
path: (identifier)? @namespace)
(use_list
(identifier) @namespace)
(use_as_clause
path: (identifier)? @namespace
alias: (identifier) @namespace)
; -------
; Types
; -------
(type_parameters
(type_identifier) @type.parameter)
(constrained_type_parameter
left: (type_identifier) @type.parameter)
(optional_type_parameter
name: (type_identifier) @type.parameter)
((type_arguments (type_identifier) @constant)
(#match? @constant "^[A-Z_]+$"))
(type_arguments (type_identifier) @type)
(tuple_struct_pattern "_" @comment.unused)
((type_arguments (type_identifier) @comment.unused)
(#eq? @comment.unused "_"))
; ---
; Primitives
; ---
(escape_sequence) @constant.character.escape
(primitive_type) @type.builtin
(boolean_literal) @constant.builtin.boolean
(integer_literal) @constant.numeric.integer
(float_literal) @constant.numeric.float
(char_literal) @constant.character
[
(string_literal)
(raw_string_literal)
] @string
(outer_doc_comment_marker "/" @comment)
(inner_doc_comment_marker "!" @comment)
[
(line_comment)
(block_comment)
] @comment
; ---
; Extraneous
; ---
(self) @variable.builtin
(field_initializer
(field_identifier) @variable.other.member)
(shorthand_field_initializer
(identifier) @variable.other.member)
(shorthand_field_identifier) @variable.other.member
(lifetime
"'" @label
(identifier) @label)
(label
"'" @label
(identifier) @label)
; ---
; Punctuation
; ---
[
"::"
"."
";"
","
":"
] @punctuation.delimiter
[
"("
")"
"["
"]"
"{"
"}"
"#"
] @punctuation.bracket
(type_arguments
[
"<"
">"
] @punctuation.bracket)
(type_parameters
[
"<"
">"
] @punctuation.bracket)
(for_lifetimes ["<" ">"] @punctuation.bracket)
(closure_parameters
"|" @punctuation.bracket)
(bracketed_type ["<" ">"] @punctuation.bracket)
; ---
; Variables
; ---
(let_declaration
pattern: [
((identifier) @variable)
((tuple_pattern
(identifier) @variable))
])
; It needs to be anonymous to not conflict with `call_expression` further below.
(_
value: (field_expression
value: (identifier)? @variable
field: (field_identifier) @variable.other.member))
(parameter
pattern: (identifier) @variable.parameter)
(closure_parameters
(identifier) @variable.parameter)
; -------
; Keywords
; -------
(for_expression
"for" @keyword.control.repeat)
(gen_block "gen" @keyword.control)
"in" @keyword.control
[
"match"
"if"
"else"
"try"
] @keyword.control.conditional
[
"while"
"loop"
] @keyword.control.repeat
[
"break"
"continue"
"return"
"await"
"yield"
] @keyword.control.return
"use" @keyword.control.import
(mod_item "mod" @keyword.control.import !body)
(use_as_clause "as" @keyword.control.import)
(type_cast_expression "as" @keyword.operator)
((generic_type
type: (type_identifier) @keyword)
(#eq? @keyword "use"))
[
(crate)
(super)
"as"
"pub"
"mod"
"extern"
"impl"
"where"
"trait"
"for"
"default"
"async"
] @keyword
[
"struct"
"enum"
"union"
"type"
] @keyword.storage.type
"let" @keyword.storage
"fn" @keyword.function
"unsafe" @keyword.special
"macro_rules!" @function.macro
(mutable_specifier) @keyword.storage.modifier.mut
(reference_type "&" @keyword.storage.modifier.ref)
(self_parameter "&" @keyword.storage.modifier.ref)
[
"static"
"const"
"raw"
"ref"
"move"
"dyn"
] @keyword.storage.modifier
; TODO: variable.mut to highlight mutable identifiers via locals.scm
; ---
; Remaining Paths
; ---
(scoped_identifier
path: (identifier)? @namespace
name: (identifier) @namespace)
(scoped_type_identifier
path: (identifier) @namespace)
; -------
; Functions
; -------
(call_expression
function: [
((identifier) @function)
(scoped_identifier
name: (identifier) @function)
(field_expression
field: (field_identifier) @function)
])
(generic_function
function: [
((identifier) @function)
(scoped_identifier
name: (identifier) @function)
(field_expression
field: (field_identifier) @function.method)
])
(function_item
name: (identifier) @function)
(function_signature_item
name: (identifier) @function)
; -------
; Guess Other Types
; -------
; Other PascalCase identifiers are assumed to be structs.
((identifier) @type
(#match? @type "^[A-Z]"))
(never_type "!" @type)
((identifier) @constant
(#match? @constant "^[A-Z][A-Z\\d_]*$"))
; ---
; PascalCase identifiers in call_expressions (e.g. `Ok()`)
; are assumed to be enum constructors.
; ---
(call_expression
function: [
((identifier) @constructor
(#match? @constructor "^[A-Z]"))
(scoped_identifier
name: ((identifier) @constructor
(#match? @constructor "^[A-Z]")))
])
; ---
; PascalCase identifiers under a path which is also PascalCase
; are assumed to be constructors if they have methods or fields.
; ---
(field_expression
value: (scoped_identifier
path: [
(identifier) @type
(scoped_identifier
name: (identifier) @type)
]
name: (identifier) @constructor
(#match? @type "^[A-Z]")
(#match? @constructor "^[A-Z]")))
(enum_variant (identifier) @type.enum.variant)
; -------
; Constructors
; -------
; TODO: this is largely guesswork, remove it once we get actual info from locals.scm or r-a
(struct_expression
name: (type_identifier) @constructor)
(tuple_struct_pattern
type: [
(identifier) @constructor
(scoped_identifier
name: (identifier) @constructor)
])
(struct_pattern
type: [
((type_identifier) @constructor)
(scoped_type_identifier
name: (type_identifier) @constructor)
])
(match_pattern
((identifier) @constructor) (#match? @constructor "^[A-Z]"))
(or_pattern
((identifier) @constructor)
((identifier) @constructor)
(#match? @constructor "^[A-Z]"))
; ---
; Macros
; ---
(attribute
(identifier) @function.macro)
(inner_attribute_item "!" @punctuation)
(attribute
[
(identifier) @function.macro
(scoped_identifier
name: (identifier) @function.macro)
]
(token_tree (identifier) @function.macro)?)
(inner_attribute_item) @attribute
(macro_definition
name: (identifier) @function.macro)
(macro_invocation
macro: [
((identifier) @function.macro)
(scoped_identifier
name: (identifier) @function.macro)
]
"!" @function.macro)
(metavariable) @variable.parameter
(fragment_specifier) @type
(attribute
(identifier) @special
arguments: (token_tree (identifier) @type)
(#eq? @special "derive")
)
; ---
; Prelude
; ---
((identifier) @type.enum.variant.builtin
(#any-of? @type.enum.variant.builtin "Some" "None" "Ok" "Err"))
(call_expression
(identifier) @function.builtin
(#any-of? @function.builtin
"drop"
"size_of"
"size_of_val"
"align_of"
"align_of_val"))
((type_identifier) @type.builtin
(#any-of?
@type.builtin
"Send"
"Sized"
"Sync"
"Unpin"
"Drop"
"Fn"
"FnMut"
"FnOnce"
"AsMut"
"AsRef"
"From"
"Into"
"DoubleEndedIterator"
"ExactSizeIterator"
"Extend"
"IntoIterator"
"Iterator"
"Option"
"Result"
"Clone"
"Copy"
"Debug"
"Default"
"Eq"
"Hash"
"Ord"
"PartialEq"
"PartialOrd"
"ToOwned"
"Box"
"String"
"ToString"
"Vec"
"FromIterator"
"TryFrom"
"TryInto"))

81
test-grammars/rust/injections.scm vendored Normal file
View File

@@ -0,0 +1,81 @@
([(line_comment !doc) (block_comment !doc)] @injection.content
(#set! injection.language "comment"))
((doc_comment) @injection.content
(#set! injection.language "markdown")
(#set! injection.combined))
((macro_invocation
macro:
[
(scoped_identifier
name: (_) @_macro_name)
(identifier) @_macro_name
]
(token_tree) @injection.content)
(#eq? @_macro_name "html")
(#set! injection.language "html")
(#set! injection.include-children))
((macro_invocation
macro:
[
(scoped_identifier
name: (_) @_macro_name)
(identifier) @_macro_name
]
(token_tree) @injection.content)
(#eq? @_macro_name "slint")
(#set! injection.language "slint")
(#set! injection.include-children))
((macro_invocation
(token_tree) @injection.content)
(#set! injection.language "rust")
(#set! injection.include-children))
((macro_rule
(token_tree) @injection.content)
(#set! injection.language "rust")
(#set! injection.include-children))
(call_expression
function: (scoped_identifier
path: (identifier) @_regex (#eq? @_regex "Regex")
name: (identifier) @_new (#eq? @_new "new"))
arguments: (arguments (raw_string_literal) @injection.content)
(#set! injection.language "regex"))
(call_expression
function: (scoped_identifier
path: (scoped_identifier (identifier) @_regex (#eq? @_regex "Regex") .)
name: (identifier) @_new (#eq? @_new "new"))
arguments: (arguments (raw_string_literal) @injection.content)
(#set! injection.language "regex"))
; Highlight SQL in `sqlx::query!()`, `sqlx::query_scalar!()`, and `sqlx::query_scalar_unchecked!()`
(macro_invocation
macro: (scoped_identifier
path: (identifier) @_sqlx (#eq? @_sqlx "sqlx")
name: (identifier) @_query (#match? @_query "^query(_scalar|_scalar_unchecked)?$"))
(token_tree
; Only the first argument is SQL
.
[(string_literal) (raw_string_literal)] @injection.content
)
(#set! injection.language "sql"))
; Highlight SQL in `sqlx::query_as!()` and `sqlx::query_as_unchecked!()`
(macro_invocation
macro: (scoped_identifier
path: (identifier) @_sqlx (#eq? @_sqlx "sqlx")
name: (identifier) @_query_as (#match? @_query_as "^query_as(_unchecked)?$"))
(token_tree
; Only the second argument is SQL
.
; Allow anything as the first argument in case the user has lower case type
; names for some reason
(_)
[(string_literal) (raw_string_literal)] @injection.content
)
(#set! injection.language "sql"))

View File

@@ -0,0 +1,6 @@
{
"repo": "https://github.com/tree-sitter/tree-sitter-rust",
"rev": "1f63b33efee17e833e0ea29266dd3d713e27e321",
"license": "MIT",
"compressed": true
}

BIN
test-grammars/rust/src/grammar.json vendored Normal file

Binary file not shown.

BIN
test-grammars/rust/src/parser.c vendored Normal file

Binary file not shown.

393
test-grammars/rust/src/scanner.c vendored Normal file
View File

@@ -0,0 +1,393 @@
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
#include <wctype.h>
enum TokenType {
STRING_CONTENT,
RAW_STRING_LITERAL_START,
RAW_STRING_LITERAL_CONTENT,
RAW_STRING_LITERAL_END,
FLOAT_LITERAL,
BLOCK_OUTER_DOC_MARKER,
BLOCK_INNER_DOC_MARKER,
BLOCK_COMMENT_CONTENT,
LINE_DOC_CONTENT,
ERROR_SENTINEL
};
typedef struct {
uint8_t opening_hash_count;
} Scanner;
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); }
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
buffer[0] = (char)scanner->opening_hash_count;
return 1;
}
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
scanner->opening_hash_count = 0;
if (length == 1) {
Scanner *scanner = (Scanner *)payload;
scanner->opening_hash_count = buffer[0];
}
}
static inline bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); }
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static inline bool process_string(TSLexer *lexer) {
bool has_content = false;
for (;;) {
if (lexer->lookahead == '\"' || lexer->lookahead == '\\') {
break;
}
if (lexer->eof(lexer)) {
return false;
}
has_content = true;
advance(lexer);
}
lexer->result_symbol = STRING_CONTENT;
lexer->mark_end(lexer);
return has_content;
}
static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
if (lexer->lookahead == 'b' || lexer->lookahead == 'c') {
advance(lexer);
}
if (lexer->lookahead != 'r') {
return false;
}
advance(lexer);
uint8_t opening_hash_count = 0;
while (lexer->lookahead == '#') {
advance(lexer);
opening_hash_count++;
}
if (lexer->lookahead != '"') {
return false;
}
advance(lexer);
scanner->opening_hash_count = opening_hash_count;
lexer->result_symbol = RAW_STRING_LITERAL_START;
return true;
}
static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
for (;;) {
if (lexer->eof(lexer)) {
return false;
}
if (lexer->lookahead == '"') {
lexer->mark_end(lexer);
advance(lexer);
unsigned hash_count = 0;
while (lexer->lookahead == '#' && hash_count < scanner->opening_hash_count) {
advance(lexer);
hash_count++;
}
if (hash_count == scanner->opening_hash_count) {
lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
return true;
}
} else {
advance(lexer);
}
}
}
static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
advance(lexer);
for (unsigned i = 0; i < scanner->opening_hash_count; i++) {
advance(lexer);
}
lexer->result_symbol = RAW_STRING_LITERAL_END;
return true;
}
static inline bool process_float_literal(TSLexer *lexer) {
lexer->result_symbol = FLOAT_LITERAL;
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
bool has_fraction = false, has_exponent = false;
if (lexer->lookahead == '.') {
has_fraction = true;
advance(lexer);
if (iswalpha(lexer->lookahead)) {
// The dot is followed by a letter: 1.max(2) => not a float but an integer
return false;
}
if (lexer->lookahead == '.') {
return false;
}
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
}
lexer->mark_end(lexer);
if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
has_exponent = true;
advance(lexer);
if (lexer->lookahead == '+' || lexer->lookahead == '-') {
advance(lexer);
}
if (!is_num_char(lexer->lookahead)) {
return true;
}
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
}
if (!has_exponent && !has_fraction) {
return false;
}
if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') {
return true;
}
advance(lexer);
if (!iswdigit(lexer->lookahead)) {
return true;
}
while (iswdigit(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
return true;
}
static inline bool process_line_doc_content(TSLexer *lexer) {
lexer->result_symbol = LINE_DOC_CONTENT;
for (;;) {
if (lexer->eof(lexer)) {
return true;
}
if (lexer->lookahead == '\n') {
// Include the newline in the doc content node.
// Line endings are useful for markdown injection.
advance(lexer);
return true;
}
advance(lexer);
}
}
typedef enum {
LeftForwardSlash,
LeftAsterisk,
Continuing,
} BlockCommentState;
typedef struct {
BlockCommentState state;
unsigned nestingDepth;
} BlockCommentProcessing;
static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) {
if (current == '*') {
processing->nestingDepth += 1;
}
processing->state = Continuing;
};
static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) {
if (current == '*') {
lexer->mark_end(lexer);
processing->state = LeftAsterisk;
return;
}
if (current == '/') {
processing->nestingDepth -= 1;
}
processing->state = Continuing;
}
static inline void process_continuing(BlockCommentProcessing *processing, char current) {
switch (current) {
case '/':
processing->state = LeftForwardSlash;
break;
case '*':
processing->state = LeftAsterisk;
break;
}
}
static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
char first = (char)lexer->lookahead;
// The first character is stored so we can safely advance inside
// these if blocks. However, because we only store one, we can only
// safely advance 1 time. Since there's a chance that an advance could
// happen in one state, we must advance in all states to ensure that
// the program ends up in a sane state prior to processing the block
// comment if need be.
if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') {
lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
advance(lexer);
return true;
}
if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') {
advance(lexer);
lexer->mark_end(lexer);
// If the next token is a / that means that it's an empty block comment.
if (lexer->lookahead == '/') {
return false;
}
// If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER
// as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more.
if (lexer->lookahead != '*') {
lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
return true;
}
} else {
advance(lexer);
}
if (valid_symbols[BLOCK_COMMENT_CONTENT]) {
BlockCommentProcessing processing = {Continuing, 1};
// Manually set the current state based on the first character
switch (first) {
case '*':
processing.state = LeftAsterisk;
if (lexer->lookahead == '/') {
// This case can happen in an empty doc block comment
// like /*!*/. The comment has no contents, so bail.
return false;
}
break;
case '/':
processing.state = LeftForwardSlash;
break;
default:
processing.state = Continuing;
break;
}
// For the purposes of actually parsing rust code, this
// is incorrect as it considers an unterminated block comment
// to be an error. However, for the purposes of syntax highlighting
// this should be considered successful as otherwise you are not able
// to syntax highlight a block of code prior to closing the
// block comment
while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
// Set first to the current lookahead as that is the second character
// as we force an advance in the above code when we are checking if we
// need to handle a block comment inner or outer doc comment signifier
// node
first = (char)lexer->lookahead;
switch (processing.state) {
case LeftForwardSlash:
process_left_forward_slash(&processing, first);
break;
case LeftAsterisk:
process_left_asterisk(&processing, first, lexer);
break;
case Continuing:
lexer->mark_end(lexer);
process_continuing(&processing, first);
break;
default:
break;
}
advance(lexer);
if (first == '/' && processing.nestingDepth != 0) {
lexer->mark_end(lexer);
}
}
lexer->result_symbol = BLOCK_COMMENT_CONTENT;
return true;
}
return false;
}
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
// The documentation states that if the lexical analysis fails for some reason
// they will mark every state as valid and pass it to the external scanner
// However, we can't do anything to help them recover in that case so we
// should just fail.
/*
link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
If a syntax error is encountered during regular parsing, Tree-sitters
first action during error recovery will be to call the external scanners
scan function with all tokens marked valid. The scanner should detect this
case and handle it appropriately. One simple method of detection is to add
an unused token to the end of the externals array, for example
externals: $ => [$.token1, $.token2, $.error_sentinel],
then check whether that token is marked valid to determine whether
Tree-sitter is in error correction mode.
*/
if (valid_symbols[ERROR_SENTINEL]) {
return false;
}
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] ||
valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
return process_block_comment(lexer, valid_symbols);
}
if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
return process_string(lexer);
}
if (valid_symbols[LINE_DOC_CONTENT]) {
return process_line_doc_content(lexer);
}
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_START] &&
(lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
return scan_raw_string_start(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
return scan_raw_string_content(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
return scan_raw_string_end(scanner, lexer);
}
if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
return process_float_literal(lexer);
}
return false;
}

View File

@@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t size);
extern void *(*ts_current_calloc)(size_t count, size_t size);
extern void *(*ts_current_realloc)(void *ptr, size_t size);
extern void (*ts_current_free)(void *ptr);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@@ -0,0 +1,266 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {
uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size;
TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) {
return true;
} else if (lookahead > range->end) {
index = mid_index;
}
size -= half_size;
}
TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_