diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..452a28a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +test-grammars/*/*.scm linguist-vendored +test-grammars/*/src/** linguist-vendored +test-grammars/*/src/{parser.c,grammar.json,scanner.*} binary diff --git a/.gitignore b/.gitignore index 7d155d4..bf27905 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target result .direnv -test-grammars/ +/test-grammars/*/*.so +/test-grammars/*/.BUILD_COOKIE diff --git a/highlighter/src/tests.rs b/highlighter/src/tests.rs index fb7c524..f95b772 100644 --- a/highlighter/src/tests.rs +++ b/highlighter/src/tests.rs @@ -15,7 +15,6 @@ use crate::injections_query::{InjectionLanguageMarker, InjectionsQuery}; use crate::Language; static GRAMMARS: Lazy> = Lazy::new(|| { - fs::create_dir_all("../test-grammars").unwrap(); let skidder_config = skidder_config(); skidder::fetch(&skidder_config, false).unwrap(); skidder::build_all_grammars(&skidder_config, false, None).unwrap(); @@ -26,12 +25,11 @@ static GRAMMARS: Lazy> = Lazy::new(|| { fn skidder_config() -> skidder::Config { skidder::Config { - repos: vec![Repo::Git { - name: "helix-language-support".to_owned(), - remote: "git@github.com:helix-editor/tree-sitter-grammars.git".into(), - branch: "reversed".into(), + repos: vec![Repo::Local { + // `./test-grammars` in the root of the repo. + path: Path::new("../test-grammars").canonicalize().unwrap(), }], - index: Path::new("../test-grammars").canonicalize().unwrap(), + index: PathBuf::new(), verbose: true, } } diff --git a/test-grammars/comment/LICENSE b/test-grammars/comment/LICENSE new file mode 100644 index 0000000..8b03b18 --- /dev/null +++ b/test-grammars/comment/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Santos Gallegos + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test-grammars/comment/highlights.scm b/test-grammars/comment/highlights.scm new file mode 100644 index 0000000..56c1fd7 --- /dev/null +++ b/test-grammars/comment/highlights.scm @@ -0,0 +1,41 @@ +(tag + (name) @ui.text + (user)? @constant) + +; Hint level tags +((tag (name) @hint) + (#any-of? @hint "HINT" "MARK" "PASSED" "STUB" "MOCK")) + +("text" @hint + (#any-of? @hint "HINT" "MARK" "PASSED" "STUB" "MOCK")) + +; Info level tags +((tag (name) @info) + (#any-of? @info "INFO" "NOTE" "TODO" "PERF" "OPTIMIZE" "PERFORMANCE" "QUESTION" "ASK")) + +("text" @info + (#any-of? 
@info "INFO" "NOTE" "TODO" "PERF" "OPTIMIZE" "PERFORMANCE" "QUESTION" "ASK")) + +; Warning level tags +((tag (name) @warning) + (#any-of? @warning "HACK" "WARN" "WARNING" "TEST" "TEMP")) + +("text" @warning + (#any-of? @warning "HACK" "WARN" "WARNING" "TEST" "TEMP")) + +; Error level tags +((tag (name) @error) + (#any-of? @error "BUG" "FIXME" "ISSUE" "XXX" "FIX" "SAFETY" "FIXIT" "FAILED" "DEBUG" "INVARIANT" "COMPLIANCE")) + +("text" @error + (#any-of? @error "BUG" "FIXME" "ISSUE" "XXX" "FIX" "SAFETY" "FIXIT" "FAILED" "DEBUG" "INVARIANT" "COMPLIANCE")) + +; Issue number (#123) +("text" @constant.numeric + (#match? @constant.numeric "^#[0-9]+$")) + +; User mention (@user) +("text" @tag + (#match? @tag "^[@][a-zA-Z0-9_-]+$")) + +(uri) @markup.link.url diff --git a/test-grammars/comment/metadata.json b/test-grammars/comment/metadata.json new file mode 100644 index 0000000..3c3a6e1 --- /dev/null +++ b/test-grammars/comment/metadata.json @@ -0,0 +1,6 @@ +{ + "repo": "https://github.com/stsewd/tree-sitter-comment", + "rev": "aefcc2813392eb6ffe509aa0fc8b4e9b57413ee1", + "license": "MIT", + "compressed": true +} \ No newline at end of file diff --git a/test-grammars/comment/src/grammar.json b/test-grammars/comment/src/grammar.json new file mode 100644 index 0000000..de26a9e Binary files /dev/null and b/test-grammars/comment/src/grammar.json differ diff --git a/test-grammars/comment/src/parser.c b/test-grammars/comment/src/parser.c new file mode 100644 index 0000000..0713ce0 Binary files /dev/null and b/test-grammars/comment/src/parser.c differ diff --git a/test-grammars/comment/src/scanner.c b/test-grammars/comment/src/scanner.c new file mode 100644 index 0000000..d8b0e24 --- /dev/null +++ b/test-grammars/comment/src/scanner.c @@ -0,0 +1,35 @@ +#include + +#include "tree_sitter_comment/parser.c" +#include "tree_sitter_comment/tokens.h" + +void* tree_sitter_comment_external_scanner_create() +{ + return NULL; +} + +void tree_sitter_comment_external_scanner_destroy(void* payload) +{ +} + +unsigned tree_sitter_comment_external_scanner_serialize( + void* payload, + char* buffer) +{ + return 0; +} + +void tree_sitter_comment_external_scanner_deserialize( + void* payload, + const char* buffer, + unsigned length) +{ +} + +bool tree_sitter_comment_external_scanner_scan( + void* payload, + TSLexer* lexer, + const bool* valid_symbols) +{ + return parse(lexer, valid_symbols); +} diff --git a/test-grammars/comment/src/tree_sitter/parser.h b/test-grammars/comment/src/tree_sitter/parser.h new file mode 100644 index 0000000..2b14ac1 --- /dev/null +++ b/test-grammars/comment/src/tree_sitter/parser.h @@ -0,0 +1,224 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool 
(*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) id - LARGE_STATE_COUNT + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) 
\ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/test-grammars/comment/src/tree_sitter_comment/chars.c b/test-grammars/comment/src/tree_sitter_comment/chars.c new file mode 100644 index 0000000..85c0973 --- /dev/null +++ b/test-grammars/comment/src/tree_sitter_comment/chars.c @@ -0,0 +1,66 @@ +#include "chars.h" + +bool is_upper(int32_t c) +{ + const int32_t upper = 65; + const int32_t lower = 90; + return c >= upper && c <= lower; +} + +bool is_digit(int32_t c) +{ + const int32_t upper = 48; + const int32_t lower = 57; + return c >= upper && c <= lower; +} + +bool is_newline(int32_t c) +{ + const int32_t newline_chars[] = { + CHAR_EOF, + CHAR_NEWLINE, + CHAR_CARRIAGE_RETURN, + }; + const int length = sizeof(newline_chars) / sizeof(int32_t); + for (int i = 0; i < length; i++) { + if (c == newline_chars[i]) { + return true; + } + } + return false; +} + +bool is_space(int32_t c) +{ + const int32_t space_chars[] = { + CHAR_SPACE, + CHAR_FORM_FEED, + CHAR_TAB, + CHAR_VERTICAL_TAB, + }; + const int length = sizeof(space_chars) / sizeof(int32_t); + bool is_space_char = false; + for (int i = 0; i < length; i++) { + if (c == space_chars[i]) { + is_space_char = true; + break; + } + } + return is_space_char || is_newline(c); +} + +/// Check if the character is allowed inside the name. +bool is_internal_char(int32_t c) +{ + const int32_t valid_chars[] = { + '-', + '_', + }; + const int length = sizeof(valid_chars) / sizeof(int32_t); + for (int i = 0; i < length; i++) { + if (c == valid_chars[i]) { + return true; + } + } + return false; +} diff --git a/test-grammars/comment/src/tree_sitter_comment/chars.h b/test-grammars/comment/src/tree_sitter_comment/chars.h new file mode 100644 index 0000000..fa5ad3a --- /dev/null +++ b/test-grammars/comment/src/tree_sitter_comment/chars.h @@ -0,0 +1,22 @@ +#ifndef TREE_SITTER_COMMENT_CHARS_H +#define TREE_SITTER_COMMENT_CHARS_H + +#include +#include + +#define CHAR_EOF 0 +#define CHAR_NEWLINE 10 +#define CHAR_CARRIAGE_RETURN 13 + +#define CHAR_SPACE ' ' +#define CHAR_FORM_FEED '\f' +#define CHAR_TAB '\t' +#define CHAR_VERTICAL_TAB '\v' + +bool is_internal_char(int32_t c); +bool is_newline(int32_t c); +bool is_space(int32_t c); +bool is_upper(int32_t c); +bool is_digit(int32_t c); + +#endif /* ifndef TREE_SITTER_COMMENT_CHARS_H */ diff --git a/test-grammars/comment/src/tree_sitter_comment/parser.c b/test-grammars/comment/src/tree_sitter_comment/parser.c new file mode 100644 index 0000000..89eb616 --- /dev/null +++ b/test-grammars/comment/src/tree_sitter_comment/parser.c @@ -0,0 +1,97 @@ +#include "parser.h" + +#include "chars.c" +#include "tokens.h" +#include +#include + +/// Parse the name of the tag. 
+/// +/// They can be of the form: +/// - TODO: +/// - TODO: text +/// - TODO(stsewd): +/// - TODO(stsewd): text +/// - TODO (stsewd): text +bool parse_tagname(TSLexer* lexer, const bool* valid_symbols) +{ + if (!is_upper(lexer->lookahead) || !valid_symbols[T_TAGNAME]) { + return false; + } + + int32_t previous = lexer->lookahead; + lexer->advance(lexer, false); + + while (is_upper(lexer->lookahead) + || is_digit(lexer->lookahead) + || is_internal_char(lexer->lookahead)) { + previous = lexer->lookahead; + lexer->advance(lexer, false); + } + // The tag name ends here. + // But we keep parsing to see if it's a valid tag name. + lexer->mark_end(lexer); + + // It can't end with an internal char. + if (is_internal_char(previous)) { + return false; + } + + // For the user component this is `\s*(`. + // We don't parse that part, we just need to be sure it ends with `:\s`. + if ((is_space(lexer->lookahead) && !is_newline(lexer->lookahead)) + || lexer->lookahead == '(') { + // Skip white spaces. + while (is_space(lexer->lookahead) && !is_newline(lexer->lookahead)) { + lexer->advance(lexer, false); + } + // Checking aperture. + if (lexer->lookahead != '(') { + return false; + } + lexer->advance(lexer, false); + + // Checking closure. + int user_length = 0; + while (lexer->lookahead != ')') { + if (is_newline(lexer->lookahead)) { + return false; + } + lexer->advance(lexer, false); + user_length++; + } + if (user_length <= 0) { + return false; + } + lexer->advance(lexer, false); + } + + // It should end with `:`... + if (lexer->lookahead != ':') { + return false; + } + + // ... and be followed by one space. + lexer->advance(lexer, false); + if (!is_space(lexer->lookahead)) { + return false; + } + + lexer->result_symbol = T_TAGNAME; + return true; +} + +bool parse(TSLexer* lexer, const bool* valid_symbols) +{ + // If all valid symbols are true, tree-sitter is in correction mode. + // We don't want to parse anything in that case. 
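+    // (T_INVALID_TOKEN is not produced by any grammar rule, so it can only be marked valid while tree-sitter is error-correcting.)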
+ if (valid_symbols[T_INVALID_TOKEN]) { + return false; + } + + if (is_upper(lexer->lookahead) && valid_symbols[T_TAGNAME]) { + return parse_tagname(lexer, valid_symbols); + } + + return false; +} diff --git a/test-grammars/comment/src/tree_sitter_comment/parser.h b/test-grammars/comment/src/tree_sitter_comment/parser.h new file mode 100644 index 0000000..9c89dac --- /dev/null +++ b/test-grammars/comment/src/tree_sitter_comment/parser.h @@ -0,0 +1,9 @@ +#ifndef TREE_SITTER_COMMENT_PARSER_H +#define TREE_SITTER_COMMENT_PARSER_H + +#include + +bool parse_tagname(TSLexer* lexer, const bool* valid_symbols); +bool parse(TSLexer* lexer, const bool* valid_symbols); + +#endif /* ifndef TREE_SITTER_COMMENT_PARSER_H */ diff --git a/test-grammars/comment/src/tree_sitter_comment/tokens.h b/test-grammars/comment/src/tree_sitter_comment/tokens.h new file mode 100644 index 0000000..b165641 --- /dev/null +++ b/test-grammars/comment/src/tree_sitter_comment/tokens.h @@ -0,0 +1,9 @@ +#ifndef TREE_SITTER_COMMENT_TOKENS_H +#define TREE_SITTER_COMMENT_TOKENS_H + +enum TokenType { + T_TAGNAME, + T_INVALID_TOKEN, +}; + +#endif /* ifndef TREE_SITTER_COMMENT_TOKENS_H */ diff --git a/test-grammars/html/LICENSE b/test-grammars/html/LICENSE new file mode 100644 index 0000000..4b52d19 --- /dev/null +++ b/test-grammars/html/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test-grammars/html/highlights.scm b/test-grammars/html/highlights.scm new file mode 100644 index 0000000..8581f0a --- /dev/null +++ b/test-grammars/html/highlights.scm @@ -0,0 +1,47 @@ +(tag_name) @tag +(erroneous_end_tag_name) @error +(doctype) @constant +(attribute_name) @attribute + +(attribute [(attribute_value) (quoted_attribute_value)] @string) + +((attribute + (attribute_name) @attribute + (quoted_attribute_value (attribute_value) @markup.link.url)) + (#any-of? @attribute "href" "src")) + +((element + (start_tag + (tag_name) @_tag) + (text) @markup.link.label) + (#eq? @_tag "a")) + +((element + (start_tag + (tag_name) @_tag) + (text) @markup.bold) + (#any-of? @_tag "strong" "b")) + +((element + (start_tag + (tag_name) @_tag) + (text) @markup.italic) + (#any-of? @_tag "em" "i")) + +((element + (start_tag + (tag_name) @_tag) + (text) @markup.strikethrough) + (#any-of? 
@_tag "s" "del")) + +[ + "<" + ">" + "" + " +#include +#include +#include +#include +#include +#include "tag.h" + +namespace { + +using std::vector; +using std::string; + +enum TokenType { + START_TAG_NAME, + SCRIPT_START_TAG_NAME, + STYLE_START_TAG_NAME, + END_TAG_NAME, + ERRONEOUS_END_TAG_NAME, + SELF_CLOSING_TAG_DELIMITER, + IMPLICIT_END_TAG, + RAW_TEXT, + COMMENT +}; + +struct Scanner { + Scanner() {} + + unsigned serialize(char *buffer) { + uint16_t tag_count = tags.size() > UINT16_MAX ? UINT16_MAX : tags.size(); + uint16_t serialized_tag_count = 0; + + unsigned i = sizeof(tag_count); + std::memcpy(&buffer[i], &tag_count, sizeof(tag_count)); + i += sizeof(tag_count); + + for (; serialized_tag_count < tag_count; serialized_tag_count++) { + Tag &tag = tags[serialized_tag_count]; + if (tag.type == CUSTOM) { + unsigned name_length = tag.custom_tag_name.size(); + if (name_length > UINT8_MAX) name_length = UINT8_MAX; + if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; + buffer[i++] = static_cast(tag.type); + buffer[i++] = name_length; + tag.custom_tag_name.copy(&buffer[i], name_length); + i += name_length; + } else { + if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; + buffer[i++] = static_cast(tag.type); + } + } + + std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count)); + return i; + } + + void deserialize(const char *buffer, unsigned length) { + tags.clear(); + if (length > 0) { + unsigned i = 0; + uint16_t tag_count, serialized_tag_count; + + std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count)); + i += sizeof(serialized_tag_count); + + std::memcpy(&tag_count, &buffer[i], sizeof(tag_count)); + i += sizeof(tag_count); + + tags.resize(tag_count); + for (unsigned j = 0; j < serialized_tag_count; j++) { + Tag &tag = tags[j]; + tag.type = static_cast(buffer[i++]); + if (tag.type == CUSTOM) { + uint16_t name_length = static_cast(buffer[i++]); + tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]); + i += name_length; + } + } + } + } + + string scan_tag_name(TSLexer *lexer) { + string tag_name; + while (iswalnum(lexer->lookahead) || + lexer->lookahead == '-' || + lexer->lookahead == ':') { + tag_name += towupper(lexer->lookahead); + lexer->advance(lexer, false); + } + return tag_name; + } + + bool scan_comment(TSLexer *lexer) { + if (lexer->lookahead != '-') return false; + lexer->advance(lexer, false); + if (lexer->lookahead != '-') return false; + lexer->advance(lexer, false); + + unsigned dashes = 0; + while (lexer->lookahead) { + switch (lexer->lookahead) { + case '-': + ++dashes; + break; + case '>': + if (dashes >= 2) { + lexer->result_symbol = COMMENT; + lexer->advance(lexer, false); + lexer->mark_end(lexer); + return true; + } + default: + dashes = 0; + } + lexer->advance(lexer, false); + } + return false; + } + + bool scan_raw_text(TSLexer *lexer) { + if (!tags.size()) return false; + + lexer->mark_end(lexer); + + const string &end_delimiter = tags.back().type == SCRIPT + ? "lookahead) { + if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) { + delimiter_index++; + if (delimiter_index == end_delimiter.size()) break; + lexer->advance(lexer, false); + } else { + delimiter_index = 0; + lexer->advance(lexer, false); + lexer->mark_end(lexer); + } + } + + lexer->result_symbol = RAW_TEXT; + return true; + } + + bool scan_implicit_end_tag(TSLexer *lexer) { + Tag *parent = tags.empty() ? 
NULL : &tags.back(); + + bool is_closing_tag = false; + if (lexer->lookahead == '/') { + is_closing_tag = true; + lexer->advance(lexer, false); + } else { + if (parent && parent->is_void()) { + tags.pop_back(); + lexer->result_symbol = IMPLICIT_END_TAG; + return true; + } + } + + string tag_name = scan_tag_name(lexer); + if (tag_name.empty()) return false; + + Tag next_tag = Tag::for_name(tag_name); + + if (is_closing_tag) { + // The tag correctly closes the topmost element on the stack + if (!tags.empty() && tags.back() == next_tag) return false; + + // Otherwise, dig deeper and queue implicit end tags (to be nice in + // the case of malformed HTML) + if (std::find(tags.begin(), tags.end(), next_tag) != tags.end()) { + tags.pop_back(); + lexer->result_symbol = IMPLICIT_END_TAG; + return true; + } + } else if (parent && !parent->can_contain(next_tag)) { + tags.pop_back(); + lexer->result_symbol = IMPLICIT_END_TAG; + return true; + } + + return false; + } + + bool scan_start_tag_name(TSLexer *lexer) { + string tag_name = scan_tag_name(lexer); + if (tag_name.empty()) return false; + Tag tag = Tag::for_name(tag_name); + tags.push_back(tag); + switch (tag.type) { + case SCRIPT: + lexer->result_symbol = SCRIPT_START_TAG_NAME; + break; + case STYLE: + lexer->result_symbol = STYLE_START_TAG_NAME; + break; + default: + lexer->result_symbol = START_TAG_NAME; + break; + } + return true; + } + + bool scan_end_tag_name(TSLexer *lexer) { + string tag_name = scan_tag_name(lexer); + if (tag_name.empty()) return false; + Tag tag = Tag::for_name(tag_name); + if (!tags.empty() && tags.back() == tag) { + tags.pop_back(); + lexer->result_symbol = END_TAG_NAME; + } else { + lexer->result_symbol = ERRONEOUS_END_TAG_NAME; + } + return true; + } + + bool scan_self_closing_tag_delimiter(TSLexer *lexer) { + lexer->advance(lexer, false); + if (lexer->lookahead == '>') { + lexer->advance(lexer, false); + if (!tags.empty()) { + tags.pop_back(); + lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER; + } + return true; + } + return false; + } + + bool scan(TSLexer *lexer, const bool *valid_symbols) { + while (iswspace(lexer->lookahead)) { + lexer->advance(lexer, true); + } + + if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) { + return scan_raw_text(lexer); + } + + switch (lexer->lookahead) { + case '<': + lexer->mark_end(lexer); + lexer->advance(lexer, false); + + if (lexer->lookahead == '!') { + lexer->advance(lexer, false); + return scan_comment(lexer); + } + + if (valid_symbols[IMPLICIT_END_TAG]) { + return scan_implicit_end_tag(lexer); + } + break; + + case '\0': + if (valid_symbols[IMPLICIT_END_TAG]) { + return scan_implicit_end_tag(lexer); + } + break; + + case '/': + if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) { + return scan_self_closing_tag_delimiter(lexer); + } + break; + + default: + if ((valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME]) && !valid_symbols[RAW_TEXT]) { + return valid_symbols[START_TAG_NAME] + ? 
scan_start_tag_name(lexer) + : scan_end_tag_name(lexer); + } + } + + return false; + } + + vector tags; +}; + +} + +extern "C" { + +void *tree_sitter_html_external_scanner_create() { + return new Scanner(); +} + +bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + Scanner *scanner = static_cast(payload); + return scanner->scan(lexer, valid_symbols); +} + +unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = static_cast(payload); + return scanner->serialize(buffer); +} + +void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = static_cast(payload); + scanner->deserialize(buffer, length); +} + +void tree_sitter_html_external_scanner_destroy(void *payload) { + Scanner *scanner = static_cast(payload); + delete scanner; +} + +} diff --git a/test-grammars/html/src/tag.h b/test-grammars/html/src/tag.h new file mode 100644 index 0000000..b068eca --- /dev/null +++ b/test-grammars/html/src/tag.h @@ -0,0 +1,380 @@ +#include +#include + +using std::string; +using std::map; + +enum TagType { + AREA, + BASE, + BASEFONT, + BGSOUND, + BR, + COL, + COMMAND, + EMBED, + FRAME, + HR, + IMAGE, + IMG, + INPUT, + ISINDEX, + KEYGEN, + LINK, + MENUITEM, + META, + NEXTID, + PARAM, + SOURCE, + TRACK, + WBR, + END_OF_VOID_TAGS, + + A, + ABBR, + ADDRESS, + ARTICLE, + ASIDE, + AUDIO, + B, + BDI, + BDO, + BLOCKQUOTE, + BODY, + BUTTON, + CANVAS, + CAPTION, + CITE, + CODE, + COLGROUP, + DATA, + DATALIST, + DD, + DEL, + DETAILS, + DFN, + DIALOG, + DIV, + DL, + DT, + EM, + FIELDSET, + FIGCAPTION, + FIGURE, + FOOTER, + FORM, + H1, + H2, + H3, + H4, + H5, + H6, + HEAD, + HEADER, + HGROUP, + HTML, + I, + IFRAME, + INS, + KBD, + LABEL, + LEGEND, + LI, + MAIN, + MAP, + MARK, + MATH, + MENU, + METER, + NAV, + NOSCRIPT, + OBJECT, + OL, + OPTGROUP, + OPTION, + OUTPUT, + P, + PICTURE, + PRE, + PROGRESS, + Q, + RB, + RP, + RT, + RTC, + RUBY, + S, + SAMP, + SCRIPT, + SECTION, + SELECT, + SLOT, + SMALL, + SPAN, + STRONG, + STYLE, + SUB, + SUMMARY, + SUP, + SVG, + TABLE, + TBODY, + TD, + TEMPLATE, + TEXTAREA, + TFOOT, + TH, + THEAD, + TIME, + TITLE, + TR, + U, + UL, + VAR, + VIDEO, + + CUSTOM, +}; + + +static const map get_tag_map() { + map result; +#define TAG(name) result[#name] = name + TAG(AREA); + TAG(BASE); + TAG(BASEFONT); + TAG(BGSOUND); + TAG(BR); + TAG(COL); + TAG(COMMAND); + TAG(EMBED); + TAG(FRAME); + TAG(HR); + TAG(IMAGE); + TAG(IMG); + TAG(INPUT); + TAG(ISINDEX); + TAG(KEYGEN); + TAG(LINK); + TAG(MENUITEM); + TAG(META); + TAG(NEXTID); + TAG(PARAM); + TAG(SOURCE); + TAG(TRACK); + TAG(WBR); + TAG(A); + TAG(ABBR); + TAG(ADDRESS); + TAG(ARTICLE); + TAG(ASIDE); + TAG(AUDIO); + TAG(B); + TAG(BDI); + TAG(BDO); + TAG(BLOCKQUOTE); + TAG(BODY); + TAG(BUTTON); + TAG(CANVAS); + TAG(CAPTION); + TAG(CITE); + TAG(CODE); + TAG(COLGROUP); + TAG(DATA); + TAG(DATALIST); + TAG(DD); + TAG(DEL); + TAG(DETAILS); + TAG(DFN); + TAG(DIALOG); + TAG(DIV); + TAG(DL); + TAG(DT); + TAG(EM); + TAG(FIELDSET); + TAG(FIGCAPTION); + TAG(FIGURE); + TAG(FOOTER); + TAG(FORM); + TAG(H1); + TAG(H2); + TAG(H3); + TAG(H4); + TAG(H5); + TAG(H6); + TAG(HEAD); + TAG(HEADER); + TAG(HGROUP); + TAG(HTML); + TAG(I); + TAG(IFRAME); + TAG(INS); + TAG(KBD); + TAG(LABEL); + TAG(LEGEND); + TAG(LI); + TAG(MAIN); + TAG(MAP); + TAG(MARK); + TAG(MATH); + TAG(MENU); + TAG(METER); + TAG(NAV); + TAG(NOSCRIPT); + TAG(OBJECT); + TAG(OL); + TAG(OPTGROUP); + TAG(OPTION); + TAG(OUTPUT); + TAG(P); + TAG(PICTURE); + 
TAG(PRE); + TAG(PROGRESS); + TAG(Q); + TAG(RB); + TAG(RP); + TAG(RT); + TAG(RTC); + TAG(RUBY); + TAG(S); + TAG(SAMP); + TAG(SCRIPT); + TAG(SECTION); + TAG(SELECT); + TAG(SLOT); + TAG(SMALL); + TAG(SPAN); + TAG(STRONG); + TAG(STYLE); + TAG(SUB); + TAG(SUMMARY); + TAG(SUP); + TAG(SVG); + TAG(TABLE); + TAG(TBODY); + TAG(TD); + TAG(TEMPLATE); + TAG(TEXTAREA); + TAG(TFOOT); + TAG(TH); + TAG(THEAD); + TAG(TIME); + TAG(TITLE); + TAG(TR); + TAG(U); + TAG(UL); + TAG(VAR); + TAG(VIDEO); +#undef TAG + return result; +} + +static const map TAG_TYPES_BY_TAG_NAME = get_tag_map(); + +static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = { + ADDRESS, + ARTICLE, + ASIDE, + BLOCKQUOTE, + DETAILS, + DIV, + DL, + FIELDSET, + FIGCAPTION, + FIGURE, + FOOTER, + FORM, + H1, + H2, + H3, + H4, + H5, + H6, + HEADER, + HR, + MAIN, + NAV, + OL, + P, + PRE, + SECTION, +}; + +static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = ( + TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS + + sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) / + sizeof(TagType) +); + +struct Tag { + TagType type; + string custom_tag_name; + + // This default constructor is used in the case where there is not enough space + // in the serialization buffer to store all of the tags. In that case, tags + // that cannot be serialized will be treated as having an unknown type. These + // tags will be closed via implicit end tags regardless of the next closing + // tag is encountered. + Tag() : type(END_OF_VOID_TAGS) {} + + Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {} + + bool operator==(const Tag &other) const { + if (type != other.type) return false; + if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false; + return true; + } + + inline bool is_void() const { + return type < END_OF_VOID_TAGS; + } + + inline bool can_contain(const Tag &tag) { + TagType child = tag.type; + + switch (type) { + case LI: return child != LI; + + case DT: + case DD: + return child != DT && child != DD; + + case P: + return std::find( + TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS, + TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END, + tag.type + ) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END; + + case COLGROUP: + return child == COL; + + case RB: + case RT: + case RP: + return child != RB && child != RT && child != RP; + + case OPTGROUP: + return child != OPTGROUP; + + case TR: + return child != TR; + + case TD: + case TH: + return child != TD && child != TH && child != TR; + + default: + return true; + } + } + + static inline Tag for_name(const string &name) { + map::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name); + if (type != TAG_TYPES_BY_TAG_NAME.end()) { + return Tag(type->second, string()); + } else { + return Tag(CUSTOM, name); + } + } +}; diff --git a/test-grammars/html/src/tree_sitter/parser.h b/test-grammars/html/src/tree_sitter/parser.h new file mode 100644 index 0000000..cbbc7b4 --- /dev/null +++ b/test-grammars/html/src/tree_sitter/parser.h @@ -0,0 +1,223 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + 
uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; +}; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) id - LARGE_STATE_COUNT + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) 
\ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/test-grammars/markdown-inline/LICENSE b/test-grammars/markdown-inline/LICENSE new file mode 100644 index 0000000..c125939 --- /dev/null +++ b/test-grammars/markdown-inline/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Matthias Deiml + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test-grammars/markdown-inline/highlights.scm b/test-grammars/markdown-inline/highlights.scm new file mode 100644 index 0000000..ee0926a --- /dev/null +++ b/test-grammars/markdown-inline/highlights.scm @@ -0,0 +1,39 @@ +;; From nvim-treesitter/nvim-treesitter +[ + (code_span) + (link_title) +] @markup.raw.inline + +[ + (emphasis_delimiter) + (code_span_delimiter) +] @punctuation.bracket + +(emphasis) @markup.italic + +(strong_emphasis) @markup.bold + +(strikethrough) @markup.strikethrough + +[ + (link_destination) + (uri_autolink) +] @markup.link.url + +[ + (link_text) + (image_description) +] @markup.link.text + +(link_label) @markup.link.label + +[ + (backslash_escape) + (hard_line_break) +] @constant.character.escape + +(image ["[" "]" "(" ")"] @punctuation.bracket) +(image "!" @punctuation.special) +(inline_link ["[" "]" "(" ")"] @punctuation.bracket) +(shortcut_link ["[" "]"] @punctuation.bracket) + diff --git a/test-grammars/markdown-inline/injections.scm b/test-grammars/markdown-inline/injections.scm new file mode 100644 index 0000000..62b8267 --- /dev/null +++ b/test-grammars/markdown-inline/injections.scm @@ -0,0 +1,7 @@ + +((html_tag) @injection.content + (#set! injection.language "html") + (#set! injection.include-unnamed-children) + (#set! injection.combined)) + +((latex_block) @injection.content (#set! injection.language "latex") (#set! 
injection.include-unnamed-children)) diff --git a/test-grammars/markdown-inline/metadata.json b/test-grammars/markdown-inline/metadata.json new file mode 100644 index 0000000..44667f5 --- /dev/null +++ b/test-grammars/markdown-inline/metadata.json @@ -0,0 +1,6 @@ +{ + "repo": "https://github.com/tree-sitter-grammars/tree-sitter-markdown", + "rev": "62516e8c78380e3b51d5b55727995d2c511436d8", + "license": "MIT", + "compressed": true +} \ No newline at end of file diff --git a/test-grammars/markdown-inline/src/grammar.json b/test-grammars/markdown-inline/src/grammar.json new file mode 100644 index 0000000..8ba0b86 Binary files /dev/null and b/test-grammars/markdown-inline/src/grammar.json differ diff --git a/test-grammars/markdown-inline/src/parser.c b/test-grammars/markdown-inline/src/parser.c new file mode 100644 index 0000000..9cf0fcc Binary files /dev/null and b/test-grammars/markdown-inline/src/parser.c differ diff --git a/test-grammars/markdown-inline/src/scanner.c b/test-grammars/markdown-inline/src/scanner.c new file mode 100644 index 0000000..b5e48b4 --- /dev/null +++ b/test-grammars/markdown-inline/src/scanner.c @@ -0,0 +1,397 @@ +#include "tree_sitter/parser.h" + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +// For explanation of the tokens see grammar.js +typedef enum { + ERROR, + TRIGGER_ERROR, + CODE_SPAN_START, + CODE_SPAN_CLOSE, + EMPHASIS_OPEN_STAR, + EMPHASIS_OPEN_UNDERSCORE, + EMPHASIS_CLOSE_STAR, + EMPHASIS_CLOSE_UNDERSCORE, + LAST_TOKEN_WHITESPACE, + LAST_TOKEN_PUNCTUATION, + STRIKETHROUGH_OPEN, + STRIKETHROUGH_CLOSE, + LATEX_SPAN_START, + LATEX_SPAN_CLOSE, + UNCLOSED_SPAN +} TokenType; + +// Determines if a character is punctuation as defined by the markdown spec. +static bool is_punctuation(char chr) { + return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') || + (chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~'); +} + +// State bitflags used with `Scanner.state` + +// TODO +static UNUSED const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3; +// Current delimiter run is opening +static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2; + +// Convenience function to emit the error token. This is done to stop invalid +// parse branches. Specifically: +// 1. When encountering a newline after a line break that ended a paragraph, and +// no new block +// has been opened. +// 2. When encountering a new block after a soft line break. +// 3. When a `$._trigger_error` token is valid, which is used to stop parse +// branches through +// normal tree-sitter grammar rules. +// +// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in +// grammar.js +static bool error(TSLexer *lexer) { + lexer->result_symbol = ERROR; + return true; +} + +typedef struct { + // Parser state flags + uint8_t state; + uint8_t code_span_delimiter_length; + uint8_t latex_span_delimiter_length; + // The number of characters remaining in the currrent emphasis delimiter + // run. 
+ uint8_t num_emphasis_delimiters_left; + +} Scanner; + +// Write the whole state of a Scanner to a byte buffer +static unsigned serialize(Scanner *s, char *buffer) { + unsigned size = 0; + buffer[size++] = (char)s->state; + buffer[size++] = (char)s->code_span_delimiter_length; + buffer[size++] = (char)s->latex_span_delimiter_length; + buffer[size++] = (char)s->num_emphasis_delimiters_left; + return size; +} + +// Read the whole state of a Scanner from a byte buffer +// `serizalize` and `deserialize` should be fully symmetric. +static void deserialize(Scanner *s, const char *buffer, unsigned length) { + s->state = 0; + s->code_span_delimiter_length = 0; + s->latex_span_delimiter_length = 0; + s->num_emphasis_delimiters_left = 0; + if (length > 0) { + size_t size = 0; + s->state = (uint8_t)buffer[size++]; + s->code_span_delimiter_length = (uint8_t)buffer[size++]; + s->latex_span_delimiter_length = (uint8_t)buffer[size++]; + s->num_emphasis_delimiters_left = (uint8_t)buffer[size++]; + } +} + +static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t *delimiter_length, + const bool *valid_symbols, + const char delimiter, + const TokenType open_token, + const TokenType close_token) { + uint8_t level = 0; + while (lexer->lookahead == delimiter) { + lexer->advance(lexer, false); + level++; + } + lexer->mark_end(lexer); + if (level == *delimiter_length && valid_symbols[close_token]) { + *delimiter_length = 0; + lexer->result_symbol = close_token; + return true; + } + if (valid_symbols[open_token]) { + // Parse ahead to check if there is a closing delimiter + size_t close_level = 0; + while (!lexer->eof(lexer)) { + if (lexer->lookahead == delimiter) { + close_level++; + } else { + if (close_level == level) { + // Found a matching delimiter + break; + } + close_level = 0; + } + lexer->advance(lexer, false); + } + if (close_level == level) { + *delimiter_length = level; + lexer->result_symbol = open_token; + return true; + } + if (valid_symbols[UNCLOSED_SPAN]) { + lexer->result_symbol = UNCLOSED_SPAN; + return true; + } + } + return false; +} + +static bool parse_backtick(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + return parse_leaf_delimiter(lexer, &s->code_span_delimiter_length, + valid_symbols, '`', CODE_SPAN_START, + CODE_SPAN_CLOSE); +} + +static bool parse_dollar(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + return parse_leaf_delimiter(lexer, &s->latex_span_delimiter_length, + valid_symbols, '$', LATEX_SPAN_START, + LATEX_SPAN_CLOSE); +} + +static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + lexer->advance(lexer, false); + // If `num_emphasis_delimiters_left` is not zero then we already decided + // that this should be part of an emphasis delimiter run, so interpret it as + // such. + if (s->num_emphasis_delimiters_left > 0) { + // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it + // should be open or close. 
+ if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && + valid_symbols[EMPHASIS_OPEN_STAR]) { + s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN); + lexer->result_symbol = EMPHASIS_OPEN_STAR; + s->num_emphasis_delimiters_left--; + return true; + } + if (valid_symbols[EMPHASIS_CLOSE_STAR]) { + lexer->result_symbol = EMPHASIS_CLOSE_STAR; + s->num_emphasis_delimiters_left--; + return true; + } + } + lexer->mark_end(lexer); + // Otherwise count the number of stars + uint8_t star_count = 1; + while (lexer->lookahead == '*') { + star_count++; + lexer->advance(lexer, false); + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || + lexer->eof(lexer); + if (valid_symbols[EMPHASIS_OPEN_STAR] || + valid_symbols[EMPHASIS_CLOSE_STAR]) { + // The desicion made for the first star also counts for all the + // following stars in the delimiter run. Rembemer how many there are. + s->num_emphasis_delimiters_left = star_count - 1; + // Look ahead to the next symbol (after the last star) to find out if it + // is whitespace punctuation or other. + bool next_symbol_whitespace = + line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t'; + bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead); + // Information about the last token is in valid_symbols. See grammar.js + // for these tokens for how this is done. + if (valid_symbols[EMPHASIS_CLOSE_STAR] && + !valid_symbols[LAST_TOKEN_WHITESPACE] && + (!valid_symbols[LAST_TOKEN_PUNCTUATION] || + next_symbol_punctuation || next_symbol_whitespace)) { + // Closing delimiters take precedence + s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = EMPHASIS_CLOSE_STAR; + return true; + } + if (!next_symbol_whitespace && (!next_symbol_punctuation || + valid_symbols[LAST_TOKEN_PUNCTUATION] || + valid_symbols[LAST_TOKEN_WHITESPACE])) { + s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = EMPHASIS_OPEN_STAR; + return true; + } + } + return false; +} + +static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + lexer->advance(lexer, false); + // If `num_emphasis_delimiters_left` is not zero then we already decided + // that this should be part of an emphasis delimiter run, so interpret it as + // such. + if (s->num_emphasis_delimiters_left > 0) { + // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it + // should be open or close. + if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && + valid_symbols[STRIKETHROUGH_OPEN]) { + s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN); + lexer->result_symbol = STRIKETHROUGH_OPEN; + s->num_emphasis_delimiters_left--; + return true; + } + if (valid_symbols[STRIKETHROUGH_CLOSE]) { + lexer->result_symbol = STRIKETHROUGH_CLOSE; + s->num_emphasis_delimiters_left--; + return true; + } + } + lexer->mark_end(lexer); + // Otherwise count the number of tildes + uint8_t star_count = 1; + while (lexer->lookahead == '~') { + star_count++; + lexer->advance(lexer, false); + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || + lexer->eof(lexer); + if (valid_symbols[STRIKETHROUGH_OPEN] || + valid_symbols[STRIKETHROUGH_CLOSE]) { + // The desicion made for the first star also counts for all the + // following stars in the delimiter run. Rembemer how many there are. + s->num_emphasis_delimiters_left = star_count - 1; + // Look ahead to the next symbol (after the last star) to find out if it + // is whitespace punctuation or other. 
+ bool next_symbol_whitespace = + line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t'; + bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead); + // Information about the last token is in valid_symbols. See grammar.js + // for these tokens for how this is done. + if (valid_symbols[STRIKETHROUGH_CLOSE] && + !valid_symbols[LAST_TOKEN_WHITESPACE] && + (!valid_symbols[LAST_TOKEN_PUNCTUATION] || + next_symbol_punctuation || next_symbol_whitespace)) { + // Closing delimiters take precedence + s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = STRIKETHROUGH_CLOSE; + return true; + } + if (!next_symbol_whitespace && (!next_symbol_punctuation || + valid_symbols[LAST_TOKEN_PUNCTUATION] || + valid_symbols[LAST_TOKEN_WHITESPACE])) { + s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = STRIKETHROUGH_OPEN; + return true; + } + } + return false; +} + +static bool parse_underscore(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + lexer->advance(lexer, false); + // If `num_emphasis_delimiters_left` is not zero then we already decided + // that this should be part of an emphasis delimiter run, so interpret it as + // such. + if (s->num_emphasis_delimiters_left > 0) { + // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it + // should be open or close. + if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && + valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) { + s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN); + lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE; + s->num_emphasis_delimiters_left--; + return true; + } + if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) { + lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE; + s->num_emphasis_delimiters_left--; + return true; + } + } + lexer->mark_end(lexer); + // Otherwise count the number of stars + uint8_t underscore_count = 1; + while (lexer->lookahead == '_') { + underscore_count++; + lexer->advance(lexer, false); + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || + lexer->eof(lexer); + if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] || + valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) { + // The desicion made for the first underscore also counts for all the + // following underscores in the delimiter run. Rembemer how many there are. + s->num_emphasis_delimiters_left = underscore_count - 1; + // Look ahead to the next symbol (after the last underscore) to find out if it + // is whitespace punctuation or other. + bool next_symbol_whitespace = + line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t'; + bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead); + // Information about the last token is in valid_symbols. See grammar.js + // for these tokens for how this is done. 
+ if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] && + !valid_symbols[LAST_TOKEN_WHITESPACE] && + (!valid_symbols[LAST_TOKEN_PUNCTUATION] || + next_symbol_punctuation || next_symbol_whitespace)) { + // Closing delimiters take precedence + s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE; + return true; + } + if (!next_symbol_whitespace && (!next_symbol_punctuation || + valid_symbols[LAST_TOKEN_PUNCTUATION] || + valid_symbols[LAST_TOKEN_WHITESPACE])) { + s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN; + lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE; + return true; + } + } + return false; +} + +static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + // A normal tree-sitter rule decided that the current branch is invalid and + // now "requests" an error to stop the branch + if (valid_symbols[TRIGGER_ERROR]) { + return error(lexer); + } + + // Decide which tokens to consider based on the first non-whitespace + // character + switch (lexer->lookahead) { + case '`': + // A backtick could mark the beginning or ending of a code span or a + // fenced code block. + return parse_backtick(s, lexer, valid_symbols); + case '$': + return parse_dollar(s, lexer, valid_symbols); + case '*': + // A star could either mark the beginning or ending of emphasis, a + // list item or thematic break. This code is similar to the code for + // '_' and '+'. + return parse_star(s, lexer, valid_symbols); + case '_': + return parse_underscore(s, lexer, valid_symbols); + case '~': + return parse_tilde(s, lexer, valid_symbols); + } + return false; +} + +void *tree_sitter_markdown_inline_external_scanner_create() { + Scanner *s = (Scanner *)malloc(sizeof(Scanner)); + deserialize(s, NULL, 0); + return s; +} + +bool tree_sitter_markdown_inline_external_scanner_scan( + void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + return scan(scanner, lexer, valid_symbols); +} + +unsigned tree_sitter_markdown_inline_external_scanner_serialize(void *payload, + char *buffer) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, buffer); +} + +void tree_sitter_markdown_inline_external_scanner_deserialize(void *payload, + char *buffer, + unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, buffer, length); +} + +void tree_sitter_markdown_inline_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + free(scanner); +} diff --git a/test-grammars/markdown-inline/src/tree_sitter/alloc.h b/test-grammars/markdown-inline/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1f4466d --- /dev/null +++ b/test-grammars/markdown-inline/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef 
ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/test-grammars/markdown-inline/src/tree_sitter/array.h b/test-grammars/markdown-inline/src/tree_sitter/array.h new file mode 100644 index 0000000..186ba67 --- /dev/null +++ b/test-grammars/markdown-inline/src/tree_sitter/array.h @@ -0,0 +1,287 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + (_array__grow((Array *)(self), count, array_elem_size(self)), \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \ + (self)->size += (count)) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. 
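+/// Elements at and after `index` are shifted one slot toward the end.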
+#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. 
+static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/test-grammars/markdown-inline/src/tree_sitter/parser.h b/test-grammars/markdown-inline/src/tree_sitter/parser.h new file mode 100644 index 0000000..17b4fde --- /dev/null +++ b/test-grammars/markdown-inline/src/tree_sitter/parser.h @@ -0,0 +1,230 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define 
START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/test-grammars/markdown/LICENSE b/test-grammars/markdown/LICENSE new file mode 100644 index 0000000..c125939 --- /dev/null +++ b/test-grammars/markdown/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Matthias Deiml + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
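Aside for reviewers (not part of the vendored files): every hand-written `scanner.c` added in this diff follows the same fixed external-scanner ABI, five `tree_sitter_<grammar>_external_scanner_*` entry points that drive the `TSLexer` callbacks and hook into the `external_scanner` field of `TSLanguage` declared in the vendored `tree_sitter/parser.h`. The sketch below is a minimal, hypothetical scanner — `tree_sitter_example_*` and its single `NEWLINE` token are invented for illustration — not one of the grammars in this change.

```c
// Minimal, hypothetical external scanner showing the ABI that the vendored
// markdown and markdown-inline scanners implement.
#include <stdbool.h>
#include <stddef.h>

#include "tree_sitter/parser.h"

// Token IDs must match the order of the grammar's `externals` list.
enum TokenType { NEWLINE };

void *tree_sitter_example_external_scanner_create(void) {
    return NULL; // this toy scanner keeps no heap state
}

void tree_sitter_example_external_scanner_destroy(void *payload) {
    (void)payload;
}

unsigned tree_sitter_example_external_scanner_serialize(void *payload, char *buffer) {
    (void)payload;
    (void)buffer;
    return 0; // nothing to persist between parse states
}

void tree_sitter_example_external_scanner_deserialize(void *payload,
                                                      const char *buffer,
                                                      unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}

bool tree_sitter_example_external_scanner_scan(void *payload, TSLexer *lexer,
                                               const bool *valid_symbols) {
    (void)payload;
    // Emit NEWLINE only when the parser can currently accept it.
    if (valid_symbols[NEWLINE] && lexer->lookahead == '\n') {
        lexer->advance(lexer, false); // consume the character
        lexer->mark_end(lexer);       // the token ends here
        lexer->result_symbol = NEWLINE;
        return true;
    }
    return false;
}
```

Stateful scanners, like the markdown one later in this diff, keep their data behind the `payload` pointer and round-trip it through `serialize`/`deserialize` (writing at most `TREE_SITTER_SERIALIZATION_BUFFER_SIZE` bytes), which is what lets incremental reparses resume at any block nesting depth.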
diff --git a/test-grammars/markdown/highlights.scm b/test-grammars/markdown/highlights.scm new file mode 100644 index 0000000..a80fc1b --- /dev/null +++ b/test-grammars/markdown/highlights.scm @@ -0,0 +1,62 @@ + +(setext_heading (paragraph) @markup.heading.1 (setext_h1_underline) @markup.heading.marker) +(setext_heading (paragraph) @markup.heading.2 (setext_h2_underline) @markup.heading.marker) + +(atx_heading (atx_h1_marker) @markup.heading.marker) @markup.heading.1 +(atx_heading (atx_h2_marker) @markup.heading.marker) @markup.heading.2 +(atx_heading (atx_h3_marker) @markup.heading.marker) @markup.heading.3 +(atx_heading (atx_h4_marker) @markup.heading.marker) @markup.heading.4 +(atx_heading (atx_h5_marker) @markup.heading.marker) @markup.heading.5 +(atx_heading (atx_h6_marker) @markup.heading.marker) @markup.heading.6 + +[ + (indented_code_block) + (fenced_code_block) +] @markup.raw.block + +(info_string) @label + +[ + (fenced_code_block_delimiter) +] @punctuation.bracket + +[ + (link_destination) +] @markup.link.url + +[ + (link_label) +] @markup.link.label + +[ + (list_marker_plus) + (list_marker_minus) + (list_marker_star) +] @markup.list.unnumbered + +[ + (list_marker_dot) + (list_marker_parenthesis) +] @markup.list.numbered + +(task_list_marker_checked) @markup.list.checked +(task_list_marker_unchecked) @markup.list.unchecked + +(thematic_break) @punctuation.special + +[ + (block_continuation) + (block_quote_marker) +] @punctuation.special + +[ + (backslash_escape) +] @string.escape + +(block_quote) @markup.quote + +(pipe_table_row + "|" @punctuation.special) +(pipe_table_header + "|" @punctuation.special) +(pipe_table_delimiter_row) @punctuation.special diff --git a/test-grammars/markdown/injections.scm b/test-grammars/markdown/injections.scm new file mode 100644 index 0000000..7ed09c6 --- /dev/null +++ b/test-grammars/markdown/injections.scm @@ -0,0 +1,22 @@ +; From nvim-treesitter/nvim-treesitter + +(fenced_code_block + (code_fence_content) @injection.shebang @injection.content + (#set! injection.include-unnamed-children)) + +(fenced_code_block + (info_string + (language) @injection.language) + (code_fence_content) @injection.content (#set! injection.include-unnamed-children)) + +((html_block) @injection.content + (#set! injection.language "html") + (#set! injection.include-unnamed-children) + (#set! injection.combined)) + +((pipe_table_cell) @injection.content (#set! injection.language "markdown-inline") (#set! injection.include-unnamed-children)) + +((minus_metadata) @injection.content (#set! injection.language "yaml") (#set! injection.include-unnamed-children)) +((plus_metadata) @injection.content (#set! injection.language "toml") (#set! injection.include-unnamed-children)) + +((inline) @injection.content (#set! injection.language "markdown-inline") (#set! 
injection.include-unnamed-children)) diff --git a/test-grammars/markdown/metadata.json b/test-grammars/markdown/metadata.json new file mode 100644 index 0000000..44667f5 --- /dev/null +++ b/test-grammars/markdown/metadata.json @@ -0,0 +1,6 @@ +{ + "repo": "https://github.com/tree-sitter-grammars/tree-sitter-markdown", + "rev": "62516e8c78380e3b51d5b55727995d2c511436d8", + "license": "MIT", + "compressed": true +} \ No newline at end of file diff --git a/test-grammars/markdown/src/grammar.json b/test-grammars/markdown/src/grammar.json new file mode 100644 index 0000000..963b8f9 Binary files /dev/null and b/test-grammars/markdown/src/grammar.json differ diff --git a/test-grammars/markdown/src/parser.c b/test-grammars/markdown/src/parser.c new file mode 100644 index 0000000..df56218 Binary files /dev/null and b/test-grammars/markdown/src/parser.c differ diff --git a/test-grammars/markdown/src/scanner.c b/test-grammars/markdown/src/scanner.c new file mode 100644 index 0000000..969e806 --- /dev/null +++ b/test-grammars/markdown/src/scanner.c @@ -0,0 +1,1597 @@ +#include "tree_sitter/parser.h" +#include +#include +#include +#include +#include + +// For explanation of the tokens see grammar.js +typedef enum { + LINE_ENDING, + SOFT_LINE_ENDING, + BLOCK_CLOSE, + BLOCK_CONTINUATION, + BLOCK_QUOTE_START, + INDENTED_CHUNK_START, + ATX_H1_MARKER, + ATX_H2_MARKER, + ATX_H3_MARKER, + ATX_H4_MARKER, + ATX_H5_MARKER, + ATX_H6_MARKER, + SETEXT_H1_UNDERLINE, + SETEXT_H2_UNDERLINE, + THEMATIC_BREAK, + LIST_MARKER_MINUS, + LIST_MARKER_PLUS, + LIST_MARKER_STAR, + LIST_MARKER_PARENTHESIS, + LIST_MARKER_DOT, + LIST_MARKER_MINUS_DONT_INTERRUPT, + LIST_MARKER_PLUS_DONT_INTERRUPT, + LIST_MARKER_STAR_DONT_INTERRUPT, + LIST_MARKER_PARENTHESIS_DONT_INTERRUPT, + LIST_MARKER_DOT_DONT_INTERRUPT, + FENCED_CODE_BLOCK_START_BACKTICK, + FENCED_CODE_BLOCK_START_TILDE, + BLANK_LINE_START, + FENCED_CODE_BLOCK_END_BACKTICK, + FENCED_CODE_BLOCK_END_TILDE, + HTML_BLOCK_1_START, + HTML_BLOCK_1_END, + HTML_BLOCK_2_START, + HTML_BLOCK_3_START, + HTML_BLOCK_4_START, + HTML_BLOCK_5_START, + HTML_BLOCK_6_START, + HTML_BLOCK_7_START, + CLOSE_BLOCK, + NO_INDENTED_CHUNK, + ERROR, + TRIGGER_ERROR, + TOKEN_EOF, + MINUS_METADATA, + PLUS_METADATA, + PIPE_TABLE_START, + PIPE_TABLE_LINE_ENDING, +} TokenType; + +// Description of a block on the block stack. +// +// LIST_ITEM is a list item with minimal indentation (content begins at indent +// level 2) while LIST_ITEM_MAX_INDENTATION represents a list item with maximal +// indentation without being considered a indented code block. +// +// ANONYMOUS represents any block that whose close is not handled by the +// external s. +typedef enum { + BLOCK_QUOTE, + INDENTED_CODE_BLOCK, + LIST_ITEM, + LIST_ITEM_1_INDENTATION, + LIST_ITEM_2_INDENTATION, + LIST_ITEM_3_INDENTATION, + LIST_ITEM_4_INDENTATION, + LIST_ITEM_5_INDENTATION, + LIST_ITEM_6_INDENTATION, + LIST_ITEM_7_INDENTATION, + LIST_ITEM_8_INDENTATION, + LIST_ITEM_9_INDENTATION, + LIST_ITEM_10_INDENTATION, + LIST_ITEM_11_INDENTATION, + LIST_ITEM_12_INDENTATION, + LIST_ITEM_13_INDENTATION, + LIST_ITEM_14_INDENTATION, + LIST_ITEM_MAX_INDENTATION, + FENCED_CODE_BLOCK, + ANONYMOUS, +} Block; + +// Determines if a character is punctuation as defined by the markdown spec. +static bool is_punctuation(char chr) { + return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') || + (chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~'); +} + +// Returns the indentation level which lines of a list item should have at +// minimum. 
Should only be called with blocks for which `is_list_item` returns +// true. +static uint8_t list_item_indentation(Block block) { + return (uint8_t)(block - LIST_ITEM + 2); +} + +#define NUM_HTML_TAG_NAMES_RULE_1 3 + +static const char *const HTML_TAG_NAMES_RULE_1[NUM_HTML_TAG_NAMES_RULE_1] = { + "pre", "script", "style"}; + +#define NUM_HTML_TAG_NAMES_RULE_7 62 + +static const char *const HTML_TAG_NAMES_RULE_7[NUM_HTML_TAG_NAMES_RULE_7] = { + "address", "article", "aside", "base", "basefont", "blockquote", + "body", "caption", "center", "col", "colgroup", "dd", + "details", "dialog", "dir", "div", "dl", "dt", + "fieldset", "figcaption", "figure", "footer", "form", "frame", + "frameset", "h1", "h2", "h3", "h4", "h5", + "h6", "head", "header", "hr", "html", "iframe", + "legend", "li", "link", "main", "menu", "menuitem", + "nav", "noframes", "ol", "optgroup", "option", "p", + "param", "section", "source", "summary", "table", "tbody", + "td", "tfoot", "th", "thead", "title", "tr", + "track", "ul"}; + +// For explanation of the tokens see grammar.js +static const bool paragraph_interrupt_symbols[] = { + false, // LINE_ENDING, + false, // SOFT_LINE_ENDING, + false, // BLOCK_CLOSE, + false, // BLOCK_CONTINUATION, + true, // BLOCK_QUOTE_START, + false, // INDENTED_CHUNK_START, + true, // ATX_H1_MARKER, + true, // ATX_H2_MARKER, + true, // ATX_H3_MARKER, + true, // ATX_H4_MARKER, + true, // ATX_H5_MARKER, + true, // ATX_H6_MARKER, + true, // SETEXT_H1_UNDERLINE, + true, // SETEXT_H2_UNDERLINE, + true, // THEMATIC_BREAK, + true, // LIST_MARKER_MINUS, + true, // LIST_MARKER_PLUS, + true, // LIST_MARKER_STAR, + true, // LIST_MARKER_PARENTHESIS, + true, // LIST_MARKER_DOT, + false, // LIST_MARKER_MINUS_DONT_INTERRUPT, + false, // LIST_MARKER_PLUS_DONT_INTERRUPT, + false, // LIST_MARKER_STAR_DONT_INTERRUPT, + false, // LIST_MARKER_PARENTHESIS_DONT_INTERRUPT, + false, // LIST_MARKER_DOT_DONT_INTERRUPT, + true, // FENCED_CODE_BLOCK_START_BACKTICK, + true, // FENCED_CODE_BLOCK_START_TILDE, + true, // BLANK_LINE_START, + false, // FENCED_CODE_BLOCK_END_BACKTICK, + false, // FENCED_CODE_BLOCK_END_TILDE, + true, // HTML_BLOCK_1_START, + false, // HTML_BLOCK_1_END, + true, // HTML_BLOCK_2_START, + true, // HTML_BLOCK_3_START, + true, // HTML_BLOCK_4_START, + true, // HTML_BLOCK_5_START, + true, // HTML_BLOCK_6_START, + false, // HTML_BLOCK_7_START, + false, // CLOSE_BLOCK, + false, // NO_INDENTED_CHUNK, + false, // ERROR, + false, // TRIGGER_ERROR, + false, // EOF, + false, // MINUS_METADATA, + false, // PLUS_METADATA, + true, // PIPE_TABLE_START, + false, // PIPE_TABLE_LINE_ENDING, +}; + +// State bitflags used with `Scanner.state` + +// Currently matching (at the beginning of a line) +static const uint8_t STATE_MATCHING = 0x1 << 0; +// Last line break was inside a paragraph +static const uint8_t STATE_WAS_SOFT_LINE_BREAK = 0x1 << 1; +// Block should be closed after next line break +static const uint8_t STATE_CLOSE_BLOCK = 0x1 << 4; + +static size_t roundup_32(size_t x) { + x--; + + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + x++; + + return x; +} + +typedef struct { + // A stack of open blocks in the current parse state + struct { + size_t size; + size_t capacity; + Block *items; + } open_blocks; + + // Parser state flags + uint8_t state; + // Number of blocks that have been matched so far. Only changes during + // matching and is reset after every line ending + uint8_t matched; + // Consumed but "unused" indentation. 
Sometimes a tab needs to be "split" to + // be used in multiple tokens. + uint8_t indentation; + // The current column. Used to decide how many spaces a tab should equal + uint8_t column; + // The delimiter length of the currently open fenced code block + uint8_t fenced_code_block_delimiter_length; + + bool simulate; +} Scanner; + +static void push_block(Scanner *s, Block b) { + if (s->open_blocks.size == s->open_blocks.capacity) { + s->open_blocks.capacity = + s->open_blocks.capacity ? s->open_blocks.capacity << 1 : 8; + void *tmp = realloc(s->open_blocks.items, + sizeof(Block) * s->open_blocks.capacity); + assert(tmp != NULL); + s->open_blocks.items = tmp; + } + + s->open_blocks.items[s->open_blocks.size++] = b; +} + +static inline Block pop_block(Scanner *s) { + return s->open_blocks.items[--s->open_blocks.size]; +} + +// Write the whole state of a Scanner to a byte buffer +static unsigned serialize(Scanner *s, char *buffer) { + unsigned size = 0; + buffer[size++] = (char)s->state; + buffer[size++] = (char)s->matched; + buffer[size++] = (char)s->indentation; + buffer[size++] = (char)s->column; + buffer[size++] = (char)s->fenced_code_block_delimiter_length; + size_t blocks_count = s->open_blocks.size; + if (blocks_count > 0) { + memcpy(&buffer[size], s->open_blocks.items, + blocks_count * sizeof(Block)); + size += blocks_count * sizeof(Block); + } + return size; +} + +// Read the whole state of a Scanner from a byte buffer +// `serizalize` and `deserialize` should be fully symmetric. +static void deserialize(Scanner *s, const char *buffer, unsigned length) { + s->open_blocks.size = 0; + s->open_blocks.capacity = 0; + s->state = 0; + s->matched = 0; + s->indentation = 0; + s->column = 0; + s->fenced_code_block_delimiter_length = 0; + if (length > 0) { + size_t size = 0; + s->state = (uint8_t)buffer[size++]; + s->matched = (uint8_t)buffer[size++]; + s->indentation = (uint8_t)buffer[size++]; + s->column = (uint8_t)buffer[size++]; + s->fenced_code_block_delimiter_length = (uint8_t)buffer[size++]; + size_t blocks_size = length - size; + if (blocks_size > 0) { + size_t blocks_count = blocks_size / sizeof(Block); + + // ensure open blocks has enough room + if (s->open_blocks.capacity < blocks_count) { + size_t capacity = roundup_32(blocks_count); + void *tmp = realloc(s->open_blocks.items, + sizeof(Block) * capacity); + assert(tmp != NULL); + s->open_blocks.items = tmp; + s->open_blocks.capacity = capacity; + } + memcpy(s->open_blocks.items, &buffer[size], blocks_size); + s->open_blocks.size = blocks_count; + } + } +} + +static void mark_end(Scanner *s, TSLexer *lexer) { + if (!s->simulate) { + lexer->mark_end(lexer); + } +} + +// Convenience function to emit the error token. This is done to stop invalid +// parse branches. Specifically: +// 1. When encountering a newline after a line break that ended a paragraph, and +// no new block +// has been opened. +// 2. When encountering a new block after a soft line break. +// 3. When a `$._trigger_error` token is valid, which is used to stop parse +// branches through +// normal tree-sitter grammar rules. 
+// +// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in +// grammar.js +static bool error(TSLexer *lexer) { + lexer->result_symbol = ERROR; + return true; +} + +// Advance the lexer one character +// Also keeps track of the current column, counting tabs as spaces with tab stop +// 4 See https://github.github.com/gfm/#tabs +static size_t advance(Scanner *s, TSLexer *lexer) { + size_t size = 1; + if (lexer->lookahead == '\t') { + size = 4 - s->column; + s->column = 0; + } else { + s->column = (s->column + 1) % 4; + } + lexer->advance(lexer, false); + return size; +} + +// Try to match the given block, i.e. consume all tokens that belong to the +// block. These are +// 1. indentation for list items and indented code blocks +// 2. '>' for block quotes +// Returns true if the block is matched and false otherwise +static bool match(Scanner *s, TSLexer *lexer, Block block) { + switch (block) { + case INDENTED_CODE_BLOCK: + while (s->indentation < 4) { + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } else { + break; + } + } + if (s->indentation >= 4 && lexer->lookahead != '\n' && + lexer->lookahead != '\r') { + s->indentation -= 4; + return true; + } + break; + case LIST_ITEM: + case LIST_ITEM_1_INDENTATION: + case LIST_ITEM_2_INDENTATION: + case LIST_ITEM_3_INDENTATION: + case LIST_ITEM_4_INDENTATION: + case LIST_ITEM_5_INDENTATION: + case LIST_ITEM_6_INDENTATION: + case LIST_ITEM_7_INDENTATION: + case LIST_ITEM_8_INDENTATION: + case LIST_ITEM_9_INDENTATION: + case LIST_ITEM_10_INDENTATION: + case LIST_ITEM_11_INDENTATION: + case LIST_ITEM_12_INDENTATION: + case LIST_ITEM_13_INDENTATION: + case LIST_ITEM_14_INDENTATION: + case LIST_ITEM_MAX_INDENTATION: + while (s->indentation < list_item_indentation(block)) { + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } else { + break; + } + } + if (s->indentation >= list_item_indentation(block)) { + s->indentation -= list_item_indentation(block); + return true; + } + if (lexer->lookahead == '\n' || lexer->lookahead == '\r') { + s->indentation = 0; + return true; + } + break; + case BLOCK_QUOTE: + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } + if (lexer->lookahead == '>') { + advance(s, lexer); + s->indentation = 0; + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer) - 1; + } + return true; + } + break; + case FENCED_CODE_BLOCK: + case ANONYMOUS: + return true; + } + return false; +} + +static bool parse_fenced_code_block(Scanner *s, const char delimiter, + TSLexer *lexer, const bool *valid_symbols) { + // count the number of backticks + uint8_t level = 0; + while (lexer->lookahead == delimiter) { + advance(s, lexer); + level++; + } + mark_end(s, lexer); + // If this is able to close a fenced code block then that is the only valid + // interpretation. It can only close a fenced code block if the number of + // backticks is at least the number of backticks of the opening delimiter. + // Also it cannot be indented more than 3 spaces. + if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_END_BACKTICK] + : valid_symbols[FENCED_CODE_BLOCK_END_TILDE]) && + s->indentation < 4 && level >= s->fenced_code_block_delimiter_length && + (lexer->lookahead == '\n' || lexer->lookahead == '\r')) { + s->fenced_code_block_delimiter_length = 0; + lexer->result_symbol = delimiter == '`' ? 
FENCED_CODE_BLOCK_END_BACKTICK + : FENCED_CODE_BLOCK_END_TILDE; + return true; + } + // If this could be the start of a fenced code block, check if the info + // string contains any backticks. + if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_START_BACKTICK] + : valid_symbols[FENCED_CODE_BLOCK_START_TILDE]) && + level >= 3) { + bool info_string_has_backtick = false; + if (delimiter == '`') { + while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && + !lexer->eof(lexer)) { + if (lexer->lookahead == '`') { + info_string_has_backtick = true; + break; + } + advance(s, lexer); + } + } + // If it does not then choose to interpret this as the start of a fenced + // code block. + if (!info_string_has_backtick) { + lexer->result_symbol = delimiter == '`' + ? FENCED_CODE_BLOCK_START_BACKTICK + : FENCED_CODE_BLOCK_START_TILDE; + if (!s->simulate) + push_block(s, FENCED_CODE_BLOCK); + // Remember the length of the delimiter for later, since we need it + // to decide whether a sequence of backticks can close the block. + s->fenced_code_block_delimiter_length = level; + s->indentation = 0; + return true; + } + } + return false; +} + +static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + advance(s, lexer); + mark_end(s, lexer); + // Otherwise count the number of stars permitting whitespaces between them. + size_t star_count = 1; + // Also remember how many stars there are before the first whitespace... + // ...and how many spaces follow the first star. + uint8_t extra_indentation = 0; + for (;;) { + if (lexer->lookahead == '*') { + if (star_count == 1 && extra_indentation >= 1 && + valid_symbols[LIST_MARKER_STAR]) { + // If we get to this point then the token has to be at least + // this long. We need to call `mark_end` here in case we decide + // later that this is a list item. + mark_end(s, lexer); + } + star_count++; + advance(s, lexer); + } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + if (star_count == 1) { + extra_indentation += advance(s, lexer); + } else { + advance(s, lexer); + } + } else { + break; + } + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r'; + bool dont_interrupt = false; + if (star_count == 1 && line_end) { + extra_indentation = 1; + // line is empty so don't interrupt paragraphs if this is a list marker + dont_interrupt = s->matched == s->open_blocks.size; + } + // If there were at least 3 stars then this could be a thematic break + bool thematic_break = star_count >= 3 && line_end; + // If there was a star and at least one space after that star then this + // could be a list marker. + bool list_marker_star = star_count >= 1 && extra_indentation >= 1; + if (valid_symbols[THEMATIC_BREAK] && thematic_break && s->indentation < 4) { + // If a thematic break is valid then it takes precedence + lexer->result_symbol = THEMATIC_BREAK; + mark_end(s, lexer); + s->indentation = 0; + return true; + } + if ((dont_interrupt ? valid_symbols[LIST_MARKER_STAR_DONT_INTERRUPT] + : valid_symbols[LIST_MARKER_STAR]) && + list_marker_star) { + // List markers take precedence over emphasis markers + // If star_count > 1 then we already called mark_end at the right point. + // Otherwise the token should go until this point. + if (star_count == 1) { + mark_end(s, lexer); + } + // Not counting one space... + extra_indentation--; + // ... 
check if the list item begins with an indented code block + if (extra_indentation <= 3) { + // If not then calculate the indentation level of the list item + // content as indentation of list marker + indentation after list + // marker - 1 + extra_indentation += s->indentation; + s->indentation = 0; + } else { + // Otherwise the indentation level is just the indentation of the + // list marker. We keep the indentation after the list marker for + // later blocks. + uint8_t temp = s->indentation; + s->indentation = extra_indentation; + extra_indentation = temp; + } + if (!s->simulate) + push_block(s, (Block)(LIST_ITEM + extra_indentation)); + lexer->result_symbol = + dont_interrupt ? LIST_MARKER_STAR_DONT_INTERRUPT : LIST_MARKER_STAR; + return true; + } + return false; +} + +static bool parse_thematic_break_underscore(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + advance(s, lexer); + mark_end(s, lexer); + size_t underscore_count = 1; + for (;;) { + if (lexer->lookahead == '_') { + underscore_count++; + advance(s, lexer); + } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } else { + break; + } + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r'; + if (underscore_count >= 3 && line_end && valid_symbols[THEMATIC_BREAK]) { + lexer->result_symbol = THEMATIC_BREAK; + mark_end(s, lexer); + s->indentation = 0; + return true; + } + return false; +} + +static bool parse_block_quote(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + if (valid_symbols[BLOCK_QUOTE_START]) { + advance(s, lexer); + s->indentation = 0; + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer) - 1; + } + lexer->result_symbol = BLOCK_QUOTE_START; + if (!s->simulate) + push_block(s, BLOCK_QUOTE); + return true; + } + return false; +} + +static bool parse_atx_heading(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + if (valid_symbols[ATX_H1_MARKER] && s->indentation <= 3) { + mark_end(s, lexer); + uint16_t level = 0; + while (lexer->lookahead == '#' && level <= 6) { + advance(s, lexer); + level++; + } + if (level <= 6 && + (lexer->lookahead == ' ' || lexer->lookahead == '\t' || + lexer->lookahead == '\n' || lexer->lookahead == '\r')) { + lexer->result_symbol = ATX_H1_MARKER + (level - 1); + s->indentation = 0; + mark_end(s, lexer); + return true; + } + } + return false; +} + +static bool parse_setext_underline(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + if (valid_symbols[SETEXT_H1_UNDERLINE] && + s->matched == s->open_blocks.size) { + mark_end(s, lexer); + while (lexer->lookahead == '=') { + advance(s, lexer); + } + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '\n' || lexer->lookahead == '\r') { + lexer->result_symbol = SETEXT_H1_UNDERLINE; + mark_end(s, lexer); + return true; + } + } + return false; +} + +static bool parse_plus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + if (s->indentation <= 3 && + (valid_symbols[LIST_MARKER_PLUS] || + valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT] || + valid_symbols[PLUS_METADATA])) { + advance(s, lexer); + if (valid_symbols[PLUS_METADATA] && lexer->lookahead == '+') { + advance(s, lexer); + if (lexer->lookahead != '+') { + return false; + } + advance(s, lexer); + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead != '\n' && lexer->lookahead != '\r') { + return false; + } + for (;;) { + // advance 
over newline + if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + // check for pluses + size_t plus_count = 0; + while (lexer->lookahead == '+') { + plus_count++; + advance(s, lexer); + } + if (plus_count == 3) { + // if exactly 3 check if next symbol (after eventual + // whitespace) is newline + while (lexer->lookahead == ' ' || + lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { + // if so also consume newline + if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + mark_end(s, lexer); + lexer->result_symbol = PLUS_METADATA; + return true; + } + } + // otherwise consume rest of line + while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && + !lexer->eof(lexer)) { + advance(s, lexer); + } + // if end of file is reached, then this is not metadata + if (lexer->eof(lexer)) { + break; + } + } + } else { + uint8_t extra_indentation = 0; + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + extra_indentation += advance(s, lexer); + } + bool dont_interrupt = false; + if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { + extra_indentation = 1; + dont_interrupt = true; + } + dont_interrupt = + dont_interrupt && s->matched == s->open_blocks.size; + if (extra_indentation >= 1 && + (dont_interrupt ? valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT] + : valid_symbols[LIST_MARKER_PLUS])) { + lexer->result_symbol = dont_interrupt + ? LIST_MARKER_PLUS_DONT_INTERRUPT + : LIST_MARKER_PLUS; + extra_indentation--; + if (extra_indentation <= 3) { + extra_indentation += s->indentation; + s->indentation = 0; + } else { + uint8_t temp = s->indentation; + s->indentation = extra_indentation; + extra_indentation = temp; + } + if (!s->simulate) + push_block(s, (Block)(LIST_ITEM + extra_indentation)); + return true; + } + } + } + return false; +} + +static bool parse_ordered_list_marker(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + if (s->indentation <= 3 && + (valid_symbols[LIST_MARKER_PARENTHESIS] || + valid_symbols[LIST_MARKER_DOT] || + valid_symbols[LIST_MARKER_PARENTHESIS_DONT_INTERRUPT] || + valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT])) { + size_t digits = 1; + bool dont_interrupt = lexer->lookahead != '1'; + advance(s, lexer); + while (isdigit(lexer->lookahead)) { + dont_interrupt = true; + digits++; + advance(s, lexer); + } + if (digits >= 1 && digits <= 9) { + bool dot = false; + bool parenthesis = false; + if (lexer->lookahead == '.') { + advance(s, lexer); + dot = true; + } else if (lexer->lookahead == ')') { + advance(s, lexer); + parenthesis = true; + } + if (dot || parenthesis) { + uint8_t extra_indentation = 0; + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + extra_indentation += advance(s, lexer); + } + bool line_end = + lexer->lookahead == '\n' || lexer->lookahead == '\r'; + if (line_end) { + extra_indentation = 1; + dont_interrupt = true; + } + dont_interrupt = + dont_interrupt && s->matched == s->open_blocks.size; + if (extra_indentation >= 1 && + (dot ? (dont_interrupt + ? valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT] + : valid_symbols[LIST_MARKER_DOT]) + : (dont_interrupt + ? valid_symbols + [LIST_MARKER_PARENTHESIS_DONT_INTERRUPT] + : valid_symbols[LIST_MARKER_PARENTHESIS]))) { + lexer->result_symbol = + dot ? 
LIST_MARKER_DOT : LIST_MARKER_PARENTHESIS; + extra_indentation--; + if (extra_indentation <= 3) { + extra_indentation += s->indentation; + s->indentation = 0; + } else { + uint8_t temp = s->indentation; + s->indentation = extra_indentation; + extra_indentation = temp; + } + if (!s->simulate) + push_block( + s, (Block)(LIST_ITEM + extra_indentation + digits)); + return true; + } + } + } + } + return false; +} + +static bool parse_minus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + if (s->indentation <= 3 && + (valid_symbols[LIST_MARKER_MINUS] || + valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT] || + valid_symbols[SETEXT_H2_UNDERLINE] || valid_symbols[THEMATIC_BREAK] || + valid_symbols[MINUS_METADATA])) { + mark_end(s, lexer); + bool whitespace_after_minus = false; + bool minus_after_whitespace = false; + size_t minus_count = 0; + uint8_t extra_indentation = 0; + + for (;;) { + if (lexer->lookahead == '-') { + if (minus_count == 1 && extra_indentation >= 1) { + mark_end(s, lexer); + } + minus_count++; + advance(s, lexer); + minus_after_whitespace = whitespace_after_minus; + } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + if (minus_count == 1) { + extra_indentation += advance(s, lexer); + } else { + advance(s, lexer); + } + whitespace_after_minus = true; + } else { + break; + } + } + bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r'; + bool dont_interrupt = false; + if (minus_count == 1 && line_end) { + extra_indentation = 1; + dont_interrupt = true; + } + dont_interrupt = dont_interrupt && s->matched == s->open_blocks.size; + bool thematic_break = minus_count >= 3 && line_end; + bool underline = + minus_count >= 1 && !minus_after_whitespace && line_end && + s->matched == + s->open_blocks + .size; // setext heading can not break lazy continuation + bool list_marker_minus = minus_count >= 1 && extra_indentation >= 1; + bool success = false; + if (valid_symbols[SETEXT_H2_UNDERLINE] && underline) { + lexer->result_symbol = SETEXT_H2_UNDERLINE; + mark_end(s, lexer); + s->indentation = 0; + success = true; + } else if (valid_symbols[THEMATIC_BREAK] && + thematic_break) { // underline is false if list_marker_minus + // is true + lexer->result_symbol = THEMATIC_BREAK; + mark_end(s, lexer); + s->indentation = 0; + success = true; + } else if ((dont_interrupt + ? valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT] + : valid_symbols[LIST_MARKER_MINUS]) && + list_marker_minus) { + if (minus_count == 1) { + mark_end(s, lexer); + } + extra_indentation--; + if (extra_indentation <= 3) { + extra_indentation += s->indentation; + s->indentation = 0; + } else { + uint8_t temp = s->indentation; + s->indentation = extra_indentation; + extra_indentation = temp; + } + if (!s->simulate) + push_block(s, (Block)(LIST_ITEM + extra_indentation)); + lexer->result_symbol = dont_interrupt + ? 
LIST_MARKER_MINUS_DONT_INTERRUPT + : LIST_MARKER_MINUS; + return true; + } + if (minus_count == 3 && (!minus_after_whitespace) && line_end && + valid_symbols[MINUS_METADATA]) { + for (;;) { + // advance over newline + if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + // check for minuses + minus_count = 0; + while (lexer->lookahead == '-') { + minus_count++; + advance(s, lexer); + } + if (minus_count == 3) { + // if exactly 3 check if next symbol (after eventual + // whitespace) is newline + while (lexer->lookahead == ' ' || + lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { + // if so also consume newline + if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + mark_end(s, lexer); + lexer->result_symbol = MINUS_METADATA; + return true; + } + } + // otherwise consume rest of line + while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && + !lexer->eof(lexer)) { + advance(s, lexer); + } + // if end of file is reached, then this is not metadata + if (lexer->eof(lexer)) { + break; + } + } + } + if (success) { + return true; + } + } + return false; +} + +static bool parse_html_block(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + if (!(valid_symbols[HTML_BLOCK_1_START] || + valid_symbols[HTML_BLOCK_1_END] || + valid_symbols[HTML_BLOCK_2_START] || + valid_symbols[HTML_BLOCK_3_START] || + valid_symbols[HTML_BLOCK_4_START] || + valid_symbols[HTML_BLOCK_5_START] || + valid_symbols[HTML_BLOCK_6_START] || + valid_symbols[HTML_BLOCK_7_START])) { + return false; + } + advance(s, lexer); + if (lexer->lookahead == '?' 
&& valid_symbols[HTML_BLOCK_3_START]) { + advance(s, lexer); + lexer->result_symbol = HTML_BLOCK_3_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + if (lexer->lookahead == '!') { + // could be block 2 + advance(s, lexer); + if (lexer->lookahead == '-') { + advance(s, lexer); + if (lexer->lookahead == '-' && valid_symbols[HTML_BLOCK_2_START]) { + advance(s, lexer); + lexer->result_symbol = HTML_BLOCK_2_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + } else if ('A' <= lexer->lookahead && lexer->lookahead <= 'Z' && + valid_symbols[HTML_BLOCK_4_START]) { + advance(s, lexer); + lexer->result_symbol = HTML_BLOCK_4_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } else if (lexer->lookahead == '[') { + advance(s, lexer); + if (lexer->lookahead == 'C') { + advance(s, lexer); + if (lexer->lookahead == 'D') { + advance(s, lexer); + if (lexer->lookahead == 'A') { + advance(s, lexer); + if (lexer->lookahead == 'T') { + advance(s, lexer); + if (lexer->lookahead == 'A') { + advance(s, lexer); + if (lexer->lookahead == '[' && + valid_symbols[HTML_BLOCK_5_START]) { + advance(s, lexer); + lexer->result_symbol = HTML_BLOCK_5_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + } + } + } + } + } + } + } + bool starting_slash = lexer->lookahead == '/'; + if (starting_slash) { + advance(s, lexer); + } + char name[11]; + size_t name_length = 0; + while (iswalpha((wint_t)lexer->lookahead)) { + if (name_length < 10) { + name[name_length++] = (char)towlower((wint_t)lexer->lookahead); + } else { + name_length = 12; + } + advance(s, lexer); + } + if (name_length == 0) { + return false; + } + bool tag_closed = false; + if (name_length < 11) { + name[name_length] = 0; + bool next_symbol_valid = + lexer->lookahead == ' ' || lexer->lookahead == '\t' || + lexer->lookahead == '\n' || lexer->lookahead == '\r' || + lexer->lookahead == '>'; + if (next_symbol_valid) { + // try block 1 names + for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_1; i++) { + if (strcmp(name, HTML_TAG_NAMES_RULE_1[i]) == 0) { + if (starting_slash) { + if (valid_symbols[HTML_BLOCK_1_END]) { + lexer->result_symbol = HTML_BLOCK_1_END; + return true; + } + } else if (valid_symbols[HTML_BLOCK_1_START]) { + lexer->result_symbol = HTML_BLOCK_1_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + } + } + } + if (!next_symbol_valid && lexer->lookahead == '/') { + advance(s, lexer); + if (lexer->lookahead == '>') { + advance(s, lexer); + tag_closed = true; + } + } + if (next_symbol_valid || tag_closed) { + // try block 2 names + for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_7; i++) { + if (strcmp(name, HTML_TAG_NAMES_RULE_7[i]) == 0 && + valid_symbols[HTML_BLOCK_6_START]) { + lexer->result_symbol = HTML_BLOCK_6_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + } + } + } + + if (!valid_symbols[HTML_BLOCK_7_START]) { + return false; + } + + if (!tag_closed) { + // tag name (continued) + while (iswalnum((wint_t)lexer->lookahead) || lexer->lookahead == '-') { + advance(s, lexer); + } + if (!starting_slash) { + // attributes + bool had_whitespace = false; + for (;;) { + // whitespace + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + had_whitespace = true; + advance(s, lexer); + } + if (lexer->lookahead == '/') { + advance(s, lexer); + break; + } + if (lexer->lookahead == '>') { + break; + } + // attribute name + if (!had_whitespace) { + return false; + } + if (!iswalpha((wint_t)lexer->lookahead) && + 
lexer->lookahead != '_' && lexer->lookahead != ':') { + return false; + } + had_whitespace = false; + advance(s, lexer); + while (iswalnum((wint_t)lexer->lookahead) || + lexer->lookahead == '_' || lexer->lookahead == '.' || + lexer->lookahead == ':' || lexer->lookahead == '-') { + advance(s, lexer); + } + // attribute value specification + // optional whitespace + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + had_whitespace = true; + advance(s, lexer); + } + // = + if (lexer->lookahead == '=') { + advance(s, lexer); + had_whitespace = false; + // optional whitespace + while (lexer->lookahead == ' ' || + lexer->lookahead == '\t') { + advance(s, lexer); + } + // attribute value + if (lexer->lookahead == '\'' || lexer->lookahead == '"') { + char delimiter = (char)lexer->lookahead; + advance(s, lexer); + while (lexer->lookahead != delimiter && + lexer->lookahead != '\n' && + lexer->lookahead != '\r' && !lexer->eof(lexer)) { + advance(s, lexer); + } + if (lexer->lookahead != delimiter) { + return false; + } + advance(s, lexer); + } else { + // unquoted attribute value + bool had_one = false; + while (lexer->lookahead != ' ' && + lexer->lookahead != '\t' && + lexer->lookahead != '"' && + lexer->lookahead != '\'' && + lexer->lookahead != '=' && + lexer->lookahead != '<' && + lexer->lookahead != '>' && + lexer->lookahead != '`' && + lexer->lookahead != '\n' && + lexer->lookahead != '\r' && !lexer->eof(lexer)) { + advance(s, lexer); + had_one = true; + } + if (!had_one) { + return false; + } + } + } + } + } else { + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + } + if (lexer->lookahead != '>') { + return false; + } + advance(s, lexer); + } + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { + lexer->result_symbol = HTML_BLOCK_7_START; + if (!s->simulate) + push_block(s, ANONYMOUS); + return true; + } + return false; +} + +static bool parse_pipe_table(Scanner *s, TSLexer *lexer, + const bool *valid_symbols) { + + // unused + (void)(valid_symbols); + + // PIPE_TABLE_START is zero width + mark_end(s, lexer); + // count number of cells + size_t cell_count = 0; + // also remember if we see starting and ending pipes, as empty headers have + // to have both + bool starting_pipe = false; + bool ending_pipe = false; + bool empty = true; + if (lexer->lookahead == '|') { + starting_pipe = true; + advance(s, lexer); + } + while (lexer->lookahead != '\r' && lexer->lookahead != '\n' && + !lexer->eof(lexer)) { + if (lexer->lookahead == '|') { + cell_count++; + ending_pipe = true; + advance(s, lexer); + } else { + if (lexer->lookahead != ' ' && lexer->lookahead != '\t') { + ending_pipe = false; + } + if (lexer->lookahead == '\\') { + advance(s, lexer); + if (is_punctuation((char)lexer->lookahead)) { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + } + } + if (empty && cell_count == 0 && !(starting_pipe && ending_pipe)) { + return false; + } + if (!ending_pipe) { + cell_count++; + } + + // check the following line for a delimiter row + // parse a newline + if (lexer->lookahead == '\n') { + advance(s, lexer); + } else if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + return false; + } + s->indentation = 0; + s->column = 0; + for (;;) { + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } else { + break; + } + } + 
s->simulate = true; + uint8_t matched_temp = 0; + while (matched_temp < (uint8_t)s->open_blocks.size) { + if (match(s, lexer, s->open_blocks.items[matched_temp])) { + matched_temp++; + } else { + return false; + } + } + + // check if delimiter row has the same number of cells and at least one pipe + size_t delimiter_cell_count = 0; + if (lexer->lookahead == '|') { + advance(s, lexer); + } + for (;;) { + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '|') { + delimiter_cell_count++; + advance(s, lexer); + continue; + } + if (lexer->lookahead == ':') { + advance(s, lexer); + if (lexer->lookahead != '-') { + return false; + } + } + bool had_one_minus = false; + while (lexer->lookahead == '-') { + had_one_minus = true; + advance(s, lexer); + } + if (had_one_minus) { + delimiter_cell_count++; + } + if (lexer->lookahead == ':') { + if (!had_one_minus) { + return false; + } + advance(s, lexer); + } + while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + advance(s, lexer); + } + if (lexer->lookahead == '|') { + if (!had_one_minus) { + delimiter_cell_count++; + } + advance(s, lexer); + continue; + } + if (lexer->lookahead != '\r' && lexer->lookahead != '\n') { + return false; + } else { + break; + } + } + // if the cell counts are not equal then this is not a table + if (cell_count != delimiter_cell_count) { + return false; + } + + lexer->result_symbol = PIPE_TABLE_START; + return true; +} + +static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) { + // A normal tree-sitter rule decided that the current branch is invalid and + // now "requests" an error to stop the branch + if (valid_symbols[TRIGGER_ERROR]) { + return error(lexer); + } + + // Close the inner most block after the next line break as requested. See + // `$._close_block` in grammar.js + if (valid_symbols[CLOSE_BLOCK]) { + s->state |= STATE_CLOSE_BLOCK; + lexer->result_symbol = CLOSE_BLOCK; + return true; + } + + // if we are at the end of the file and there are still open blocks close + // them all + if (lexer->eof(lexer)) { + if (valid_symbols[TOKEN_EOF]) { + lexer->result_symbol = TOKEN_EOF; + return true; + } + if (s->open_blocks.size > 0) { + lexer->result_symbol = BLOCK_CLOSE; + if (!s->simulate) + pop_block(s); + return true; + } + return false; + } + + if (!(s->state & STATE_MATCHING)) { + // Parse any preceeding whitespace and remember its length. This makes a + // lot of parsing quite a bit easier. + for (;;) { + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } else { + break; + } + } + // We are not matching. This is where the parsing logic for most + // "normal" token is. Most importantly parsing logic for the start of + // new blocks. + if (valid_symbols[INDENTED_CHUNK_START] && + !valid_symbols[NO_INDENTED_CHUNK]) { + if (s->indentation >= 4 && lexer->lookahead != '\n' && + lexer->lookahead != '\r') { + lexer->result_symbol = INDENTED_CHUNK_START; + if (!s->simulate) + push_block(s, INDENTED_CODE_BLOCK); + s->indentation -= 4; + return true; + } + } + // Decide which tokens to consider based on the first non-whitespace + // character + switch (lexer->lookahead) { + case '\r': + case '\n': + if (valid_symbols[BLANK_LINE_START]) { + // A blank line token is actually just 0 width, so do not + // consume the characters + lexer->result_symbol = BLANK_LINE_START; + return true; + } + break; + case '`': + // A backtick could mark the beginning or ending of a fenced + // code block. 
+ return parse_fenced_code_block(s, '`', lexer, valid_symbols); + case '~': + // A tilde could mark the beginning or ending of a fenced code + // block. + return parse_fenced_code_block(s, '~', lexer, valid_symbols); + case '*': + // A star could either mark a list item or a thematic break. + // This code is similar to the code for '_' and '+'. + return parse_star(s, lexer, valid_symbols); + case '_': + return parse_thematic_break_underscore(s, lexer, valid_symbols); + case '>': + // A '>' could mark the beginning of a block quote + return parse_block_quote(s, lexer, valid_symbols); + case '#': + // A '#' could mark a atx heading + return parse_atx_heading(s, lexer, valid_symbols); + case '=': + // A '=' could mark a setext underline + return parse_setext_underline(s, lexer, valid_symbols); + case '+': + // A '+' could be a list marker + return parse_plus(s, lexer, valid_symbols); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // A number could be a list marker (if followed by a dot or a + // parenthesis) + return parse_ordered_list_marker(s, lexer, valid_symbols); + case '-': + // A minus could mark a list marker, a thematic break or a + // setext underline + return parse_minus(s, lexer, valid_symbols); + case '<': + // A < could mark the beginning of a html block + return parse_html_block(s, lexer, valid_symbols); + } + if (lexer->lookahead != '\r' && lexer->lookahead != '\n' && + valid_symbols[PIPE_TABLE_START]) { + return parse_pipe_table(s, lexer, valid_symbols); + } + } else { // we are in the state of trying to match all currently open blocks + bool partial_success = false; + while (s->matched < (uint8_t)s->open_blocks.size) { + if (s->matched == (uint8_t)s->open_blocks.size - 1 && + (s->state & STATE_CLOSE_BLOCK)) { + if (!partial_success) + s->state &= ~STATE_CLOSE_BLOCK; + break; + } + if (match(s, lexer, s->open_blocks.items[s->matched])) { + partial_success = true; + s->matched++; + } else { + if (s->state & STATE_WAS_SOFT_LINE_BREAK) { + s->state &= (~STATE_MATCHING); + } + break; + } + } + if (partial_success) { + if (s->matched == s->open_blocks.size) { + s->state &= (~STATE_MATCHING); + } + lexer->result_symbol = BLOCK_CONTINUATION; + return true; + } + + if (!(s->state & STATE_WAS_SOFT_LINE_BREAK)) { + lexer->result_symbol = BLOCK_CLOSE; + pop_block(s); + if (s->matched == s->open_blocks.size) { + s->state &= (~STATE_MATCHING); + } + return true; + } + } + + // The parser just encountered a line break. 
Setup the state correspondingly + if ((valid_symbols[LINE_ENDING] || valid_symbols[SOFT_LINE_ENDING] || + valid_symbols[PIPE_TABLE_LINE_ENDING]) && + (lexer->lookahead == '\n' || lexer->lookahead == '\r')) { + if (lexer->lookahead == '\r') { + advance(s, lexer); + if (lexer->lookahead == '\n') { + advance(s, lexer); + } + } else { + advance(s, lexer); + } + s->indentation = 0; + s->column = 0; + if (!(s->state & STATE_CLOSE_BLOCK) && + (valid_symbols[SOFT_LINE_ENDING] || + valid_symbols[PIPE_TABLE_LINE_ENDING])) { + lexer->mark_end(lexer); + for (;;) { + if (lexer->lookahead == ' ' || lexer->lookahead == '\t') { + s->indentation += advance(s, lexer); + } else { + break; + } + } + s->simulate = true; + uint8_t matched_temp = s->matched; + s->matched = 0; + bool one_will_be_matched = false; + while (s->matched < (uint8_t)s->open_blocks.size) { + if (match(s, lexer, s->open_blocks.items[s->matched])) { + s->matched++; + one_will_be_matched = true; + } else { + break; + } + } + bool all_will_be_matched = s->matched == s->open_blocks.size; + if (!lexer->eof(lexer) && + !scan(s, lexer, paragraph_interrupt_symbols)) { + s->matched = matched_temp; + // If the last line break ended a paragraph and no new block + // opened, the last line break should have been a soft line + // break Reset the counter for matched blocks + s->matched = 0; + s->indentation = 0; + s->column = 0; + // If there is at least one open block, we should be in the + // matching state. Also set the matching flag if a + // `$._soft_line_break_marker` can be emitted so it does get + // emitted. + if (one_will_be_matched) { + s->state |= STATE_MATCHING; + } else { + s->state &= (~STATE_MATCHING); + } + if (valid_symbols[PIPE_TABLE_LINE_ENDING]) { + if (all_will_be_matched) { + lexer->result_symbol = PIPE_TABLE_LINE_ENDING; + return true; + } + } else { + lexer->result_symbol = SOFT_LINE_ENDING; + // reset some state variables + s->state |= STATE_WAS_SOFT_LINE_BREAK; + return true; + } + } else { + s->matched = matched_temp; + } + s->indentation = 0; + s->column = 0; + } + if (valid_symbols[LINE_ENDING]) { + // If the last line break ended a paragraph and no new block opened, + // the last line break should have been a soft line break Reset the + // counter for matched blocks + s->matched = 0; + // If there is at least one open block, we should be in the matching + // state. Also set the matching flag if a + // `$._soft_line_break_marker` can be emitted so it does get + // emitted. 
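+            // Unlike the soft line ending path above, nothing is simulated here:
+            // the still-open blocks are matched on the next call to scan().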
+ if (s->open_blocks.size > 0) { + s->state |= STATE_MATCHING; + } else { + s->state &= (~STATE_MATCHING); + } + // reset some state variables + s->state &= (~STATE_WAS_SOFT_LINE_BREAK); + lexer->result_symbol = LINE_ENDING; + return true; + } + } + return false; +} + +void *tree_sitter_markdown_external_scanner_create(void) { + Scanner *s = (Scanner *)malloc(sizeof(Scanner)); + s->open_blocks.items = (Block *)calloc(1, sizeof(Block)); +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + _Static_assert(ATX_H6_MARKER == ATX_H1_MARKER + 5, ""); +#else + assert(ATX_H6_MARKER == ATX_H1_MARKER + 5); +#endif + deserialize(s, NULL, 0); + + return s; +} + +bool tree_sitter_markdown_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + scanner->simulate = false; + return scan(scanner, lexer, valid_symbols); +} + +unsigned tree_sitter_markdown_external_scanner_serialize(void *payload, + char *buffer) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, buffer); +} + +void tree_sitter_markdown_external_scanner_deserialize(void *payload, + char *buffer, + unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, buffer, length); +} + +void tree_sitter_markdown_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + free(scanner->open_blocks.items); + free(scanner); +} diff --git a/test-grammars/markdown/src/tree_sitter/alloc.h b/test-grammars/markdown/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1f4466d --- /dev/null +++ b/test-grammars/markdown/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/test-grammars/markdown/src/tree_sitter/array.h b/test-grammars/markdown/src/tree_sitter/array.h new file mode 100644 index 0000000..186ba67 --- /dev/null +++ b/test-grammars/markdown/src/tree_sitter/array.h @@ -0,0 +1,287 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. 
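+/// Note that this expands to a brace initializer, so it can only be used to
+/// initialize a declaration, e.g. `Array(uint32_t) ids = array_new();`.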
+#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + (_array__grow((Array *)(self), count, array_elem_size(self)), \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \ + (self)->size += (count)) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. 
+/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
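+/// Capacity grows geometrically: at least 8, then doubling (or jumping straight
+/// to the requested size if that is larger), so n pushes cause O(log n) reallocations.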
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
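+/// The subtraction yields an ordinary comparator result: negative, zero, or
+/// positive as the existing element compares below, equal to, or above `needle`.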
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/test-grammars/markdown/src/tree_sitter/parser.h b/test-grammars/markdown/src/tree_sitter/parser.h new file mode 100644 index 0000000..17b4fde --- /dev/null +++ b/test-grammars/markdown/src/tree_sitter/parser.h @@ -0,0 +1,230 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + 
bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_val, child_count_val, ...) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/test-grammars/rust/LICENSE b/test-grammars/rust/LICENSE new file mode 100644 index 0000000..ceaf3c9 --- /dev/null +++ b/test-grammars/rust/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 Maxim Sokolov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test-grammars/rust/highlights.scm b/test-grammars/rust/highlights.scm new file mode 100644 index 0000000..52c13dd --- /dev/null +++ b/test-grammars/rust/highlights.scm @@ -0,0 +1,479 @@ +; ------- +; Basic identifiers +; ------- + +; We do not style ? as an operator on purpose as it allows styling ? differently, as many highlighters do. @operator.special might have been a better scope, but @special is already documented so the change would break themes (including the intent of the default theme) +"?" @special + +(type_identifier) @type +(identifier) @variable +(field_identifier) @variable.other.member + +; ------- +; Operators +; ------- + +[ + "*" + "'" + "->" + "=>" + "<=" + "=" + "==" + "!" 
+ "!=" + "%" + "%=" + "&" + "&=" + "&&" + "|" + "|=" + "||" + "^" + "^=" + "*" + "*=" + "-" + "-=" + "+" + "+=" + "/" + "/=" + ">" + "<" + ">=" + ">>" + "<<" + ">>=" + "<<=" + "@" + ".." + "..=" + "'" +] @operator + +; ------- +; Paths +; ------- + +(use_declaration + argument: (identifier) @namespace) +(use_wildcard + (identifier) @namespace) +(extern_crate_declaration + name: (identifier) @namespace + alias: (identifier)? @namespace) +(mod_item + name: (identifier) @namespace) +(scoped_use_list + path: (identifier)? @namespace) +(use_list + (identifier) @namespace) +(use_as_clause + path: (identifier)? @namespace + alias: (identifier) @namespace) + +; ------- +; Types +; ------- + +(type_parameters + (type_identifier) @type.parameter) +(constrained_type_parameter + left: (type_identifier) @type.parameter) +(optional_type_parameter + name: (type_identifier) @type.parameter) +((type_arguments (type_identifier) @constant) + (#match? @constant "^[A-Z_]+$")) +(type_arguments (type_identifier) @type) +(tuple_struct_pattern "_" @comment.unused) +((type_arguments (type_identifier) @comment.unused) + (#eq? @comment.unused "_")) + +; --- +; Primitives +; --- + +(escape_sequence) @constant.character.escape +(primitive_type) @type.builtin +(boolean_literal) @constant.builtin.boolean +(integer_literal) @constant.numeric.integer +(float_literal) @constant.numeric.float +(char_literal) @constant.character +[ + (string_literal) + (raw_string_literal) +] @string +(outer_doc_comment_marker "/" @comment) +(inner_doc_comment_marker "!" @comment) +[ + (line_comment) + (block_comment) +] @comment + +; --- +; Extraneous +; --- + +(self) @variable.builtin + +(field_initializer + (field_identifier) @variable.other.member) +(shorthand_field_initializer + (identifier) @variable.other.member) +(shorthand_field_identifier) @variable.other.member + +(lifetime + "'" @label + (identifier) @label) +(label + "'" @label + (identifier) @label) + +; --- +; Punctuation +; --- + +[ + "::" + "." + ";" + "," + ":" +] @punctuation.delimiter + +[ + "(" + ")" + "[" + "]" + "{" + "}" + "#" +] @punctuation.bracket +(type_arguments + [ + "<" + ">" + ] @punctuation.bracket) +(type_parameters + [ + "<" + ">" + ] @punctuation.bracket) +(for_lifetimes ["<" ">"] @punctuation.bracket) +(closure_parameters + "|" @punctuation.bracket) +(bracketed_type ["<" ">"] @punctuation.bracket) + +; --- +; Variables +; --- + +(let_declaration + pattern: [ + ((identifier) @variable) + ((tuple_pattern + (identifier) @variable)) + ]) + +; It needs to be anonymous to not conflict with `call_expression` further below. +(_ + value: (field_expression + value: (identifier)? @variable + field: (field_identifier) @variable.other.member)) + +(parameter + pattern: (identifier) @variable.parameter) +(closure_parameters + (identifier) @variable.parameter) + +; ------- +; Keywords +; ------- + +(for_expression + "for" @keyword.control.repeat) +(gen_block "gen" @keyword.control) + +"in" @keyword.control + +[ + "match" + "if" + "else" + "try" +] @keyword.control.conditional + +[ + "while" + "loop" +] @keyword.control.repeat + +[ + "break" + "continue" + "return" + "await" + "yield" +] @keyword.control.return + +"use" @keyword.control.import +(mod_item "mod" @keyword.control.import !body) +(use_as_clause "as" @keyword.control.import) + +(type_cast_expression "as" @keyword.operator) + +((generic_type + type: (type_identifier) @keyword) + (#eq? 
@keyword "use")) + +[ + (crate) + (super) + "as" + "pub" + "mod" + "extern" + + "impl" + "where" + "trait" + "for" + + "default" + "async" +] @keyword + +[ + "struct" + "enum" + "union" + "type" +] @keyword.storage.type + +"let" @keyword.storage +"fn" @keyword.function +"unsafe" @keyword.special +"macro_rules!" @function.macro + +(mutable_specifier) @keyword.storage.modifier.mut + +(reference_type "&" @keyword.storage.modifier.ref) +(self_parameter "&" @keyword.storage.modifier.ref) + +[ + "static" + "const" + "raw" + "ref" + "move" + "dyn" +] @keyword.storage.modifier + +; TODO: variable.mut to highlight mutable identifiers via locals.scm + +; --- +; Remaining Paths +; --- + +(scoped_identifier + path: (identifier)? @namespace + name: (identifier) @namespace) +(scoped_type_identifier + path: (identifier) @namespace) + +; ------- +; Functions +; ------- + +(call_expression + function: [ + ((identifier) @function) + (scoped_identifier + name: (identifier) @function) + (field_expression + field: (field_identifier) @function) + ]) +(generic_function + function: [ + ((identifier) @function) + (scoped_identifier + name: (identifier) @function) + (field_expression + field: (field_identifier) @function.method) + ]) + +(function_item + name: (identifier) @function) + +(function_signature_item + name: (identifier) @function) + +; ------- +; Guess Other Types +; ------- +; Other PascalCase identifiers are assumed to be structs. + +((identifier) @type + (#match? @type "^[A-Z]")) + +(never_type "!" @type) + +((identifier) @constant + (#match? @constant "^[A-Z][A-Z\\d_]*$")) + +; --- +; PascalCase identifiers in call_expressions (e.g. `Ok()`) +; are assumed to be enum constructors. +; --- + +(call_expression + function: [ + ((identifier) @constructor + (#match? @constructor "^[A-Z]")) + (scoped_identifier + name: ((identifier) @constructor + (#match? @constructor "^[A-Z]"))) + ]) + +; --- +; PascalCase identifiers under a path which is also PascalCase +; are assumed to be constructors if they have methods or fields. +; --- + +(field_expression + value: (scoped_identifier + path: [ + (identifier) @type + (scoped_identifier + name: (identifier) @type) + ] + name: (identifier) @constructor + (#match? @type "^[A-Z]") + (#match? @constructor "^[A-Z]"))) + +(enum_variant (identifier) @type.enum.variant) + + +; ------- +; Constructors +; ------- +; TODO: this is largely guesswork, remove it once we get actual info from locals.scm or r-a + +(struct_expression + name: (type_identifier) @constructor) + +(tuple_struct_pattern + type: [ + (identifier) @constructor + (scoped_identifier + name: (identifier) @constructor) + ]) +(struct_pattern + type: [ + ((type_identifier) @constructor) + (scoped_type_identifier + name: (type_identifier) @constructor) + ]) +(match_pattern + ((identifier) @constructor) (#match? @constructor "^[A-Z]")) +(or_pattern + ((identifier) @constructor) + ((identifier) @constructor) + (#match? @constructor "^[A-Z]")) + +; --- +; Macros +; --- + +(attribute + (identifier) @function.macro) +(inner_attribute_item "!" @punctuation) +(attribute + [ + (identifier) @function.macro + (scoped_identifier + name: (identifier) @function.macro) + ] + (token_tree (identifier) @function.macro)?) + +(inner_attribute_item) @attribute + +(macro_definition + name: (identifier) @function.macro) +(macro_invocation + macro: [ + ((identifier) @function.macro) + (scoped_identifier + name: (identifier) @function.macro) + ] + "!" 
@function.macro) + +(metavariable) @variable.parameter +(fragment_specifier) @type + +(attribute + (identifier) @special + arguments: (token_tree (identifier) @type) + (#eq? @special "derive") +) + +; --- +; Prelude +; --- + +((identifier) @type.enum.variant.builtin + (#any-of? @type.enum.variant.builtin "Some" "None" "Ok" "Err")) + + +(call_expression + (identifier) @function.builtin + (#any-of? @function.builtin + "drop" + "size_of" + "size_of_val" + "align_of" + "align_of_val")) + +((type_identifier) @type.builtin + (#any-of? + @type.builtin + "Send" + "Sized" + "Sync" + "Unpin" + "Drop" + "Fn" + "FnMut" + "FnOnce" + "AsMut" + "AsRef" + "From" + "Into" + "DoubleEndedIterator" + "ExactSizeIterator" + "Extend" + "IntoIterator" + "Iterator" + "Option" + "Result" + "Clone" + "Copy" + "Debug" + "Default" + "Eq" + "Hash" + "Ord" + "PartialEq" + "PartialOrd" + "ToOwned" + "Box" + "String" + "ToString" + "Vec" + "FromIterator" + "TryFrom" + "TryInto")) diff --git a/test-grammars/rust/injections.scm b/test-grammars/rust/injections.scm new file mode 100644 index 0000000..06c4d13 --- /dev/null +++ b/test-grammars/rust/injections.scm @@ -0,0 +1,81 @@ +([(line_comment !doc) (block_comment !doc)] @injection.content + (#set! injection.language "comment")) + +((doc_comment) @injection.content + (#set! injection.language "markdown") + (#set! injection.combined)) + +((macro_invocation + macro: + [ + (scoped_identifier + name: (_) @_macro_name) + (identifier) @_macro_name + ] + (token_tree) @injection.content) + (#eq? @_macro_name "html") + (#set! injection.language "html") + (#set! injection.include-children)) + +((macro_invocation + macro: + [ + (scoped_identifier + name: (_) @_macro_name) + (identifier) @_macro_name + ] + (token_tree) @injection.content) + (#eq? @_macro_name "slint") + (#set! injection.language "slint") + (#set! injection.include-children)) + +((macro_invocation + (token_tree) @injection.content) + (#set! injection.language "rust") + (#set! injection.include-children)) + +((macro_rule + (token_tree) @injection.content) + (#set! injection.language "rust") + (#set! injection.include-children)) + +(call_expression + function: (scoped_identifier + path: (identifier) @_regex (#eq? @_regex "Regex") + name: (identifier) @_new (#eq? @_new "new")) + arguments: (arguments (raw_string_literal) @injection.content) + (#set! injection.language "regex")) + +(call_expression + function: (scoped_identifier + path: (scoped_identifier (identifier) @_regex (#eq? @_regex "Regex") .) + name: (identifier) @_new (#eq? @_new "new")) + arguments: (arguments (raw_string_literal) @injection.content) + (#set! injection.language "regex")) + +; Highlight SQL in `sqlx::query!()`, `sqlx::query_scalar!()`, and `sqlx::query_scalar_unchecked!()` +(macro_invocation + macro: (scoped_identifier + path: (identifier) @_sqlx (#eq? @_sqlx "sqlx") + name: (identifier) @_query (#match? @_query "^query(_scalar|_scalar_unchecked)?$")) + (token_tree + ; Only the first argument is SQL + . + [(string_literal) (raw_string_literal)] @injection.content + ) + (#set! injection.language "sql")) + +; Highlight SQL in `sqlx::query_as!()` and `sqlx::query_as_unchecked!()` +(macro_invocation + macro: (scoped_identifier + path: (identifier) @_sqlx (#eq? @_sqlx "sqlx") + name: (identifier) @_query_as (#match? @_query_as "^query_as(_unchecked)?$")) + (token_tree + ; Only the second argument is SQL + . 
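+    ; (the leading `.` anchors the wildcard below to the token_tree's first named child)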
+ ; Allow anything as the first argument in case the user has lower case type + ; names for some reason + (_) + [(string_literal) (raw_string_literal)] @injection.content + ) + (#set! injection.language "sql")) diff --git a/test-grammars/rust/metadata.json b/test-grammars/rust/metadata.json new file mode 100644 index 0000000..b4b896b --- /dev/null +++ b/test-grammars/rust/metadata.json @@ -0,0 +1,6 @@ +{ + "repo": "https://github.com/tree-sitter/tree-sitter-rust", + "rev": "1f63b33efee17e833e0ea29266dd3d713e27e321", + "license": "MIT", + "compressed": true +} \ No newline at end of file diff --git a/test-grammars/rust/src/grammar.json b/test-grammars/rust/src/grammar.json new file mode 100644 index 0000000..42726eb Binary files /dev/null and b/test-grammars/rust/src/grammar.json differ diff --git a/test-grammars/rust/src/parser.c b/test-grammars/rust/src/parser.c new file mode 100644 index 0000000..4acbf46 Binary files /dev/null and b/test-grammars/rust/src/parser.c differ diff --git a/test-grammars/rust/src/scanner.c b/test-grammars/rust/src/scanner.c new file mode 100644 index 0000000..269f6b2 --- /dev/null +++ b/test-grammars/rust/src/scanner.c @@ -0,0 +1,393 @@ +#include "tree_sitter/alloc.h" +#include "tree_sitter/parser.h" + +#include + +enum TokenType { + STRING_CONTENT, + RAW_STRING_LITERAL_START, + RAW_STRING_LITERAL_CONTENT, + RAW_STRING_LITERAL_END, + FLOAT_LITERAL, + BLOCK_OUTER_DOC_MARKER, + BLOCK_INNER_DOC_MARKER, + BLOCK_COMMENT_CONTENT, + LINE_DOC_CONTENT, + ERROR_SENTINEL +}; + +typedef struct { + uint8_t opening_hash_count; +} Scanner; + +void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); } + +void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); } + +unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = (Scanner *)payload; + buffer[0] = (char)scanner->opening_hash_count; + return 1; +} + +void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = (Scanner *)payload; + scanner->opening_hash_count = 0; + if (length == 1) { + Scanner *scanner = (Scanner *)payload; + scanner->opening_hash_count = buffer[0]; + } +} + +static inline bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); } + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static inline bool process_string(TSLexer *lexer) { + bool has_content = false; + for (;;) { + if (lexer->lookahead == '\"' || lexer->lookahead == '\\') { + break; + } + if (lexer->eof(lexer)) { + return false; + } + has_content = true; + advance(lexer); + } + lexer->result_symbol = STRING_CONTENT; + lexer->mark_end(lexer); + return has_content; +} + +static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) { + if (lexer->lookahead == 'b' || lexer->lookahead == 'c') { + advance(lexer); + } + if (lexer->lookahead != 'r') { + return false; + } + advance(lexer); + + uint8_t opening_hash_count = 0; + while (lexer->lookahead == '#') { + advance(lexer); + opening_hash_count++; + } + + if (lexer->lookahead != '"') { + return false; + } + advance(lexer); + scanner->opening_hash_count = opening_hash_count; + + lexer->result_symbol = RAW_STRING_LITERAL_START; + return true; +} + +static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) { + for (;;) { + if (lexer->eof(lexer)) { + return false; + } + if 
(lexer->lookahead == '"') { + lexer->mark_end(lexer); + advance(lexer); + unsigned hash_count = 0; + while (lexer->lookahead == '#' && hash_count < scanner->opening_hash_count) { + advance(lexer); + hash_count++; + } + if (hash_count == scanner->opening_hash_count) { + lexer->result_symbol = RAW_STRING_LITERAL_CONTENT; + return true; + } + } else { + advance(lexer); + } + } +} + +static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) { + advance(lexer); + for (unsigned i = 0; i < scanner->opening_hash_count; i++) { + advance(lexer); + } + lexer->result_symbol = RAW_STRING_LITERAL_END; + return true; +} + +static inline bool process_float_literal(TSLexer *lexer) { + lexer->result_symbol = FLOAT_LITERAL; + + advance(lexer); + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + + bool has_fraction = false, has_exponent = false; + + if (lexer->lookahead == '.') { + has_fraction = true; + advance(lexer); + if (iswalpha(lexer->lookahead)) { + // The dot is followed by a letter: 1.max(2) => not a float but an integer + return false; + } + + if (lexer->lookahead == '.') { + return false; + } + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + } + + lexer->mark_end(lexer); + + if (lexer->lookahead == 'e' || lexer->lookahead == 'E') { + has_exponent = true; + advance(lexer); + if (lexer->lookahead == '+' || lexer->lookahead == '-') { + advance(lexer); + } + if (!is_num_char(lexer->lookahead)) { + return true; + } + advance(lexer); + while (is_num_char(lexer->lookahead)) { + advance(lexer); + } + + lexer->mark_end(lexer); + } + + if (!has_exponent && !has_fraction) { + return false; + } + + if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') { + return true; + } + advance(lexer); + if (!iswdigit(lexer->lookahead)) { + return true; + } + + while (iswdigit(lexer->lookahead)) { + advance(lexer); + } + + lexer->mark_end(lexer); + return true; +} + +static inline bool process_line_doc_content(TSLexer *lexer) { + lexer->result_symbol = LINE_DOC_CONTENT; + for (;;) { + if (lexer->eof(lexer)) { + return true; + } + if (lexer->lookahead == '\n') { + // Include the newline in the doc content node. + // Line endings are useful for markdown injection. + advance(lexer); + return true; + } + advance(lexer); + } +} + +typedef enum { + LeftForwardSlash, + LeftAsterisk, + Continuing, +} BlockCommentState; + +typedef struct { + BlockCommentState state; + unsigned nestingDepth; +} BlockCommentProcessing; + +static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) { + if (current == '*') { + processing->nestingDepth += 1; + } + processing->state = Continuing; +}; + +static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) { + if (current == '*') { + lexer->mark_end(lexer); + processing->state = LeftAsterisk; + return; + } + + if (current == '/') { + processing->nestingDepth -= 1; + } + + processing->state = Continuing; +} + +static inline void process_continuing(BlockCommentProcessing *processing, char current) { + switch (current) { + case '/': + processing->state = LeftForwardSlash; + break; + case '*': + processing->state = LeftAsterisk; + break; + } +} + +static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) { + char first = (char)lexer->lookahead; + // The first character is stored so we can safely advance inside + // these if blocks. However, because we only store one, we can only + // safely advance 1 time. 
Since there's a chance that an advance could + // happen in one state, we must advance in all states to ensure that + // the program ends up in a sane state prior to processing the block + // comment if need be. + if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') { + lexer->result_symbol = BLOCK_INNER_DOC_MARKER; + advance(lexer); + return true; + } + if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') { + advance(lexer); + lexer->mark_end(lexer); + // If the next token is a / that means that it's an empty block comment. + if (lexer->lookahead == '/') { + return false; + } + // If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER + // as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more. + if (lexer->lookahead != '*') { + lexer->result_symbol = BLOCK_OUTER_DOC_MARKER; + return true; + } + } else { + advance(lexer); + } + + if (valid_symbols[BLOCK_COMMENT_CONTENT]) { + BlockCommentProcessing processing = {Continuing, 1}; + // Manually set the current state based on the first character + switch (first) { + case '*': + processing.state = LeftAsterisk; + if (lexer->lookahead == '/') { + // This case can happen in an empty doc block comment + // like /*!*/. The comment has no contents, so bail. + return false; + } + break; + case '/': + processing.state = LeftForwardSlash; + break; + default: + processing.state = Continuing; + break; + } + + // For the purposes of actually parsing rust code, this + // is incorrect as it considers an unterminated block comment + // to be an error. However, for the purposes of syntax highlighting + // this should be considered successful as otherwise you are not able + // to syntax highlight a block of code prior to closing the + // block comment + while (!lexer->eof(lexer) && processing.nestingDepth != 0) { + // Set first to the current lookahead as that is the second character + // as we force an advance in the above code when we are checking if we + // need to handle a block comment inner or outer doc comment signifier + // node + first = (char)lexer->lookahead; + switch (processing.state) { + case LeftForwardSlash: + process_left_forward_slash(&processing, first); + break; + case LeftAsterisk: + process_left_asterisk(&processing, first, lexer); + break; + case Continuing: + lexer->mark_end(lexer); + process_continuing(&processing, first); + break; + default: + break; + } + advance(lexer); + if (first == '/' && processing.nestingDepth != 0) { + lexer->mark_end(lexer); + } + } + lexer->result_symbol = BLOCK_COMMENT_CONTENT; + return true; + } + + return false; +} + +bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + // The documentation states that if the lexical analysis fails for some reason + // they will mark every state as valid and pass it to the external scanner + // However, we can't do anything to help them recover in that case so we + // should just fail. + /* + link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + If a syntax error is encountered during regular parsing, Tree-sitter’s + first action during error recovery will be to call the external scanner’s + scan function with all tokens marked valid. The scanner should detect this + case and handle it appropriately. 
One simple method of detection is to add + an unused token to the end of the externals array, for example + + externals: $ => [$.token1, $.token2, $.error_sentinel], + + then check whether that token is marked valid to determine whether + Tree-sitter is in error correction mode. + */ + if (valid_symbols[ERROR_SENTINEL]) { + return false; + } + + Scanner *scanner = (Scanner *)payload; + + if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] || + valid_symbols[BLOCK_OUTER_DOC_MARKER]) { + return process_block_comment(lexer, valid_symbols); + } + + if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) { + return process_string(lexer); + } + + if (valid_symbols[LINE_DOC_CONTENT]) { + return process_line_doc_content(lexer); + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_START] && + (lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) { + return scan_raw_string_start(scanner, lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) { + return scan_raw_string_content(scanner, lexer); + } + + if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') { + return scan_raw_string_end(scanner, lexer); + } + + if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) { + return process_float_literal(lexer); + } + + return false; +} diff --git a/test-grammars/rust/src/tree_sitter/alloc.h b/test-grammars/rust/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1abdd12 --- /dev/null +++ b/test-grammars/rust/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t size); +extern void *(*ts_current_calloc)(size_t count, size_t size); +extern void *(*ts_current_realloc)(void *ptr, size_t size); +extern void (*ts_current_free)(void *ptr); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/test-grammars/rust/src/tree_sitter/array.h b/test-grammars/rust/src/tree_sitter/array.h new file mode 100644 index 0000000..15a3b23 --- /dev/null +++ b/test-grammars/rust/src/tree_sitter/array.h @@ -0,0 +1,290 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. 
+#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. 
+/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/test-grammars/rust/src/tree_sitter/parser.h b/test-grammars/rust/src/tree_sitter/parser.h new file mode 100644 index 0000000..799f599 --- /dev/null +++ b/test-grammars/rust/src/tree_sitter/parser.h @@ -0,0 +1,266 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); + void (*log)(const TSLexer *, const char *, ...); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +static inline bool set_contains(TSCharacterRange *ranges, uint32_t 
len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_