Support heredocs (#45)

This adds support for heredocs in Dockerfiles. Supporting them required adding an external scanner to track the open-heredoc state.
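For reference, a minimal example of the Dockerfile syntax this enables (illustrative only, in the same style as the new test corpus below; the EOF delimiter name is arbitrary):

    FROM busybox
    # RUN with a heredoc: the lines up to the EOF delimiter form the shell script
    RUN <<EOF
    echo "i am" >> /dest
    whoami >> /dest
    EOF
    # COPY with a heredoc: the heredoc body becomes the content of /dest/greeting
    COPY <<EOF /dest/greeting
    hello world
    EOF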

Co-authored-by: Camden Cheek <camden@ccheek.com>
tvrinssen 2024-04-19 22:08:28 +02:00 committed by GitHub
parent 33e22c33bc
commit 1bf9daef46
54 changed files with 8686 additions and 5898 deletions

1
.clang-format Normal file

@ -0,0 +1 @@
IndentWidth: 4

38
.editorconfig Normal file

@ -0,0 +1,38 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2
[*.js]
indent_style = space
indent_size = 2
[*.rs]
indent_style = space
indent_size = 4
[*.{c,cc,h}]
indent_style = space
indent_size = 4
[*.{py,pyi}]
indent_style = space
indent_size = 4
[*.swift]
indent_style = space
indent_size = 4
[*.go]
indent_style = tab
indent_size = 8
[Makefile]
indent_style = tab
indent_size = 8

11
.gitattributes vendored Normal file

@ -0,0 +1,11 @@
* text eol=lf
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
bindings/** linguist-generated
binding.gyp linguist-generated
setup.py linguist-generated
Makefile linguist-generated
Package.swift linguist-generated

1
Package.swift generated

@ -28,6 +28,7 @@ let package = Package(
],
sources: [
"src/parser.c",
+ "src/scanner.c",
],
resources: [
.copy("queries")

21
binding.gyp generated

@ -2,18 +2,29 @@
"targets": [ "targets": [
{ {
"target_name": "tree_sitter_dockerfile_binding", "target_name": "tree_sitter_dockerfile_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [ "include_dirs": [
"<!(node -e \"require('nan')\")", "src",
"src"
], ],
"sources": [ "sources": [
"bindings/node/binding.cc", "bindings/node/binding.cc",
"src/parser.c", "src/parser.c",
# If your language uses an external scanner, add it here. "src/scanner.c",
], ],
"conditions": [
["OS!='win'", {
"cflags_c": [ "cflags_c": [
"-std=c99", "-std=c11",
] ],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
} }
] ]
} }

11
bindings/c/tree-sitter-dockerfile.pc.in generated Normal file

@ -0,0 +1,11 @@
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
Name: tree-sitter-dockerfile
Description: Dockerfile grammar for tree-sitter
URL: @URL@
Version: @VERSION@
Requires: @REQUIRES@
Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-dockerfile
Cflags: -I${includedir}

13
bindings/go/binding.go generated Normal file

@ -0,0 +1,13 @@
package tree_sitter_dockerfile
// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #include "../../src/scanner.c"
import "C"
import "unsafe"
// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
return unsafe.Pointer(C.tree_sitter_dockerfile())
}

15
bindings/go/binding_test.go generated Normal file

@ -0,0 +1,15 @@
package tree_sitter_dockerfile_test
import (
"testing"
tree_sitter_dockerfile "github.com/camdencheek/tree-sitter-dockerfile"
tree_sitter "github.com/smacker/go-tree-sitter"
)
func TestCanLoadGrammar(t *testing.T) {
language := tree_sitter.NewLanguage(tree_sitter_dockerfile.Language())
if language == nil {
t.Errorf("Error loading Dockerfile grammar")
}
}

5
bindings/go/go.mod generated Normal file

@ -0,0 +1,5 @@
module github.com/camdencheek/tree-sitter-dockerfile
go 1.22
require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8

bindings/node/binding.cc

@ -1,28 +1,20 @@
#include "tree_sitter/parser.h" #include <napi.h>
#include <node.h>
#include "nan.h"
using namespace v8; typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage *tree_sitter_dockerfile(); extern "C" TSLanguage *tree_sitter_dockerfile();
namespace { // "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
NAN_METHOD(New) {} Napi::Object Init(Napi::Env env, Napi::Object exports) {
exports["name"] = Napi::String::New(env, "dockerfile");
void Init(Local<Object> exports, Local<Object> module) { auto language = Napi::External<TSLanguage>::New(env, tree_sitter_dockerfile());
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New); language.TypeTag(&LANGUAGE_TYPE_TAG);
tpl->SetClassName(Nan::New("Language").ToLocalChecked()); exports["language"] = language;
tpl->InstanceTemplate()->SetInternalFieldCount(1); return exports;
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_dockerfile());
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("dockerfile").ToLocalChecked());
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
} }
NODE_MODULE(tree_sitter_dockerfile_binding, Init) NODE_API_MODULE(tree_sitter_dockerfile_binding, Init)
} // namespace

28
bindings/node/index.d.ts generated vendored Normal file

@ -0,0 +1,28 @@
type BaseNode = {
type: string;
named: boolean;
};
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
type NodeInfo =
| (BaseNode & {
subtypes: BaseNode[];
})
| (BaseNode & {
fields: { [name: string]: ChildNode };
children: ChildNode[];
});
type Language = {
name: string;
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;

18
bindings/node/index.js generated

@ -1,18 +1,6 @@
- try {
- module.exports = require("../../build/Release/tree_sitter_dockerfile_binding");
- } catch (error1) {
- if (error1.code !== 'MODULE_NOT_FOUND') {
- throw error1;
- }
- try {
- module.exports = require("../../build/Debug/tree_sitter_dockerfile_binding");
- } catch (error2) {
- if (error2.code !== 'MODULE_NOT_FOUND') {
- throw error2;
- }
- throw error1
- }
- }
+ const root = require("path").join(__dirname, "..", "..");
+ module.exports = require("node-gyp-build")(root);
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");

bindings/python/tree_sitter_dockerfile/__init__.py

@ -0,0 +1,5 @@
"Dockerfile grammar for tree-sitter"
from ._binding import language
__all__ = ["language"]

bindings/python/tree_sitter_dockerfile/__init__.pyi

@ -0,0 +1 @@
def language() -> int: ...

bindings/python/tree_sitter_dockerfile/binding.c

@ -0,0 +1,27 @@
#include <Python.h>
typedef struct TSLanguage TSLanguage;
TSLanguage *tree_sitter_dockerfile(void);
static PyObject* _binding_language(PyObject *self, PyObject *args) {
return PyLong_FromVoidPtr(tree_sitter_dockerfile());
}
static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = -1,
.m_methods = methods
};
PyMODINIT_FUNC PyInit__binding(void) {
return PyModule_Create(&module);
}

bindings/python/tree_sitter_dockerfile/py.typed

bindings/rust/build.rs

@ -7,17 +7,18 @@ fn main() {
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable")
.flag_if_supported("-Wno-trigraphs");
+ #[cfg(target_env = "msvc")]
+ c_config.flag("-utf-8");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
// If your language uses an external scanner written in C,
// then include this block of code:
- /*
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
- */
c_config.compile("parser");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

grammar.js

@ -2,6 +2,13 @@ module.exports = grammar({
name: "dockerfile", name: "dockerfile",
extras: ($) => [/\s+/, $.line_continuation], extras: ($) => [/\s+/, $.line_continuation],
externals: ($) => [
$.heredoc_marker,
$.heredoc_line,
$.heredoc_end,
$.heredoc_nl,
$.error_sentinel,
],
rules: { rules: {
source_file: ($) => repeat(seq(choice($._instruction, $.comment), "\n")), source_file: ($) => repeat(seq(choice($._instruction, $.comment), "\n")),
@ -26,7 +33,7 @@ module.exports = grammar({
$.healthcheck_instruction, $.healthcheck_instruction,
$.shell_instruction, $.shell_instruction,
$.maintainer_instruction, $.maintainer_instruction,
$.cross_build_instruction $.cross_build_instruction,
), ),
from_instruction: ($) => from_instruction: ($) =>
@ -46,7 +53,8 @@ module.exports = grammar({
$.mount_param $.mount_param
) )
), ),
choice($.json_string_array, $.shell_command) choice($.json_string_array, $.shell_command),
repeat($.heredoc_block)
), ),
cmd_instruction: ($) => cmd_instruction: ($) =>
@ -75,9 +83,10 @@ module.exports = grammar({
alias(/[aA][dD][dD]/, "ADD"), alias(/[aA][dD][dD]/, "ADD"),
repeat($.param), repeat($.param),
repeat1( repeat1(
seq($.path, $._non_newline_whitespace) seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
), ),
$.path alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
), ),
copy_instruction: ($) => copy_instruction: ($) =>
@ -85,9 +94,10 @@ module.exports = grammar({
alias(/[cC][oO][pP][yY]/, "COPY"), alias(/[cC][oO][pP][yY]/, "COPY"),
repeat($.param), repeat($.param),
repeat1( repeat1(
seq($.path, $._non_newline_whitespace) seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
), ),
$.path alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
), ),
entrypoint_instruction: ($) => entrypoint_instruction: ($) =>
@ -193,15 +203,41 @@ module.exports = grammar({
/.*/ /.*/
), ),
heredoc_block: ($) =>
seq(
// A heredoc block starts with a line break after the instruction it
// belongs to. The herdoc_nl token is a special token that only matches
// \n if there's at least one open heredoc to avoid conflicts.
// We also alias this token to hide it from the output like all other
// whitespace.
alias($.heredoc_nl, "_heredoc_nl"),
repeat(seq($.heredoc_line, "\n")),
$.heredoc_end
),
path: ($) => path: ($) =>
seq( seq(
choice( choice(
/[^-\s\$]/, // cannot start with a '-' to avoid conflicts with params /[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^<]/, // cannot start with a '<<' to avoid conflicts with heredocs (a single < is fine, though)
$.expansion $.expansion
), ),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion)) repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
), ),
path_with_heredoc: ($) =>
choice(
$.heredoc_marker,
seq(
choice(
/[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^-\s\$<]/,
$.expansion
),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
)
),
expansion: $ => expansion: $ =>
seq("$", $._expansion_body), seq("$", $._expansion_body),
@ -361,9 +397,11 @@ module.exports = grammar({
// |--------param-------| // |--------param-------|
// |--shell_command--| // |--shell_command--|
// //
seq($.heredoc_marker, /[ \t]*/),
/[,=-]/, /[,=-]/,
/[^\\\[\n#\s,=-][^\\\n]*/, /[^\\\[\n#\s,=-][^\\\n<]*/,
/\\[^\n,=-]/ /\\[^\n,=-]/,
/<[^<]/,
) )
), ),
@ -452,7 +490,7 @@ module.exports = grammar({
) )
), ),
_non_newline_whitespace: ($) => /[\t ]+/, _non_newline_whitespace: ($) => token.immediate(/[\t ]+/),
comment: ($) => /#.*/, comment: ($) => /#.*/,
}, },

package.json

@ -3,16 +3,29 @@
"version": "0.1.0", "version": "0.1.0",
"description": "A tree-sitter module for the Dockerfile grammar", "description": "A tree-sitter module for the Dockerfile grammar",
"main": "bindings/node", "main": "bindings/node",
"types": "bindings/node",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1",
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip"
}, },
"author": "Camden Cheek <camden@ccheek.com>", "author": "Camden Cheek <camden@ccheek.com>",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"nan": "^2.14.2" "node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
}, },
"devDependencies": { "devDependencies": {
"tree-sitter-cli": "^0.20.8" "tree-sitter-cli": "^0.20.8",
"prebuildify": "^6.0.0"
}, },
"tree-sitter": [ "tree-sitter": [
{ {
@ -27,5 +40,13 @@
"queries/highlights.scm" "queries/highlights.scm"
] ]
} }
],
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**"
] ]
} }

27
pnpm-lock.yaml Normal file

@ -0,0 +1,27 @@
lockfileVersion: '6.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
dependencies:
nan:
specifier: ^2.14.2
version: 2.18.0
devDependencies:
tree-sitter-cli:
specifier: ^0.20.1
version: 0.20.8
packages:
/nan@2.18.0:
resolution: {integrity: sha512-W7tfG7vMOGtD30sHoZSSc/JVYiyDPEyQVso/Zz+/uQd0B0L46gtC+pHha5FFMRpil6fm/AoEcRWyOVi4+E/f8w==}
dev: false
/tree-sitter-cli@0.20.8:
resolution: {integrity: sha512-XjTcS3wdTy/2cc/ptMLc/WRyOLECRYcMTrSWyhZnj1oGSOWbHLTklgsgRICU3cPfb0vy+oZCC33M43u6R1HSCA==}
hasBin: true
requiresBuild: true
dev: true

29
pyproject.toml Normal file

@ -0,0 +1,29 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-dockerfile"
description = "Dockerfile grammar for tree-sitter"
version = "0.1.0"
keywords = ["incremental", "parsing", "tree-sitter", "dockerfile"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
"Typing :: Typed"
]
requires-python = ">=3.8"
license.text = "MIT"
readme = "README.md"
[project.urls]
Homepage = "https://github.com/camdencheek/tree-sitter-dockerfile"
[project.optional-dependencies]
core = ["tree-sitter~=0.21"]
[tool.cibuildwheel]
build = "cp38-*"
build-frontend = "build"

queries/highlights.scm

@ -19,6 +19,8 @@
"SHELL" "SHELL"
"MAINTAINER" "MAINTAINER"
"CROSS_BUILD" "CROSS_BUILD"
(heredoc_marker)
(heredoc_end)
] @keyword ] @keyword
[ [
@ -39,6 +41,7 @@
(double_quoted_string) (double_quoted_string)
(single_quoted_string) (single_quoted_string)
(json_string) (json_string)
(heredoc_line)
] @string ] @string
(expansion (expansion

60
setup.py generated Normal file

@ -0,0 +1,60 @@
from os.path import isdir, join
from platform import system
from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from wheel.bdist_wheel import bdist_wheel
class Build(build):
def run(self):
if isdir("queries"):
dest = join(self.build_lib, "tree_sitter_dockerfile", "queries")
self.copy_tree("queries", dest)
super().run()
class BdistWheel(bdist_wheel):
def get_tag(self):
python, abi, platform = super().get_tag()
if python.startswith("cp"):
python, abi = "cp38", "abi3"
return python, abi, platform
setup(
packages=find_packages("bindings/python"),
package_dir={"": "bindings/python"},
package_data={
"tree_sitter_dockerfile": ["*.pyi", "py.typed"],
"tree_sitter_dockerfile.queries": ["*.scm"],
},
ext_package="tree_sitter_dockerfile",
ext_modules=[
Extension(
name="_binding",
sources=[
"bindings/python/tree_sitter_dockerfile/binding.c",
"src/parser.c",
"src/scanner.c",
],
extra_compile_args=[
"-std=c11",
] if system() != "Windows" else [
"/std:c11",
"/utf-8",
],
define_macros=[
("Py_LIMITED_API", "0x03080000"),
("PY_SSIZE_T_CLEAN", None)
],
include_dirs=["src"],
py_limited_api=True,
)
],
cmdclass={
"build": Build,
"bdist_wheel": BdistWheel
},
zip_safe=False
)

185
src/grammar.json generated

@ -207,6 +207,13 @@
"name": "shell_command" "name": "shell_command"
} }
] ]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
} }
] ]
}, },
@ -348,8 +355,13 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "ALIAS",
"content": {
"type": "SYMBOL", "type": "SYMBOL",
"name": "path" "name": "path_with_heredoc"
},
"named": true,
"value": "path"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -359,8 +371,20 @@
} }
}, },
{ {
"type": "ALIAS",
"content": {
"type": "SYMBOL", "type": "SYMBOL",
"name": "path" "name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
} }
] ]
}, },
@ -389,8 +413,13 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "ALIAS",
"content": {
"type": "SYMBOL", "type": "SYMBOL",
"name": "path" "name": "path_with_heredoc"
},
"named": true,
"value": "path"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -400,8 +429,20 @@
} }
}, },
{ {
"type": "ALIAS",
"content": {
"type": "SYMBOL", "type": "SYMBOL",
"name": "path" "name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
} }
] ]
}, },
@ -840,6 +881,40 @@
} }
] ]
}, },
"heredoc_block": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "heredoc_nl"
},
"named": false,
"value": "_heredoc_nl"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_line"
},
{
"type": "STRING",
"value": "\n"
}
]
}
},
{
"type": "SYMBOL",
"name": "heredoc_end"
}
]
},
"path": { "path": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@ -848,7 +923,11 @@
"members": [ "members": [
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[^-\\s\\$]" "value": "[^-\\s\\$<]"
},
{
"type": "PATTERN",
"value": "<[^<]"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -877,6 +956,56 @@
} }
] ]
}, },
"path_with_heredoc": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^-\\s\\$<]"
},
{
"type": "PATTERN",
"value": "<[^-\\s\\$<]"
},
{
"type": "SYMBOL",
"name": "expansion"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\\s\\$]+"
}
},
{
"type": "SYMBOL",
"name": "_immediate_expansion"
}
]
}
}
]
}
]
},
"expansion": { "expansion": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@ -1508,17 +1637,34 @@
"content": { "content": {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "PATTERN",
"value": "[ \\t]*"
}
]
},
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[,=-]" "value": "[,=-]"
}, },
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[^\\\\\\[\\n#\\s,=-][^\\\\\\n]*" "value": "[^\\\\\\[\\n#\\s,=-][^\\\\\\n<]*"
}, },
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "\\\\[^\\n,=-]" "value": "\\\\[^\\n,=-]"
},
{
"type": "PATTERN",
"value": "<[^<]"
} }
] ]
} }
@ -1797,8 +1943,11 @@
} }
}, },
"_non_newline_whitespace": { "_non_newline_whitespace": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN", "type": "PATTERN",
"value": "[\\t ]+" "value": "[\\t ]+"
}
}, },
"comment": { "comment": {
"type": "PATTERN", "type": "PATTERN",
@ -1817,8 +1966,28 @@
], ],
"conflicts": [], "conflicts": [],
"precedences": [], "precedences": [],
"externals": [], "externals": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "SYMBOL",
"name": "heredoc_line"
},
{
"type": "SYMBOL",
"name": "heredoc_end"
},
{
"type": "SYMBOL",
"name": "heredoc_nl"
},
{
"type": "SYMBOL",
"name": "error_sentinel"
}
],
"inline": [], "inline": [],
"supertypes": [] "supertypes": []
} }

63
src/node-types.json generated

@ -7,6 +7,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "heredoc_block",
"named": true
},
{ {
"type": "param", "type": "param",
"named": true "named": true
@ -84,6 +88,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "heredoc_block",
"named": true
},
{ {
"type": "param", "type": "param",
"named": true "named": true
@ -275,6 +283,25 @@
] ]
} }
}, },
{
"type": "heredoc_block",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "heredoc_end",
"named": true
},
{
"type": "heredoc_line",
"named": true
}
]
}
},
{ {
"type": "image_alias", "type": "image_alias",
"named": true, "named": true,
@ -606,6 +633,10 @@
{ {
"type": "expansion", "type": "expansion",
"named": true "named": true
},
{
"type": "heredoc_marker",
"named": true
} }
] ]
} }
@ -618,6 +649,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "heredoc_block",
"named": true
},
{ {
"type": "json_string_array", "type": "json_string_array",
"named": true "named": true
@ -663,7 +698,17 @@
{ {
"type": "shell_fragment", "type": "shell_fragment",
"named": true, "named": true,
"fields": {} "fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "heredoc_marker",
"named": true
}
]
}
}, },
{ {
"type": "shell_instruction", "type": "shell_instruction",
@ -1028,10 +1073,26 @@
"type": "]", "type": "]",
"named": false "named": false
}, },
{
"type": "_heredoc_nl",
"named": false
},
{ {
"type": "escape_sequence", "type": "escape_sequence",
"named": true "named": true
}, },
{
"type": "heredoc_end",
"named": true
},
{
"type": "heredoc_line",
"named": true
},
{
"type": "heredoc_marker",
"named": true
},
{ {
"type": "mount", "type": "mount",
"named": false "named": false

12669
src/parser.c generated

File diff suppressed because it is too large

318
src/scanner.c Normal file

@ -0,0 +1,318 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include "tree_sitter/parser.h"
#define MAX_HEREDOCS 10
typedef struct {
bool in_heredoc;
bool stripping_heredoc;
unsigned heredoc_count;
char *heredocs[MAX_HEREDOCS];
} scanner_state;
enum TokenType {
HEREDOC_MARKER,
HEREDOC_LINE,
HEREDOC_END,
HEREDOC_NL,
ERROR_SENTINEL,
};
void *tree_sitter_dockerfile_external_scanner_create() {
scanner_state *state = malloc(sizeof(scanner_state));
memset(state, 0, sizeof(scanner_state));
return state;
}
void tree_sitter_dockerfile_external_scanner_destroy(void *payload) {
if (!payload)
return;
scanner_state *state = payload;
for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
if (state->heredocs[i]) {
free(state->heredocs[i]);
}
}
free(state);
}
unsigned tree_sitter_dockerfile_external_scanner_serialize(void *payload,
char *buffer) {
scanner_state *state = payload;
unsigned pos = 0;
buffer[pos++] = state->in_heredoc;
buffer[pos++] = state->stripping_heredoc;
for (unsigned i = 0; i < state->heredoc_count; i++) {
// Add the ending null byte to the length since we'll have to copy it as
// well.
unsigned len = strlen(state->heredocs[i]) + 1;
// If we run out of space, just drop the heredocs that don't fit.
// We need at least len + 1 bytes space since we'll copy len bytes below
// and later add a null byte at the end.
if (pos + len + 1 > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
break;
}
memcpy(&buffer[pos], state->heredocs[i], len);
pos += len;
}
// Add a null byte at the end to make it easy to detect.
buffer[pos++] = 0;
return pos;
}
void tree_sitter_dockerfile_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {
scanner_state *state = payload;
// Free all current heredocs to avoid leaking memory when we overwrite the
// array later.
for (unsigned i = 0; i < state->heredoc_count; i++) {
free(state->heredocs[i]);
state->heredocs[i] = NULL;
}
if (length == 0) {
state->in_heredoc = false;
state->stripping_heredoc = false;
state->heredoc_count = 0;
} else {
unsigned pos = 0;
state->in_heredoc = buffer[pos++];
state->stripping_heredoc = buffer[pos++];
unsigned heredoc_count = 0;
for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
unsigned len = strlen(&buffer[pos]);
// We found the ending null byte which means that we're done.
if (len == 0)
break;
// Account for the ending null byte in strings (again).
len++;
char *heredoc = malloc(len);
memcpy(heredoc, &buffer[pos], len);
state->heredocs[i] = heredoc;
heredoc_count++;
pos += len;
}
state->heredoc_count = heredoc_count;
}
}
static void skip_whitespace(TSLexer *lexer) {
while (lexer->lookahead != '\0' && lexer->lookahead != '\n' &&
iswspace(lexer->lookahead))
lexer->advance(lexer, true);
}
static bool scan_marker(scanner_state *state, TSLexer *lexer) {
skip_whitespace(lexer);
if (lexer->lookahead != '<')
return false;
lexer->advance(lexer, false);
if (lexer->lookahead != '<')
return false;
lexer->advance(lexer, false);
bool stripping = false;
if (lexer->lookahead == '-') {
stripping = true;
lexer->advance(lexer, false);
}
int32_t quote = 0;
if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
quote = lexer->lookahead;
lexer->advance(lexer, false);
}
// Reserve a reasonable amount of space for the heredoc delimiter string.
// Most heredocs (like EOF, EOT, EOS, FILE, etc.) are pretty short so we'll
// usually only need a few bytes. We're also limited to less than 1024 bytes
// by tree-sitter since our state has to fit in
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
const unsigned int del_space = 512;
char delimiter[del_space];
// We start recording the actual string at position 1 since we store whether
// it's a stripping heredoc in the first position (with either a dash or a
// space).
unsigned del_idx = 1;
while (lexer->lookahead != '\0' &&
(quote ? lexer->lookahead != quote : !iswspace(lexer->lookahead))) {
if (lexer->lookahead == '\\') {
lexer->advance(lexer, false);
if (lexer->lookahead == '\0') {
return false;
}
}
if (del_idx > 0) {
delimiter[del_idx++] = lexer->lookahead;
}
lexer->advance(lexer, false);
// If we run out of space, stop recording the delimiter but keep
// advancing the lexer to ensure that we at least parse the marker
// correctly. Reserve two bytes: one for the strip indicator and
// one for the terminating null byte.
if (del_idx >= del_space - 2) {
del_idx = 0;
}
}
if (quote) {
if (lexer->lookahead != quote) {
return false;
}
lexer->advance(lexer, false);
}
if (del_idx == 0) {
lexer->result_symbol = HEREDOC_MARKER;
return true;
}
delimiter[0] = stripping ? '-' : ' ';
delimiter[del_idx] = '\0';
// We copy the delimiter string to the heap here since we can't store our
// stack-allocated string in our state (which is stored on the heap).
char *del_copy = malloc(del_idx + 1);
memcpy(del_copy, delimiter, del_idx + 1);
if (state->heredoc_count == 0) {
state->heredoc_count = 1;
state->heredocs[0] = del_copy;
state->stripping_heredoc = stripping;
} else if (state->heredoc_count >= MAX_HEREDOCS) {
free(del_copy);
} else {
state->heredocs[state->heredoc_count++] = del_copy;
}
lexer->result_symbol = HEREDOC_MARKER;
return true;
}
static bool scan_content(scanner_state *state, TSLexer *lexer,
const bool *valid_symbols) {
if (state->heredoc_count == 0) {
state->in_heredoc = false;
return false;
}
state->in_heredoc = true;
if (state->stripping_heredoc) {
skip_whitespace(lexer);
}
if (valid_symbols[HEREDOC_END]) {
unsigned delim_idx = 1;
// Look for the current heredoc delimiter.
while (state->heredocs[0][delim_idx] != '\0' &&
lexer->lookahead != '\0' &&
lexer->lookahead == state->heredocs[0][delim_idx]) {
lexer->advance(lexer, false);
delim_idx++;
}
// Check if the entire string matched.
if (state->heredocs[0][delim_idx] == '\0') {
lexer->result_symbol = HEREDOC_END;
// Shift the first heredoc off the list.
free(state->heredocs[0]);
for (unsigned i = 1; i < state->heredoc_count; i++) {
state->heredocs[i - 1] = state->heredocs[i];
}
state->heredocs[state->heredoc_count - 1] = NULL;
state->heredoc_count--;
if (state->heredoc_count > 0) {
state->stripping_heredoc = state->heredocs[0][0] == '-';
} else {
state->in_heredoc = false;
}
return true;
}
}
if (!valid_symbols[HEREDOC_LINE])
return false;
lexer->result_symbol = HEREDOC_LINE;
for (;;) {
switch (lexer->lookahead) {
case '\0':
if (lexer->eof(lexer)) {
state->in_heredoc = false;
return true;
}
lexer->advance(lexer, false);
break;
case '\n':
return true;
default:
lexer->advance(lexer, false);
}
}
}
bool tree_sitter_dockerfile_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
scanner_state *state = payload;
if (valid_symbols[ERROR_SENTINEL]) {
if (state->in_heredoc) {
return scan_content(state, lexer, valid_symbols);
} else {
return scan_marker(state, lexer);
}
}
// HEREDOC_NL only matches a linebreak if there are open heredocs. This is
// necessary to avoid a conflict in the grammar since a normal line break
// could either be the start of a heredoc or the end of an instruction.
if (valid_symbols[HEREDOC_NL]) {
if (state->heredoc_count > 0 && lexer->lookahead == '\n') {
lexer->result_symbol = HEREDOC_NL;
lexer->advance(lexer, false);
return true;
}
}
if (valid_symbols[HEREDOC_MARKER]) {
return scan_marker(state, lexer);
}
if (valid_symbols[HEREDOC_LINE] || valid_symbols[HEREDOC_END]) {
return scan_content(state, lexer, valid_symbols);
}
return false;
}

54
src/tree_sitter/alloc.h generated Normal file

@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

290
src/tree_sitter/array.h generated Normal file

@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

src/tree_sitter/parser.h

@ -13,9 +13,8 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
- typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
+ typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
@ -87,6 +86,11 @@ typedef union {
} entry;
} TSParseActionEntry;
+ typedef struct {
+ int32_t start;
+ int32_t end;
+ } TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
@ -126,13 +130,38 @@ struct TSLanguage {
const TSStateId *primary_state_ids;
};
+ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
+ uint32_t index = 0;
+ uint32_t size = len - index;
+ while (size > 1) {
+ uint32_t half_size = size / 2;
+ uint32_t mid_index = index + half_size;
+ TSCharacterRange *range = &ranges[mid_index];
+ if (lookahead >= range->start && lookahead <= range->end) {
+ return true;
+ } else if (lookahead > range->end) {
+ index = mid_index;
+ }
+ size -= half_size;
+ }
+ TSCharacterRange *range = &ranges[index];
+ return (lookahead >= range->start && lookahead <= range->end);
+ }
/*
* Lexer Macros
*/
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+ #else
+ #define UNUSED __attribute__((unused))
+ #endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
+ UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
@ -148,6 +177,17 @@ struct TSLanguage {
goto next_state; \
}
+ #define ADVANCE_MAP(...) \
+ { \
+ static const uint16_t map[] = { __VA_ARGS__ }; \
+ for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
+ if (map[i] == lookahead) { \
+ state = map[i + 1]; \
+ goto next_state; \
+ } \
+ } \
+ }
#define SKIP(state_value) \
{ \
skip = true; \
@ -166,7 +206,7 @@ struct TSLanguage {
* Parse Table Macros
*/
- #define SMALL_STATE(id) id - LARGE_STATE_COUNT
+ #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
@ -176,7 +216,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
- .state = state_value \
+ .state = (state_value) \
} \
}}
@ -184,7 +224,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
- .state = state_value, \
+ .state = (state_value), \
.repetition = true \
} \
}}
@ -197,13 +237,14 @@ struct TSLanguage {
} \
}}
- #define REDUCE(symbol_val, child_count_val, ...) \
+ #define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
- .symbol = symbol_val, \
+ .symbol = symbol_name, \
- .child_count = child_count_val, \
+ .child_count = children, \
- __VA_ARGS__ \
+ .dynamic_precedence = precedence, \
+ .production_id = prod_id \
}, \
}}

435
test/corpus/heredoc.txt Normal file

@ -0,0 +1,435 @@
==================
Basic heredoc
==================
FROM busybox AS build
RUN <<EOF
echo "i am" >> /dest
whoami >> /dest
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Basic heredoc with space after marker
==================
FROM busybox AS build
RUN <<EOF
echo "i am" >> /dest
whoami >> /dest
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run heredoc
==================
FROM busybox AS build
SHELL ["/bin/awk"]
RUN <<EOF
BEGIN {
print "foo" > "/dest"
}
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(shell_instruction
(json_string_array
(json_string)))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run heredoc with shebang
==================
FROM busybox AS build
RUN <<EOF
#!/bin/awk -f
BEGIN {
print "hello" >> "/dest"
print "world" >> "/dest"
}
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run complex heredoc
==================
FROM busybox AS build
WORKDIR /dest
RUN cat <<EOF1 | tr '[:upper:]' '[:lower:]' > ./out1; \
cat <<EOF2 | tr '[:lower:]' '[:upper:]' > ./out2
hello WORLD
EOF1
HELLO world
EOF2
RUN <<EOF 3<<IN1 4<<IN2 awk -f -
BEGIN {
while ((getline line < "/proc/self/fd/3") > 0)
print tolower(line) > "./fd3"
while ((getline line < "/proc/self/fd/4") > 0)
print toupper(line) > "./fd4"
}
EOF
hello WORLD
IN1
HELLO world
IN2
FROM scratch
COPY --from=build /dest /
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(workdir_instruction
(path))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker))
(line_continuation)
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)
(heredoc_marker)
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
=================================
Copy with heredoc
=================================
FROM busybox AS build
RUN adduser -D user
WORKDIR /dest
COPY <<EOF single
single file
EOF
COPY <<EOF <<EOF2 double/
first file
EOF
second file
EOF2
RUN mkdir -p /permfiles
COPY --chmod=777 <<EOF /permfiles/all
dummy content
EOF
COPY --chmod=0644 <<EOF /permfiles/rw
dummy content
EOF
COPY --chown=user:user <<EOF /permfiles/owned
dummy content
EOF
RUN stat -c "%04a" /permfiles/all >> perms && \
stat -c "%04a" /permfiles/rw >> perms && \
stat -c "%U:%G" /permfiles/owned >> perms
FROM scratch
COPY --from=build /dest /
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment)))
(workdir_instruction
(path))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(shell_fragment)
(line_continuation)
(shell_fragment)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Heredoc with special symbols
==================
FROM scratch
COPY <<EOF quotefile
"quotes in file"
EOF
COPY <<EOF slashfile1
\
EOF
COPY <<EOF slashfile2
\\
EOF
COPY <<EOF slashfile3
\$
EOF
COPY <<"EOF" rawslashfile1
\
EOF
COPY <<"EOF" rawslashfile2
\\
EOF
COPY <<"EOF" rawslashfile3
\$
EOF
---
(source_file
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end))))

View file

@ -78,3 +78,22 @@ run --mount=type=secret,id=aws,target=/root/.aws/credentials \
(line_continuation)
(shell_command
(shell_fragment))))
==================
Run with heredoc
==================
RUN cat <<EOF
test
EOF
---
(source_file
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_end))))