Compare commits

...

10 commits

Author SHA1 Message Date
Camden Cheek 087daa2043
bump tree-sitter-cli version
2024-05-09 10:30:58 -04:00
Camden Cheek 868e44ce37
bump version number
2024-05-09 10:18:45 -04:00
Camden Cheek a16b6d231d
generate and bump version 2024-05-09 10:14:14 -04:00
Camden Cheek 439c3e7b8a
use extra to allow comments anywhere 2024-04-19 15:28:09 -06:00
Camden Cheek b4a90e2b98
remove unused args 2024-04-19 15:20:04 -06:00
Camden Cheek 40eceeff30
allow space after backslash to act as line continuation (#49) 2024-04-19 15:02:25 -06:00
Camden Cheek ecea7ac2c9
allow port ranges 2024-04-19 14:46:59 -06:00
Camden Cheek cde2a8ea7f
add publish workflow 2024-04-19 14:35:33 -06:00
Camden Cheek c4c8fd83f5
Configure CI (#41) 2024-04-19 14:20:33 -06:00
tvrinssen 1bf9daef46
Support heredocs (#45)
This adds support for heredocs in Dockerfiles. This required adding an external scanner to store the required state.

Co-authored-by: Camden Cheek <camden@ccheek.com>
2024-04-19 14:08:28 -06:00
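For context, the heredoc support described in the commit above can be exercised through the node bindings built from this repo. A minimal usage sketch, assuming the npm package names from the diff below (tree-sitter ^0.21 and tree-sitter-dockerfile) and the standard node-tree-sitter API; the node names (heredoc_marker, heredoc_block) are the ones this change adds:

```js
// Sketch: parse a Dockerfile heredoc with the updated grammar.
// Assumes `tree-sitter` and `tree-sitter-dockerfile` are installed from npm.
const Parser = require("tree-sitter");
const Dockerfile = require("tree-sitter-dockerfile");

const source = `FROM alpine
RUN <<EOF
apk add --no-cache git
echo "heredoc lines are tracked by the external scanner"
EOF
`;

const parser = new Parser();
parser.setLanguage(Dockerfile);
const tree = parser.parse(source);

// The RUN instruction should contain heredoc_marker and heredoc_block nodes.
console.log(tree.rootNode.toString());
```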
58 changed files with 8569 additions and 6879 deletions

1
.clang-format Normal file

@ -0,0 +1 @@
IndentWidth: 4

38
.editorconfig Normal file

@ -0,0 +1,38 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2
[*.js]
indent_style = space
indent_size = 2
[*.rs]
indent_style = space
indent_size = 4
[*.{c,cc,h}]
indent_style = space
indent_size = 4
[*.{py,pyi}]
indent_style = space
indent_size = 4
[*.swift]
indent_style = space
indent_size = 4
[*.go]
indent_style = tab
indent_size = 8
[Makefile]
indent_style = tab
indent_size = 8

11
.gitattributes vendored Normal file

@ -0,0 +1,11 @@
* text eol=lf
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
bindings/** linguist-generated
binding.gyp linguist-generated
setup.py linguist-generated
Makefile linguist-generated
Package.swift linguist-generated

40
.github/workflows/ci.yml vendored Normal file

@ -0,0 +1,40 @@
name: CI
on:
push:
branches: [main]
pull_request:
concurrency:
group: ${{github.workflow}}-${{github.ref}}
cancel-in-progress: true
jobs:
test:
name: Test parser
runs-on: ${{matrix.os}}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-14]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up tree-sitter
uses: tree-sitter/setup-action/cli@v1
- name: Run tests
uses: tree-sitter/parser-test-action@v2
with:
test-rust: ${{runner.os == 'Linux'}}
- name: Parse examples
id: examples
continue-on-error: true
uses: tree-sitter/parse-action@v4
with:
files: examples/*
- uses: actions/upload-artifact@v4
if: steps.examples.outputs.failures != ''
with:
name: failures-${{matrix.os}}
path: ${{steps.examples.outputs.failures}}

23
.github/workflows/publish.yml vendored Normal file

@ -0,0 +1,23 @@
name: Publish packages
on:
push:
tags: ["*"]
concurrency:
group: ${{github.workflow}}-${{github.ref}}
cancel-in-progress: true
jobs:
npm:
uses: tree-sitter/workflows/.github/workflows/package-npm.yml@main
secrets:
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
crates:
uses: tree-sitter/workflows/.github/workflows/package-crates.yml@main
secrets:
CARGO_REGISTRY_TOKEN: ${{secrets.CARGO_REGISTRY_TOKEN}}
pypi:
uses: tree-sitter/workflows/.github/workflows/package-pypi.yml@main
secrets:
PYPI_API_TOKEN: ${{secrets.PYPI_API_TOKEN}}

Cargo.toml

@ -1,7 +1,7 @@
[package]
name = "tree-sitter-dockerfile"
description = "dockerfile grammar for the tree-sitter parsing library"
version = "0.1.0"
version = "0.2.0"
keywords = ["incremental", "parsing", "dockerfile"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/camdencheek/tree-sitter-dockerfile"

142
Makefile generated

@ -1,23 +1,25 @@
VERSION := 0.1.2
VERSION := 0.2.0
# Repository
LANGUAGE_NAME := tree-sitter-dockerfile
# repository
SRC_DIR := src
PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin )
PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null)
ifeq (, $(PARSER_NAME))
PARSER_NAME := $(shell basename $(PARSER_REPO_URL))
PARSER_NAME := $(subst tree-sitter-,,$(PARSER_NAME))
PARSER_NAME := $(subst .git,,$(PARSER_NAME))
endif
ifeq (, $(PARSER_URL))
PARSER_URL := $(subst :,/,$(PARSER_REPO_URL))
ifeq ($(PARSER_URL),)
PARSER_URL := $(subst .git,,$(PARSER_REPO_URL))
ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),)
PARSER_URL := $(subst :,/,$(PARSER_URL))
PARSER_URL := $(subst git@,https://,$(PARSER_URL))
PARSER_URL := $(subst .git,,$(PARSER_URL))
endif
endif
UPPER_PARSER_NAME := $(shell echo $(PARSER_NAME) | tr a-z A-Z )
TS ?= tree-sitter
# ABI versioning
SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))
# install directory layout
PREFIX ?= /usr/local
@ -25,90 +27,86 @@ INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# collect C++ sources, and link if necessary
CPPSRC := $(wildcard $(SRC_DIR)/*.cc)
# source/object files
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
ifeq (, $(CPPSRC))
ADDITIONALLIBS :=
else
ADDITIONALLIBS := -lc++
endif
# collect sources
SRC := $(wildcard $(SRC_DIR)/*.c)
SRC += $(CPPSRC)
OBJ := $(addsuffix .o,$(basename $(SRC)))
# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0
CFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
CXXFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
override CFLAGS += -std=gnu99 -fPIC
override CXXFLAGS += -fPIC
# flags
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# OS-specific bits
ifeq ($(shell uname),Darwin)
ifeq ($(OS),Windows_NT)
$(error "Windows is not supported")
else ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
ifneq ($(ADDITIONALLIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
ifneq ($(ADDITIONAL_LIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS),
endif
LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/libtree-sitter-$(PARSER_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
SOEXT = so
SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED := $(LINKSHARED)-shared -Wl,
ifneq ($(ADDITIONALLIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
ifneq ($(ADDITIONAL_LIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS)
endif
LINKSHARED := $(LINKSHARED)-soname,libtree-sitter-$(PARSER_NAME).so.$(SONAME_MAJOR)
LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR)
endif
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
all: libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h bindings/c/tree-sitter-$(PARSER_NAME).pc
all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc
libtree-sitter-$(PARSER_NAME).a: $(OBJ)
$(AR) rcs $@ $^
lib$(LANGUAGE_NAME).a: $(OBJS)
$(AR) $(ARFLAGS) $@ $^
libtree-sitter-$(PARSER_NAME).$(SOEXTVER): $(OBJ)
lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXT)
ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
ifneq ($(STRIP),)
$(STRIP) $@
endif
bindings/c/$(PARSER_NAME).h:
sed -e 's|@UPPER_PARSERNAME@|$(UPPER_PARSER_NAME)|' \
-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
bindings/c/tree-sitter.h.in > $@
bindings/c/tree-sitter-$(PARSER_NAME).pc:
sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
sed -e 's|@URL@|$(PARSER_URL)|' \
-e 's|@VERSION@|$(VERSION)|' \
-e 's|@LIBDIR@|$(LIBDIR)|' \
-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
-e 's|@REQUIRES@|$(REQUIRES)|' \
-e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' \
-e 's|@ADDITIONALLIBS@|$(ADDITIONALLIBS)|' \
-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
-e 's|@PARSERURL@|$(PARSER_URL)|' \
bindings/c/tree-sitter.pc.in > $@
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
$(PARSER): $(SRC_DIR)/grammar.json
$(TS) generate --no-bindings $^
install: all
install -d '$(DESTDIR)$(LIBDIR)'
install -m755 libtree-sitter-$(PARSER_NAME).a '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).a
install -m755 libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER)
ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXT)
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
install -m644 bindings/c/$(PARSER_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
install -d '$(DESTDIR)$(PCLIBDIR)'
install -m644 bindings/c/tree-sitter-$(PARSER_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)
uninstall:
$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
clean:
rm -f $(OBJ) libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXT) libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR) libtree-sitter-$(PARSER_NAME).$(SOEXTVER)
rm -f bindings/c/$(PARSER_NAME).h bindings/c/tree-sitter-$(PARSER_NAME).pc
$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT)
.PHONY: all install clean
test:
$(TS) test
.PHONY: all install uninstall clean test

1
Package.swift generated

@ -28,6 +28,7 @@ let package = Package(
],
sources: [
"src/parser.c",
"src/scanner.c",
],
resources: [
.copy("queries")

23
binding.gyp generated

@ -2,18 +2,29 @@
"targets": [
{
"target_name": "tree_sitter_dockerfile_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# If your language uses an external scanner, add it here.
"src/scanner.c",
],
"conditions": [
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
"cflags_c": [
"-std=c99",
]
}
]
}

11
bindings/c/tree-sitter-dockerfile.pc.in generated Normal file

@ -0,0 +1,11 @@
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
Name: tree-sitter-dockerfile
Description: Dockerfile grammar for tree-sitter
URL: @URL@
Version: @VERSION@
Requires: @REQUIRES@
Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-dockerfile
Cflags: -I${includedir}

13
bindings/go/binding.go generated Normal file

@ -0,0 +1,13 @@
package tree_sitter_dockerfile
// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #include "../../src/scanner.c"
import "C"
import "unsafe"
// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
return unsafe.Pointer(C.tree_sitter_dockerfile())
}

15
bindings/go/binding_test.go generated Normal file

@ -0,0 +1,15 @@
package tree_sitter_dockerfile_test
import (
"testing"
tree_sitter_dockerfile "github.com/camdencheek/tree-sitter-dockerfile"
tree_sitter "github.com/smacker/go-tree-sitter"
)
func TestCanLoadGrammar(t *testing.T) {
language := tree_sitter.NewLanguage(tree_sitter_dockerfile.Language())
if language == nil {
t.Errorf("Error loading Dockerfile grammar")
}
}

5
bindings/go/go.mod generated Normal file

@ -0,0 +1,5 @@
module github.com/camdencheek/tree-sitter-dockerfile
go 1.22
require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8

bindings/node/binding.cc

@ -1,28 +1,20 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
#include <napi.h>
using namespace v8;
typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage * tree_sitter_dockerfile();
extern "C" TSLanguage *tree_sitter_dockerfile();
namespace {
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
NAN_METHOD(New) {}
void Init(Local<Object> exports, Local<Object> module) {
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_dockerfile());
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("dockerfile").ToLocalChecked());
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
exports["name"] = Napi::String::New(env, "dockerfile");
auto language = Napi::External<TSLanguage>::New(env, tree_sitter_dockerfile());
language.TypeTag(&LANGUAGE_TYPE_TAG);
exports["language"] = language;
return exports;
}
NODE_MODULE(tree_sitter_dockerfile_binding, Init)
} // namespace
NODE_API_MODULE(tree_sitter_dockerfile_binding, Init)

28
bindings/node/index.d.ts generated vendored Normal file

@ -0,0 +1,28 @@
type BaseNode = {
type: string;
named: boolean;
};
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
type NodeInfo =
| (BaseNode & {
subtypes: BaseNode[];
})
| (BaseNode & {
fields: { [name: string]: ChildNode };
children: ChildNode[];
});
type Language = {
name: string;
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;

18
bindings/node/index.js generated

@ -1,18 +1,6 @@
try {
module.exports = require("../../build/Release/tree_sitter_dockerfile_binding");
} catch (error1) {
if (error1.code !== 'MODULE_NOT_FOUND') {
throw error1;
}
try {
module.exports = require("../../build/Debug/tree_sitter_dockerfile_binding");
} catch (error2) {
if (error2.code !== 'MODULE_NOT_FOUND') {
throw error2;
}
throw error1
}
}
const root = require("path").join(__dirname, "..", "..");
module.exports = require("node-gyp-build")(root);
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");

bindings/python/tree_sitter_dockerfile/__init__.py

@ -0,0 +1,5 @@
"Dockerfile grammar for tree-sitter"
from ._binding import language
__all__ = ["language"]


@ -0,0 +1 @@
def language() -> int: ...

bindings/python/tree_sitter_dockerfile/binding.c

@ -0,0 +1,27 @@
#include <Python.h>
typedef struct TSLanguage TSLanguage;
TSLanguage *tree_sitter_dockerfile(void);
static PyObject* _binding_language(PyObject *self, PyObject *args) {
return PyLong_FromVoidPtr(tree_sitter_dockerfile());
}
static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = -1,
.m_methods = methods
};
PyMODINIT_FUNC PyInit__binding(void) {
return PyModule_Create(&module);
}


bindings/rust/build.rs

@ -7,17 +7,18 @@ fn main() {
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable")
.flag_if_supported("-Wno-trigraphs");
#[cfg(target_env = "msvc")]
c_config.flag("-utf-8");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
// If your language uses an external scanner written in C,
// then include this block of code:
/*
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
*/
c_config.compile("parser");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

grammar.js

@ -1,10 +1,17 @@
module.exports = grammar({
name: "dockerfile",
extras: ($) => [/\s+/, $.line_continuation],
extras: ($) => [/\s+/, $.line_continuation, $.comment],
externals: ($) => [
$.heredoc_marker,
$.heredoc_line,
$.heredoc_end,
$.heredoc_nl,
$.error_sentinel,
],
rules: {
source_file: ($) => repeat(seq(choice($._instruction, $.comment), "\n")),
source_file: ($) => repeat(seq($._instruction, "\n")),
_instruction: ($) =>
choice(
@ -26,7 +33,7 @@ module.exports = grammar({
$.healthcheck_instruction,
$.shell_instruction,
$.maintainer_instruction,
$.cross_build_instruction
$.cross_build_instruction,
),
from_instruction: ($) =>
@ -46,7 +53,8 @@ module.exports = grammar({
$.mount_param
)
),
choice($.json_string_array, $.shell_command)
choice($.json_string_array, $.shell_command),
repeat($.heredoc_block)
),
cmd_instruction: ($) =>
@ -75,9 +83,10 @@ module.exports = grammar({
alias(/[aA][dD][dD]/, "ADD"),
repeat($.param),
repeat1(
seq($.path, $._non_newline_whitespace)
seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
),
$.path
alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
),
copy_instruction: ($) =>
@ -85,9 +94,10 @@ module.exports = grammar({
alias(/[cC][oO][pP][yY]/, "COPY"),
repeat($.param),
repeat1(
seq($.path, $._non_newline_whitespace)
seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
),
$.path
alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
),
entrypoint_instruction: ($) =>
@ -178,13 +188,13 @@ module.exports = grammar({
shell_instruction: ($) =>
seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.json_string_array),
maintainer_instruction: ($) =>
maintainer_instruction: () =>
seq(
alias(/[mM][aA][iI][nN][tT][aA][iI][nN][eE][rR]/, "MAINTAINER"),
/.*/
),
cross_build_instruction: ($) =>
cross_build_instruction: () =>
seq(
alias(
/[cC][rR][oO][sS][sS]_[bB][uU][iI][lL][dD][a-zA-Z_]*/,
@ -193,15 +203,41 @@ module.exports = grammar({
/.*/
),
heredoc_block: ($) =>
seq(
// A heredoc block starts with a line break after the instruction it
// belongs to. The heredoc_nl token is a special token that only matches
// \n if there's at least one open heredoc to avoid conflicts.
// We also alias this token to hide it from the output like all other
// whitespace.
alias($.heredoc_nl, "_heredoc_nl"),
repeat(seq($.heredoc_line, "\n")),
$.heredoc_end
),
path: ($) =>
seq(
choice(
/[^-\s\$]/, // cannot start with a '-' to avoid conflicts with params
/[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^<]/, // cannot start with a '<<' to avoid conflicts with heredocs (a single < is fine, though)
$.expansion
),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
),
path_with_heredoc: ($) =>
choice(
$.heredoc_marker,
seq(
choice(
/[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^-\s\$<]/,
$.expansion
),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
)
),
expansion: $ =>
seq("$", $._expansion_body),
@ -220,7 +256,7 @@ module.exports = grammar({
)
),
variable: ($) => token.immediate(/[a-zA-Z_][a-zA-Z0-9_]*/),
variable: () => token.immediate(/[a-zA-Z_][a-zA-Z0-9_]*/),
env_pair: ($) =>
seq(
@ -251,7 +287,7 @@ module.exports = grammar({
_env_key: ($) =>
alias(/[a-zA-Z_][a-zA-Z0-9_]*/, $.unquoted_string),
expose_port: ($) => seq(/\d+/, optional(choice("/tcp", "/udp"))),
expose_port: () => seq(/\d+(-\d+)?/, optional(choice("/tcp", "/udp"))),
label_pair: ($) =>
seq(
@ -297,7 +333,7 @@ module.exports = grammar({
),
// Generic parsing of options passed right after an instruction name.
param: ($) =>
param: () =>
seq(
"--",
field("name", token.immediate(/[a-z][-a-z]*/)),
@ -324,7 +360,7 @@ module.exports = grammar({
)
),
mount_param_param: ($) => seq(
mount_param_param: () => seq(
token.immediate(/[^\s=,]+/),
token.immediate("="),
token.immediate(/[^\s=,]+/)
@ -337,12 +373,10 @@ module.exports = grammar({
shell_command: ($) =>
seq(
repeat($._comment_line),
$.shell_fragment,
repeat(
seq(
alias($.required_line_continuation, $.line_continuation),
repeat($._comment_line),
$.shell_fragment
)
)
@ -361,18 +395,16 @@ module.exports = grammar({
// |--------param-------|
// |--shell_command--|
//
seq($.heredoc_marker, /[ \t]*/),
/[,=-]/,
/[^\\\[\n#\s,=-][^\\\n]*/,
/\\[^\n,=-]/
/[^\\\[\n#\s,=-][^\\\n<]*/,
/\\[^\n,=-]/,
/<[^<]/,
)
),
line_continuation: ($) => "\\\n",
required_line_continuation: ($) => "\\\n",
_comment_line: ($) => seq(alias($._anon_comment, $.comment), "\n"),
_anon_comment: ($) => seq("#", /.*/),
line_continuation: () => /\\[ \t]*\n/,
required_line_continuation: () => "\\\n",
json_string_array: ($) =>
seq(
@ -397,7 +429,7 @@ module.exports = grammar({
'"'
),
json_escape_sequence: ($) => token.immediate(
json_escape_sequence: () => token.immediate(
/\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4})/
),
@ -438,22 +470,22 @@ module.exports = grammar({
)
),
double_quoted_escape_sequence: ($) => token.immediate(
double_quoted_escape_sequence: () => token.immediate(
choice(
"\\\\",
"\\\""
)
),
single_quoted_escape_sequence: ($) => token.immediate(
single_quoted_escape_sequence: () => token.immediate(
choice(
"\\\\",
"\\'"
)
),
_non_newline_whitespace: ($) => /[\t ]+/,
_non_newline_whitespace: () => token.immediate(/[\t ]+/),
comment: ($) => /#.*/,
comment: () => /#.*/,
},
});
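Beyond heredocs, the grammar changes above also treat comments as extras, accept port ranges in EXPOSE, and let a line continuation carry trailing spaces before the newline. A small sketch exercising those rules with the same (assumed) node bindings as earlier; the expected node names come from this diff and the corpus tests further down:

```js
// Sketch: exercise comments-as-extras, EXPOSE port ranges, and
// backslash-plus-trailing-space line continuations added above.
const Parser = require("tree-sitter");
const Dockerfile = require("tree-sitter-dockerfile");

const parser = new Parser();
parser.setLanguage(Dockerfile);

const source = [
  "FROM alpine",
  "EXPOSE 8000-8010/tcp",  // expose_port now matches \d+(-\d+)?
  "# comments may appear anywhere, since comment is now an extra",
  "RUN echo one \\ ",      // trailing space after the backslash is now tolerated
  "    && echo two",
  "",
].join("\n");

const tree = parser.parse(source);
// The output should contain (expose_port), (comment), and (line_continuation) nodes.
console.log(tree.rootNode.toString());
```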

package.json

@ -1,18 +1,31 @@
{
"name": "tree-sitter-dockerfile",
"version": "0.1.0",
"version": "0.2.0",
"description": "A tree-sitter module for the Dockerfile grammar",
"main": "bindings/node",
"types": "bindings/node",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"test": "echo \"Error: no test specified\" && exit 1",
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip"
},
"author": "Camden Cheek <camden@ccheek.com>",
"license": "ISC",
"dependencies": {
"nan": "^2.14.2"
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
},
"devDependencies": {
"tree-sitter-cli": "^0.20.8"
"tree-sitter-cli": "^0.21.0",
"prebuildify": "^6.0.0"
},
"tree-sitter": [
{
@ -27,5 +40,13 @@
"queries/highlights.scm"
]
}
],
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**"
]
}

201
pnpm-lock.yaml Normal file

@ -0,0 +1,201 @@
lockfileVersion: '6.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
dependencies:
node-addon-api:
specifier: ^7.1.0
version: 7.1.0
node-gyp-build:
specifier: ^4.8.0
version: 4.8.1
tree-sitter:
specifier: ^0.21.0
version: 0.21.1
devDependencies:
prebuildify:
specifier: ^6.0.0
version: 6.0.1
tree-sitter-cli:
specifier: ^0.20.8
version: 0.20.8
packages:
/base64-js@1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
dev: true
/bl@4.1.0:
resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==}
dependencies:
buffer: 5.7.1
inherits: 2.0.4
readable-stream: 3.6.2
dev: true
/buffer@5.7.1:
resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==}
dependencies:
base64-js: 1.5.1
ieee754: 1.2.1
dev: true
/chownr@1.1.4:
resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==}
dev: true
/end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
dependencies:
once: 1.4.0
dev: true
/fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
dev: true
/ieee754@1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: true
/inherits@2.0.4:
resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
dev: true
/minimist@1.2.8:
resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
dev: true
/mkdirp-classic@0.5.3:
resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==}
dev: true
/node-abi@3.62.0:
resolution: {integrity: sha512-CPMcGa+y33xuL1E0TcNIu4YyaZCxnnvkVaEXrsosR3FxN+fV8xvb7Mzpb7IgKler10qeMkE6+Dp8qJhpzdq35g==}
engines: {node: '>=10'}
dependencies:
semver: 7.6.1
dev: true
/node-addon-api@7.1.0:
resolution: {integrity: sha512-mNcltoe1R8o7STTegSOHdnJNN7s5EUvhoS7ShnTHDyOSd+8H+UdWODq6qSv67PjC8Zc5JRT8+oLAMCr0SIXw7g==}
engines: {node: ^16 || ^18 || >= 20}
dev: false
/node-addon-api@8.0.0:
resolution: {integrity: sha512-ipO7rsHEBqa9STO5C5T10fj732ml+5kLN1cAG8/jdHd56ldQeGj3Q7+scUS+VHK/qy1zLEwC4wMK5+yM0btPvw==}
engines: {node: ^18 || ^20 || >= 21}
dev: false
/node-gyp-build@4.8.1:
resolution: {integrity: sha512-OSs33Z9yWr148JZcbZd5WiAXhh/n9z8TxQcdMhIOlpN9AhWpLfvVFO73+m77bBABQMaY9XSvIa+qk0jlI7Gcaw==}
hasBin: true
dev: false
/npm-run-path@3.1.0:
resolution: {integrity: sha512-Dbl4A/VfiVGLgQv29URL9xshU8XDY1GeLy+fsaZ1AA8JDSfjvr5P5+pzRbWqRSBxk6/DW7MIh8lTM/PaGnP2kg==}
engines: {node: '>=8'}
dependencies:
path-key: 3.1.1
dev: true
/once@1.4.0:
resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
dependencies:
wrappy: 1.0.2
dev: true
/path-key@3.1.1:
resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==}
engines: {node: '>=8'}
dev: true
/prebuildify@6.0.1:
resolution: {integrity: sha512-8Y2oOOateom/s8dNBsGIcnm6AxPmLH4/nanQzL5lQMU+sC0CMhzARZHizwr36pUPLdvBnOkCNQzxg4djuFSgIw==}
hasBin: true
dependencies:
minimist: 1.2.8
mkdirp-classic: 0.5.3
node-abi: 3.62.0
npm-run-path: 3.1.0
pump: 3.0.0
tar-fs: 2.1.1
dev: true
/pump@3.0.0:
resolution: {integrity: sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==}
dependencies:
end-of-stream: 1.4.4
once: 1.4.0
dev: true
/readable-stream@3.6.2:
resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==}
engines: {node: '>= 6'}
dependencies:
inherits: 2.0.4
string_decoder: 1.3.0
util-deprecate: 1.0.2
dev: true
/safe-buffer@5.2.1:
resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
dev: true
/semver@7.6.1:
resolution: {integrity: sha512-f/vbBsu+fOiYt+lmwZV0rVwJScl46HppnOA1ZvIuBWKOTlllpyJ3bfVax76/OrhCH38dyxoDIA8K7uB963IYgA==}
engines: {node: '>=10'}
hasBin: true
dev: true
/string_decoder@1.3.0:
resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==}
dependencies:
safe-buffer: 5.2.1
dev: true
/tar-fs@2.1.1:
resolution: {integrity: sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==}
dependencies:
chownr: 1.1.4
mkdirp-classic: 0.5.3
pump: 3.0.0
tar-stream: 2.2.0
dev: true
/tar-stream@2.2.0:
resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==}
engines: {node: '>=6'}
dependencies:
bl: 4.1.0
end-of-stream: 1.4.4
fs-constants: 1.0.0
inherits: 2.0.4
readable-stream: 3.6.2
dev: true
/tree-sitter-cli@0.20.8:
resolution: {integrity: sha512-XjTcS3wdTy/2cc/ptMLc/WRyOLECRYcMTrSWyhZnj1oGSOWbHLTklgsgRICU3cPfb0vy+oZCC33M43u6R1HSCA==}
hasBin: true
requiresBuild: true
dev: true
/tree-sitter@0.21.1:
resolution: {integrity: sha512-7dxoA6kYvtgWw80265MyqJlkRl4yawIjO7S5MigytjELkX43fV2WsAXzsNfO7sBpPPCF5Gp0+XzHk0DwLCq3xQ==}
requiresBuild: true
dependencies:
node-addon-api: 8.0.0
node-gyp-build: 4.8.1
dev: false
/util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
dev: true
/wrappy@1.0.2:
resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
dev: true

29
pyproject.toml Normal file

@ -0,0 +1,29 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-dockerfile"
description = "Dockerfile grammar for tree-sitter"
version = "0.2.0"
keywords = ["incremental", "parsing", "tree-sitter", "dockerfile"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
"Typing :: Typed"
]
requires-python = ">=3.8"
license.text = "MIT"
readme = "README.md"
[project.urls]
Homepage = "https://github.com/camdencheek/tree-sitter-dockerfile"
[project.optional-dependencies]
core = ["tree-sitter~=0.21"]
[tool.cibuildwheel]
build = "cp38-*"
build-frontend = "build"

queries/highlights.scm

@ -19,6 +19,8 @@
"SHELL"
"MAINTAINER"
"CROSS_BUILD"
(heredoc_marker)
(heredoc_end)
] @keyword
[
@ -36,9 +38,10 @@
"@" @punctuation.special))
[
(double_quoted_string)
(single_quoted_string)
(json_string)
(double_quoted_string)
(single_quoted_string)
(json_string)
(heredoc_line)
] @string
(expansion

60
setup.py generated Normal file

@ -0,0 +1,60 @@
from os.path import isdir, join
from platform import system
from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from wheel.bdist_wheel import bdist_wheel
class Build(build):
def run(self):
if isdir("queries"):
dest = join(self.build_lib, "tree_sitter_dockerfile", "queries")
self.copy_tree("queries", dest)
super().run()
class BdistWheel(bdist_wheel):
def get_tag(self):
python, abi, platform = super().get_tag()
if python.startswith("cp"):
python, abi = "cp38", "abi3"
return python, abi, platform
setup(
packages=find_packages("bindings/python"),
package_dir={"": "bindings/python"},
package_data={
"tree_sitter_dockerfile": ["*.pyi", "py.typed"],
"tree_sitter_dockerfile.queries": ["*.scm"],
},
ext_package="tree_sitter_dockerfile",
ext_modules=[
Extension(
name="_binding",
sources=[
"bindings/python/tree_sitter_dockerfile/binding.c",
"src/parser.c",
"src/scanner.c",
],
extra_compile_args=[
"-std=c11",
] if system() != "Windows" else [
"/std:c11",
"/utf-8",
],
define_macros=[
("Py_LIMITED_API", "0x03080000"),
("PY_SSIZE_T_CLEAN", None)
],
include_dirs=["src"],
py_limited_api=True,
)
],
cmdclass={
"build": Build,
"bdist_wheel": BdistWheel
},
zip_safe=False
)

265
src/grammar.json generated

@ -7,17 +7,8 @@
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_instruction"
},
{
"type": "SYMBOL",
"name": "comment"
}
]
"type": "SYMBOL",
"name": "_instruction"
},
{
"type": "STRING",
@ -207,6 +198,13 @@
"name": "shell_command"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
}
]
},
@ -348,8 +346,13 @@
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "path"
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "SYMBOL",
@ -359,8 +362,20 @@
}
},
{
"type": "SYMBOL",
"name": "path"
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
}
]
},
@ -389,8 +404,13 @@
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "path"
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "SYMBOL",
@ -400,8 +420,20 @@
}
},
{
"type": "SYMBOL",
"name": "path"
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "path_with_heredoc"
},
"named": true,
"value": "path"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "heredoc_block"
}
}
]
},
@ -840,6 +872,40 @@
}
]
},
"heredoc_block": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "heredoc_nl"
},
"named": false,
"value": "_heredoc_nl"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_line"
},
{
"type": "STRING",
"value": "\n"
}
]
}
},
{
"type": "SYMBOL",
"name": "heredoc_end"
}
]
},
"path": {
"type": "SEQ",
"members": [
@ -848,7 +914,11 @@
"members": [
{
"type": "PATTERN",
"value": "[^-\\s\\$]"
"value": "[^-\\s\\$<]"
},
{
"type": "PATTERN",
"value": "<[^<]"
},
{
"type": "SYMBOL",
@ -877,6 +947,56 @@
}
]
},
"path_with_heredoc": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^-\\s\\$<]"
},
{
"type": "PATTERN",
"value": "<[^-\\s\\$<]"
},
{
"type": "SYMBOL",
"name": "expansion"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[^\\s\\$]+"
}
},
{
"type": "SYMBOL",
"name": "_immediate_expansion"
}
]
}
}
]
}
]
},
"expansion": {
"type": "SEQ",
"members": [
@ -1066,7 +1186,7 @@
"members": [
{
"type": "PATTERN",
"value": "\\d+"
"value": "\\d+(-\\d+)?"
},
{
"type": "CHOICE",
@ -1462,13 +1582,6 @@
"shell_command": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_comment_line"
}
},
{
"type": "SYMBOL",
"name": "shell_fragment"
@ -1487,13 +1600,6 @@
"named": true,
"value": "line_continuation"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_comment_line"
}
},
{
"type": "SYMBOL",
"name": "shell_fragment"
@ -1508,60 +1614,46 @@
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "PATTERN",
"value": "[ \\t]*"
}
]
},
{
"type": "PATTERN",
"value": "[,=-]"
},
{
"type": "PATTERN",
"value": "[^\\\\\\[\\n#\\s,=-][^\\\\\\n]*"
"value": "[^\\\\\\[\\n#\\s,=-][^\\\\\\n<]*"
},
{
"type": "PATTERN",
"value": "\\\\[^\\n,=-]"
},
{
"type": "PATTERN",
"value": "<[^<]"
}
]
}
},
"line_continuation": {
"type": "STRING",
"value": "\\\n"
"type": "PATTERN",
"value": "\\\\[ \\t]*\\n"
},
"required_line_continuation": {
"type": "STRING",
"value": "\\\n"
},
"_comment_line": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_anon_comment"
},
"named": true,
"value": "comment"
},
{
"type": "STRING",
"value": "\n"
}
]
},
"_anon_comment": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "#"
},
{
"type": "PATTERN",
"value": ".*"
}
]
},
"json_string_array": {
"type": "SEQ",
"members": [
@ -1797,8 +1889,11 @@
}
},
"_non_newline_whitespace": {
"type": "PATTERN",
"value": "[\\t ]+"
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "[\\t ]+"
}
},
"comment": {
"type": "PATTERN",
@ -1813,12 +1908,36 @@
{
"type": "SYMBOL",
"name": "line_continuation"
},
{
"type": "SYMBOL",
"name": "comment"
}
],
"conflicts": [],
"precedences": [],
"externals": [],
"externals": [
{
"type": "SYMBOL",
"name": "heredoc_marker"
},
{
"type": "SYMBOL",
"name": "heredoc_line"
},
{
"type": "SYMBOL",
"name": "heredoc_end"
},
{
"type": "SYMBOL",
"name": "heredoc_nl"
},
{
"type": "SYMBOL",
"name": "error_sentinel"
}
],
"inline": [],
"supertypes": []
}

97
src/node-types.json generated
View file

@ -7,6 +7,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "heredoc_block",
"named": true
},
{
"type": "param",
"named": true
@ -71,11 +75,6 @@
]
}
},
{
"type": "comment",
"named": true,
"fields": {}
},
{
"type": "copy_instruction",
"named": true,
@ -84,6 +83,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "heredoc_block",
"named": true
},
{
"type": "param",
"named": true
@ -275,6 +278,25 @@
]
}
},
{
"type": "heredoc_block",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "heredoc_end",
"named": true
},
{
"type": "heredoc_line",
"named": true
}
]
}
},
{
"type": "image_alias",
"named": true,
@ -458,11 +480,6 @@
}
}
},
{
"type": "line_continuation",
"named": true,
"fields": {}
},
{
"type": "maintainer_instruction",
"named": true,
@ -606,6 +623,10 @@
{
"type": "expansion",
"named": true
},
{
"type": "heredoc_marker",
"named": true
}
]
}
@ -618,6 +639,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "heredoc_block",
"named": true
},
{
"type": "json_string_array",
"named": true
@ -645,10 +670,6 @@
"multiple": true,
"required": true,
"types": [
{
"type": "comment",
"named": true
},
{
"type": "line_continuation",
"named": true
@ -663,7 +684,17 @@
{
"type": "shell_fragment",
"named": true,
"fields": {}
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "heredoc_marker",
"named": true
}
]
}
},
{
"type": "shell_instruction",
@ -715,10 +746,6 @@
"type": "cmd_instruction",
"named": true
},
{
"type": "comment",
"named": true
},
{
"type": "copy_instruction",
"named": true
@ -884,10 +911,6 @@
"type": "\"",
"named": false
},
{
"type": "#",
"named": false
},
{
"type": "$",
"named": false
@ -1016,10 +1039,6 @@
"type": "\\",
"named": false
},
{
"type": "\\\n",
"named": false
},
{
"type": "\\ ",
"named": false
@ -1028,10 +1047,34 @@
"type": "]",
"named": false
},
{
"type": "_heredoc_nl",
"named": false
},
{
"type": "comment",
"named": true
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "heredoc_end",
"named": true
},
{
"type": "heredoc_line",
"named": true
},
{
"type": "heredoc_marker",
"named": true
},
{
"type": "line_continuation",
"named": true
},
{
"type": "mount",
"named": false

12912
src/parser.c generated

File diff suppressed because it is too large

318
src/scanner.c Normal file

@ -0,0 +1,318 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include "tree_sitter/parser.h"
#define MAX_HEREDOCS 10
#define DEL_SPACE 512
typedef struct {
bool in_heredoc;
bool stripping_heredoc;
unsigned heredoc_count;
char *heredocs[MAX_HEREDOCS];
} scanner_state;
enum TokenType {
HEREDOC_MARKER,
HEREDOC_LINE,
HEREDOC_END,
HEREDOC_NL,
ERROR_SENTINEL,
};
void *tree_sitter_dockerfile_external_scanner_create() {
scanner_state *state = malloc(sizeof(scanner_state));
memset(state, 0, sizeof(scanner_state));
return state;
}
void tree_sitter_dockerfile_external_scanner_destroy(void *payload) {
if (!payload)
return;
scanner_state *state = payload;
for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
if (state->heredocs[i]) {
free(state->heredocs[i]);
}
}
free(state);
}
unsigned tree_sitter_dockerfile_external_scanner_serialize(void *payload,
char *buffer) {
scanner_state *state = payload;
unsigned pos = 0;
buffer[pos++] = state->in_heredoc;
buffer[pos++] = state->stripping_heredoc;
for (unsigned i = 0; i < state->heredoc_count; i++) {
// Add the ending null byte to the length since we'll have to copy it as
// well.
unsigned len = strlen(state->heredocs[i]) + 1;
// If we run out of space, just drop the heredocs that don't fit.
// We need at least len + 1 bytes space since we'll copy len bytes below
// and later add a null byte at the end.
if (pos + len + 1 > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
break;
}
memcpy(&buffer[pos], state->heredocs[i], len);
pos += len;
}
// Add a null byte at the end to make it easy to detect.
buffer[pos++] = 0;
return pos;
}
void tree_sitter_dockerfile_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {
scanner_state *state = payload;
// Free all current heredocs to avoid leaking memory when we overwrite the
// array later.
for (unsigned i = 0; i < state->heredoc_count; i++) {
free(state->heredocs[i]);
state->heredocs[i] = NULL;
}
if (length == 0) {
state->in_heredoc = false;
state->stripping_heredoc = false;
state->heredoc_count = 0;
} else {
unsigned pos = 0;
state->in_heredoc = buffer[pos++];
state->stripping_heredoc = buffer[pos++];
unsigned heredoc_count = 0;
for (unsigned i = 0; i < MAX_HEREDOCS; i++) {
unsigned len = strlen(&buffer[pos]);
// We found the ending null byte which means that we're done.
if (len == 0)
break;
// Account for the ending null byte in strings (again).
len++;
char *heredoc = malloc(len);
memcpy(heredoc, &buffer[pos], len);
state->heredocs[i] = heredoc;
heredoc_count++;
pos += len;
}
state->heredoc_count = heredoc_count;
}
}
static void skip_whitespace(TSLexer *lexer) {
while (lexer->lookahead != '\0' && lexer->lookahead != '\n' &&
iswspace(lexer->lookahead))
lexer->advance(lexer, true);
}
static bool scan_marker(scanner_state *state, TSLexer *lexer) {
skip_whitespace(lexer);
if (lexer->lookahead != '<')
return false;
lexer->advance(lexer, false);
if (lexer->lookahead != '<')
return false;
lexer->advance(lexer, false);
bool stripping = false;
if (lexer->lookahead == '-') {
stripping = true;
lexer->advance(lexer, false);
}
int32_t quote = 0;
if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
quote = lexer->lookahead;
lexer->advance(lexer, false);
}
// Reserve a reasonable amount of space for the heredoc delimiter string.
// Most heredocs (like EOF, EOT, EOS, FILE, etc.) are pretty short so we'll
// usually only need a few bytes. We're also limited to less than 1024 bytes
// by tree-sitter since our state has to fit in
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
char delimiter[DEL_SPACE];
// We start recording the actual string at position 1 since we store whether
// it's a stripping heredoc in the first position (with either a dash or a
// space).
unsigned del_idx = 1;
while (lexer->lookahead != '\0' &&
(quote ? lexer->lookahead != quote : !iswspace(lexer->lookahead))) {
if (lexer->lookahead == '\\') {
lexer->advance(lexer, false);
if (lexer->lookahead == '\0') {
return false;
}
}
if (del_idx > 0) {
delimiter[del_idx++] = lexer->lookahead;
}
lexer->advance(lexer, false);
// If we run out of space, stop recording the delimiter but keep
// advancing the lexer to ensure that we at least parse the marker
// correctly. Reserve two bytes: one for the strip indicator and
// one for the terminating null byte.
if (del_idx >= DEL_SPACE - 2) {
del_idx = 0;
}
}
if (quote) {
if (lexer->lookahead != quote) {
return false;
}
lexer->advance(lexer, false);
}
if (del_idx == 0) {
lexer->result_symbol = HEREDOC_MARKER;
return true;
}
delimiter[0] = stripping ? '-' : ' ';
delimiter[del_idx] = '\0';
// We copy the delimiter string to the heap here since we can't store our
// stack-allocated string in our state (which is stored on the heap).
char *del_copy = malloc(del_idx + 1);
memcpy(del_copy, delimiter, del_idx + 1);
if (state->heredoc_count == 0) {
state->heredoc_count = 1;
state->heredocs[0] = del_copy;
state->stripping_heredoc = stripping;
} else if (state->heredoc_count >= MAX_HEREDOCS) {
free(del_copy);
} else {
state->heredocs[state->heredoc_count++] = del_copy;
}
lexer->result_symbol = HEREDOC_MARKER;
return true;
}
static bool scan_content(scanner_state *state, TSLexer *lexer,
const bool *valid_symbols) {
if (state->heredoc_count == 0) {
state->in_heredoc = false;
return false;
}
state->in_heredoc = true;
if (state->stripping_heredoc) {
skip_whitespace(lexer);
}
if (valid_symbols[HEREDOC_END]) {
unsigned delim_idx = 1;
// Look for the current heredoc delimiter.
while (state->heredocs[0][delim_idx] != '\0' &&
lexer->lookahead != '\0' &&
lexer->lookahead == state->heredocs[0][delim_idx]) {
lexer->advance(lexer, false);
delim_idx++;
}
// Check if the entire string matched.
if (state->heredocs[0][delim_idx] == '\0') {
lexer->result_symbol = HEREDOC_END;
// Shift the first heredoc off the list.
free(state->heredocs[0]);
for (unsigned i = 1; i < state->heredoc_count; i++) {
state->heredocs[i - 1] = state->heredocs[i];
}
state->heredocs[state->heredoc_count - 1] = NULL;
state->heredoc_count--;
if (state->heredoc_count > 0) {
state->stripping_heredoc = state->heredocs[0][0] == '-';
} else {
state->in_heredoc = false;
}
return true;
}
}
if (!valid_symbols[HEREDOC_LINE])
return false;
lexer->result_symbol = HEREDOC_LINE;
for (;;) {
switch (lexer->lookahead) {
case '\0':
if (lexer->eof(lexer)) {
state->in_heredoc = false;
return true;
}
lexer->advance(lexer, false);
break;
case '\n':
return true;
default:
lexer->advance(lexer, false);
}
}
}
bool tree_sitter_dockerfile_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
scanner_state *state = payload;
if (valid_symbols[ERROR_SENTINEL]) {
if (state->in_heredoc) {
return scan_content(state, lexer, valid_symbols);
} else {
return scan_marker(state, lexer);
}
}
// HEREDOC_NL only matches a linebreak if there are open heredocs. This is
// necessary to avoid a conflict in the grammar since a normal line break
// could either be the start of a heredoc or the end of an instruction.
if (valid_symbols[HEREDOC_NL]) {
if (state->heredoc_count > 0 && lexer->lookahead == '\n') {
lexer->result_symbol = HEREDOC_NL;
lexer->advance(lexer, false);
return true;
}
}
if (valid_symbols[HEREDOC_MARKER]) {
return scan_marker(state, lexer);
}
if (valid_symbols[HEREDOC_LINE] || valid_symbols[HEREDOC_END]) {
return scan_content(state, lexer, valid_symbols);
}
return false;
}

54
src/tree_sitter/alloc.h generated Normal file

@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

290
src/tree_sitter/array.h generated Normal file

@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

src/tree_sitter/parser.h

@ -13,9 +13,8 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
@ -87,6 +86,11 @@ typedef union {
} entry;
} TSParseActionEntry;
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
@ -126,13 +130,38 @@ struct TSLanguage {
const TSStateId *primary_state_ids;
};
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {
uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size;
TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) {
return true;
} else if (lookahead > range->end) {
index = mid_index;
}
size -= half_size;
}
TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end);
}
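/* Illustrative sketch, not from the generated parser: `set_contains` expects
   `ranges` to be sorted by `start` and binary-searches them. The identifier
   ranges and the helper name below are hypothetical. */
static TSCharacterRange hypothetical_ident_ranges[] = {
    {'0', '9'}, {'A', 'Z'}, {'_', '_'}, {'a', 'z'},
};

static inline bool hypothetical_is_ident_char(int32_t lookahead) {
    return set_contains(hypothetical_ident_ranges, 4, lookahead);
}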
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
@ -148,6 +177,17 @@ struct TSLanguage {
goto next_state; \
}
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
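/* Illustrative sketch, not generated code: roughly what one ADVANCE_MAP call
   does for a single lexer state. The (character, next-state) pairs and the
   function name are hypothetical; real generated lexers expand the macro
   inline, where `lookahead`, `state`, and the `next_state` label exist. */
static inline bool hypothetical_advance(int32_t lookahead, uint16_t *state) {
    static const uint16_t map[] = { '#', 12, '\\', 13, '=', 14 };
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {
        if (map[i] == lookahead) {
            *state = map[i + 1];
            return true;
        }
    }
    return false;
}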
#define SKIP(state_value) \
{ \
skip = true; \
@ -166,7 +206,7 @@ struct TSLanguage {
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
@ -176,7 +216,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}
@ -184,7 +224,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}
@ -197,14 +237,15 @@ struct TSLanguage {
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
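/* Illustrative sketch, not from the generated parser: a parse-table entry
   written with the explicit REDUCE parameters (symbol, child count, dynamic
   precedence, production id). The symbol constant and the table itself are
   hypothetical. */
enum { sym_hypothetical = 1 };
static const TSParseActionEntry hypothetical_actions[] = {
    REDUCE(sym_hypothetical, 2, 0, 0),
};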
#define RECOVER() \

View file

@ -9,15 +9,15 @@ COPY libsqlite3-pcre-install-alpine.sh /libsqlite3-pcre-install-alpine.sh
---
(source_file
(from_instruction
(image_spec
name: (image_name)
tag: (image_tag)
digest: (image_digest))
as: (image_alias))
(copy_instruction
(path)
(path)))
(from_instruction
(image_spec
name: (image_name)
tag: (image_tag)
digest: (image_digest))
as: (image_alias))
(copy_instruction
(path)
(path)))
==================
Run interrupted with comment
@ -31,13 +31,13 @@ RUN echo hello \
---
(source_file
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(comment)
(comment)
(shell_fragment))))
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(comment)
(comment)
(shell_fragment))))
==================
Run with immediate line continuation
@ -50,11 +50,11 @@ RUN \
---
(source_file
(run_instruction
(line_continuation)
(shell_command
(comment)
(shell_fragment))))
(run_instruction
(line_continuation)
(comment)
(shell_command
(shell_fragment))))
==================
Run with immediate continuation and comment
@ -68,10 +68,10 @@ RUN \
---
(source_file
(run_instruction
(line_continuation)
(shell_command
(shell_fragment)
(line_continuation)
(comment)
(shell_fragment))))
(run_instruction
(line_continuation)
(shell_command
(shell_fragment)
(line_continuation)
(comment)
(shell_fragment))))

View file

@ -139,3 +139,21 @@ ENV TZ America/Toronto
(env_pair
name: (unquoted_string)
value: (unquoted_string))))
================================================================================
Comment immediately after continuation
================================================================================
ENV \
# Where do you live?
TZ America/Toronto
--------------------------------------------------------------------------------
(source_file
(env_instruction
(line_continuation)
(comment)
(env_pair
name: (unquoted_string)
value: (unquoted_string))))

View file

@ -25,3 +25,16 @@ EXPOSE 80 90/tcp 100/udp
(expose_port)
(expose_port)
(expose_port)))
==================
Port range
==================
EXPOSE 9009-9010
---
(source_file
(expose_instruction
(expose_port)))

435
test/corpus/heredoc.txt Normal file
View file

@ -0,0 +1,435 @@
==================
Basic heredoc
==================
FROM busybox AS build
RUN <<EOF
echo "i am" >> /dest
whoami >> /dest
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Basic heredoc with space after marker
==================
FROM busybox AS build
RUN <<EOF
echo "i am" >> /dest
whoami >> /dest
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run heredoc
==================
FROM busybox AS build
SHELL ["/bin/awk"]
RUN <<EOF
BEGIN {
print "foo" > "/dest"
}
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(shell_instruction
(json_string_array
(json_string)))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run heredoc with shebang
==================
FROM busybox AS build
RUN <<EOF
#!/bin/awk -f
BEGIN {
print "hello" >> "/dest"
print "world" >> "/dest"
}
EOF
FROM scratch
COPY --from=build /dest /dest
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Run complex heredoc
==================
FROM busybox AS build
WORKDIR /dest
RUN cat <<EOF1 | tr '[:upper:]' '[:lower:]' > ./out1; \
cat <<EOF2 | tr '[:lower:]' '[:upper:]' > ./out2
hello WORLD
EOF1
HELLO world
EOF2
RUN <<EOF 3<<IN1 4<<IN2 awk -f -
BEGIN {
while ((getline line < "/proc/self/fd/3") > 0)
print tolower(line) > "./fd3"
while ((getline line < "/proc/self/fd/4") > 0)
print toupper(line) > "./fd4"
}
EOF
hello WORLD
IN1
HELLO world
IN2
FROM scratch
COPY --from=build /dest /
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(workdir_instruction
(path))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker))
(line_continuation)
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)
(heredoc_marker)
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
=================================
Copy with heredoc
=================================
FROM busybox AS build
RUN adduser -D user
WORKDIR /dest
COPY <<EOF single
single file
EOF
COPY <<EOF <<EOF2 double/
first file
EOF
second file
EOF2
RUN mkdir -p /permfiles
COPY --chmod=777 <<EOF /permfiles/all
dummy content
EOF
COPY --chmod=0644 <<EOF /permfiles/rw
dummy content
EOF
COPY --chown=user:user <<EOF /permfiles/owned
dummy content
EOF
RUN stat -c "%04a" /permfiles/all >> perms && \
stat -c "%04a" /permfiles/rw >> perms && \
stat -c "%U:%G" /permfiles/owned >> perms
FROM scratch
COPY --from=build /dest /
---
(source_file
(from_instruction
(image_spec
(image_name))
(image_alias))
(run_instruction
(shell_command
(shell_fragment)))
(workdir_instruction
(path))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end))
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(param)
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(shell_fragment)
(line_continuation)
(shell_fragment)))
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(param)
(path)
(path)))
==================
Heredoc with special symbols
==================
FROM scratch
COPY <<EOF quotefile
"quotes in file"
EOF
COPY <<EOF slashfile1
\
EOF
COPY <<EOF slashfile2
\\
EOF
COPY <<EOF slashfile3
\$
EOF
COPY <<"EOF" rawslashfile1
\
EOF
COPY <<"EOF" rawslashfile2
\\
EOF
COPY <<"EOF" rawslashfile3
\$
EOF
---
(source_file
(from_instruction
(image_spec
(image_name)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end)))
(copy_instruction
(path
(heredoc_marker))
(path)
(heredoc_block
(heredoc_line)
(heredoc_end))))

View file

@ -7,9 +7,9 @@ RUN echo "test"
---
(source_file
(run_instruction
(shell_command
(shell_fragment))))
(run_instruction
(shell_command
(shell_fragment))))
==================
Shell command multiline
@ -23,13 +23,28 @@ RUN echo "test" \
---
(source_file
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(shell_fragment)
(line_continuation)
(shell_fragment))))
(run_instruction
(shell_command
(shell_fragment)
(line_continuation)
(shell_fragment)
(line_continuation)
(shell_fragment))))
==================
Shell command multiline space after continuation
==================
RUN echo "test" \
echo "foo"
---
(source_file
(run_instruction
(shell_command
(shell_fragment
(line_continuation)))))
==================
Run with shell empty array
@ -40,8 +55,8 @@ run []
---
(source_file
(run_instruction
(json_string_array)))
(run_instruction
(json_string_array)))
==================
Run with shell array
@ -78,3 +93,22 @@ run --mount=type=secret,id=aws,target=/root/.aws/credentials \
(line_continuation)
(shell_command
(shell_fragment))))
==================
Run with heredoc
==================
RUN cat <<EOF
test
EOF
---
(source_file
(run_instruction
(shell_command
(shell_fragment
(heredoc_marker)))
(heredoc_block
(heredoc_line)
(heredoc_end))))