From 98fdcad2a01e7e7ef68bd12d4226dd1f193163ef Mon Sep 17 00:00:00 2001 From: Martin Jambon Date: Thu, 22 Jun 2023 19:44:07 -0700 Subject: [PATCH] Add support for single-quoted strings and distinguish JSON strings from the double-quoted strings that support variable expansion. --- corpus/cmd.txt | 10 +-- corpus/entrypoint.txt | 11 ++- corpus/run.txt | 10 +-- corpus/shell_instruction.txt | 6 +- corpus/strings.txt | 146 +++++++++++++++++++++++++++++++++++ corpus/volume.txt | 10 +-- grammar.js | 106 +++++++++++++++++-------- 7 files changed, 242 insertions(+), 57 deletions(-) create mode 100644 corpus/strings.txt diff --git a/corpus/cmd.txt b/corpus/cmd.txt index f4cf4c5..cd10dbf 100644 --- a/corpus/cmd.txt +++ b/corpus/cmd.txt @@ -41,7 +41,7 @@ cmd [] (source_file (cmd_instruction - (string_array))) + (json_string_array))) ================== Run with shell array @@ -52,8 +52,8 @@ cmd ["echo", "test"] --- (source_file - (cmd_instruction - (string_array - (double_quoted_string) - (double_quoted_string)))) + (cmd_instruction + (json_string_array + (json_string) + (json_string)))) diff --git a/corpus/entrypoint.txt b/corpus/entrypoint.txt index 4679603..998576e 100644 --- a/corpus/entrypoint.txt +++ b/corpus/entrypoint.txt @@ -41,7 +41,7 @@ ENTRYPOINT [] (source_file (entrypoint_instruction - (string_array))) + (json_string_array))) ================== Run with shell array @@ -52,8 +52,7 @@ ENTRYPOINT ["echo", "test"] --- (source_file - (entrypoint_instruction - (string_array - (double_quoted_string) - (double_quoted_string)))) - + (entrypoint_instruction + (json_string_array + (json_string) + (json_string)))) diff --git a/corpus/run.txt b/corpus/run.txt index 3595d6a..4ff7fbb 100644 --- a/corpus/run.txt +++ b/corpus/run.txt @@ -41,7 +41,7 @@ run [] (source_file (run_instruction - (string_array))) + (json_string_array))) ================== Run with shell array @@ -52,10 +52,10 @@ run ["echo", "test"] --- (source_file - (run_instruction - (string_array - (double_quoted_string) - (double_quoted_string)))) + (run_instruction + (json_string_array + (json_string) + (json_string)))) ================== Run with options diff --git a/corpus/shell_instruction.txt b/corpus/shell_instruction.txt index cea39c8..8b2d0a7 100644 --- a/corpus/shell_instruction.txt +++ b/corpus/shell_instruction.txt @@ -8,7 +8,7 @@ SHELL ["powershell","-command"] (source_file (shell_instruction - (string_array - (double_quoted_string) - (double_quoted_string)))) + (json_string_array + (json_string) + (json_string)))) diff --git a/corpus/strings.txt b/corpus/strings.txt new file mode 100644 index 0000000..68d2e98 --- /dev/null +++ b/corpus/strings.txt @@ -0,0 +1,146 @@ +========================== +JSON string arrays +========================== + +CMD ["a", "bcd"] +CMD ["\"\\"] +CMD ["\b\f\n\r\t\u0ABC\u12Cd"] + +--- + +(source_file + (cmd_instruction + (json_string_array + (json_string) + (json_string))) + (cmd_instruction + (json_string_array + (json_string))) + (cmd_instruction + (json_string_array + (json_string)))) + +========================== +Double-quoted strings +========================== + +ENV KEY "value" +ENV KEY "conti\ +nue" +ENV KEY "quotes: \" '" +ENV KEY "backslash: \\" +ENV KEY "not escape sequences: \a \n" +ENV KEY "expansions: $FOO ${BAR}" + +--- + +(source_file + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string))) + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string + (line_continuation)))) + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string + (double_quoted_escape_sequence)))) + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string + (double_quoted_escape_sequence)))) + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string + (double_quoted_escape_sequence) + (double_quoted_escape_sequence)))) + (env_instruction + (env_pair + (unquoted_string) + (double_quoted_string + (expansion + (variable)) + (expansion + (variable)))))) + +========================== +Single-quoted strings +========================== + +ENV KEY 'value' +ENV KEY 'conti\ +nue' +ENV KEY 'quotes: \' \'' +ENV KEY 'backslash: \\' +ENV KEY 'not escape sequences: \a \n' + +--- + +(source_file + (env_instruction + (env_pair + (unquoted_string) + (single_quoted_string))) + (env_instruction + (env_pair + (unquoted_string) + (single_quoted_string + (line_continuation)))) + (env_instruction + (env_pair + (unquoted_string) + (single_quoted_string + (single_quoted_escape_sequence) + (single_quoted_escape_sequence)))) + (env_instruction + (env_pair + (unquoted_string) + (single_quoted_string + (single_quoted_escape_sequence)))) + (env_instruction + (env_pair + (unquoted_string) + (single_quoted_string + (single_quoted_escape_sequence) + (single_quoted_escape_sequence))))) + +========================== +Unquoted strings +========================== + +ENV KEY value +ENV KEY two\ words +ENV KEY conti\ +nue +ENV KEY $FOO${BAR}baz + +--- + +(source_file + (env_instruction + (env_pair + (unquoted_string) + (unquoted_string))) + (env_instruction + (env_pair + (unquoted_string) + (unquoted_string))) + (env_instruction + (env_pair + (unquoted_string) + (unquoted_string + (line_continuation)))) + (env_instruction + (env_pair + (unquoted_string) + (unquoted_string + (expansion + (variable)) + (expansion + (variable)))))) diff --git a/corpus/volume.txt b/corpus/volume.txt index 310b372..8a9eba1 100644 --- a/corpus/volume.txt +++ b/corpus/volume.txt @@ -49,8 +49,8 @@ VOLUME ["/test/myvol"] (source_file (volume_instruction - (string_array - (double_quoted_string)))) + (json_string_array + (json_string)))) ================== Multiline array @@ -63,7 +63,7 @@ VOLUME ["/test/myvol", \ (source_file (volume_instruction - (string_array - (double_quoted_string) + (json_string_array + (json_string) (line_continuation) - (double_quoted_string)))) + (json_string)))) diff --git a/grammar.js b/grammar.js index 5f23126..77ef352 100644 --- a/grammar.js +++ b/grammar.js @@ -46,13 +46,13 @@ module.exports = grammar({ $.mount_param ) ), - choice($.string_array, $.shell_command) + choice($.json_string_array, $.shell_command) ), cmd_instruction: ($) => seq( alias(/[cC][mM][dD]/, "CMD"), - choice($.string_array, $.shell_command) + choice($.json_string_array, $.shell_command) ), label_instruction: ($) => @@ -93,14 +93,14 @@ module.exports = grammar({ entrypoint_instruction: ($) => seq( alias(/[eE][nN][tT][rR][yY][pP][oO][iI][nN][tT]/, "ENTRYPOINT"), - choice($.string_array, $.shell_command) + choice($.json_string_array, $.shell_command) ), volume_instruction: ($) => seq( alias(/[vV][oO][lL][uU][mM][eE]/, "VOLUME"), choice( - $.string_array, + $.json_string_array, seq($.path, repeat(seq($._non_newline_whitespace, $.path))) ) ), @@ -144,7 +144,12 @@ module.exports = grammar({ optional( seq( token.immediate("="), - field("default", choice($.double_quoted_string, $.unquoted_string)) + field("default", + choice( + $.double_quoted_string, + $.single_quoted_string, + $.unquoted_string + )) ) ) ), @@ -171,7 +176,7 @@ module.exports = grammar({ ), shell_instruction: ($) => - seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.string_array), + seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.json_string_array), maintainer_instruction: ($) => seq( @@ -222,7 +227,12 @@ module.exports = grammar({ field("name", $._env_key), token.immediate("="), optional( - field("value", choice($.double_quoted_string, $.unquoted_string)) + field("value", + choice( + $.double_quoted_string, + $.single_quoted_string, + $.unquoted_string + )) ) ), @@ -230,7 +240,12 @@ module.exports = grammar({ seq( field("name", $._env_key), token.immediate(/\s+/), - field("value", choice($.double_quoted_string, $.unquoted_string)) + field("value", + choice( + $.double_quoted_string, + $.single_quoted_string, + $.unquoted_string + )) ), _env_key: ($) => @@ -242,7 +257,12 @@ module.exports = grammar({ seq( field("key", alias(/[-a-zA-Z0-9\._]+/, $.unquoted_string)), token.immediate("="), - field("value", choice($.double_quoted_string, $.unquoted_string)) + field("value", + choice( + $.double_quoted_string, + $.single_quoted_string, + $.unquoted_string + )) ), image_spec: ($) => @@ -311,15 +331,6 @@ module.exports = grammar({ repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion)) ), - string_array: ($) => - seq( - "[", - optional( - seq($.double_quoted_string, repeat(seq(",", $.double_quoted_string))) - ), - "]" - ), - shell_command: ($) => seq( repeat($._comment_line), @@ -359,41 +370,70 @@ module.exports = grammar({ _anon_comment: ($) => seq("#", /.*/), + json_string_array: ($) => + seq( + "[", + optional( + seq($.json_string, repeat(seq(",", $.json_string))) + ), + "]" + ), + + // Note that JSON strings are different from the other double-quoted + // strings. They don't support $-expansions. + // Convenient reference: https://www.json.org/ + json_string: ($) => /"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"/, + double_quoted_string: ($) => seq( '"', repeat( choice( token.immediate(/[^"\n\\\$]+/), - $.escape_sequence, + $.double_quoted_escape_sequence, $._immediate_expansion ) ), '"' ), + // same as double_quoted_string but without $-expansions: + single_quoted_string: ($) => + seq( + "'", + repeat( + choice( + token.immediate(/[^'\n\\]+/), + $.single_quoted_escape_sequence + ) + ), + "'" + ), + unquoted_string: ($) => repeat1( choice( - token.immediate(/[^\s\n\"\\\$]+/), + token.immediate(/[^\s\n\"'\\\$]+/), token.immediate("\\ "), $._immediate_expansion ) ), - escape_sequence: ($) => - token.immediate( - seq( - "\\", - choice( - /[^xuU]/, - /\d{2,3}/, - /x[0-9a-fA-F]{2,}/, - /u[0-9a-fA-F]{4}/, - /U[0-9a-fA-F]{8}/ - ) - ) - ), + double_quoted_escape_sequence: ($) => token.immediate( + choice( + "\\\\", + "\\\"", + "\\" + ) + ), + + single_quoted_escape_sequence: ($) => token.immediate( + choice( + "\\\\", + "\\'", + "\\" + ) + ), _non_newline_whitespace: ($) => /[\t ]+/,