Skip to content

Commit

Permalink
Run clang-format
Browse files Browse the repository at this point in the history
  • Loading branch information
adisbladis committed Jul 7, 2023
1 parent 5497822 commit ef6443e
Showing 1 changed file with 77 additions and 75 deletions.
152 changes: 77 additions & 75 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@ enum TokenType {
INDENTED_DOLLAR_ESCAPE,
};

// Steps the lexer forward by one character by invoking its `advance` callback
// with `false` as the second argument.
// NOTE(review): in the tree-sitter external scanner API that argument is the
// `skip` flag, so `false` presumably keeps the character inside the token
// being scanned - confirm against the tree-sitter docs.
static void advance(TSLexer *lexer) {
lexer->advance(lexer, false);
}
// Same definition in the single-line layout produced by the formatting commit.
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }

// Steps the lexer forward by one character by invoking its `advance` callback
// with `true` as the second argument.
// NOTE(review): in the tree-sitter external scanner API that argument is the
// `skip` flag, so `true` presumably treats the character as whitespace that is
// excluded from the token - confirm against the tree-sitter docs.
static void skip(TSLexer *lexer) {
lexer->advance(lexer, true);
}
// Same definition in the single-line layout produced by the formatting commit.
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }

static bool scan_dollar_escape(TSLexer *lexer) {
lexer->result_symbol = DOLLAR_ESCAPE;
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '$') {
return true;
if (lexer->lookahead == '$') {
return true;
} else {
return false;
}
Expand All @@ -32,8 +28,8 @@ static bool scan_indented_dollar_escape(TSLexer *lexer) {
lexer->result_symbol = INDENTED_DOLLAR_ESCAPE;
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '$') {
return true;
if (lexer->lookahead == '$') {
return true;
} else {
if (lexer->lookahead == '\\') {
advance(lexer);
Expand All @@ -47,7 +43,8 @@ static bool scan_indented_dollar_escape(TSLexer *lexer) {
}

// Here we only parse literal fragment inside a string.
// Delimiter, interpolation and escape sequence are handled by the parser and we simply stop at them.
// Delimiter, interpolation and escape sequence are handled by the parser and we
// simply stop at them.
//
// The implementation is inspired by tree-sitter-javascript:
// https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/src/scanner.c#L19
Expand All @@ -56,25 +53,25 @@ static bool scan_string_fragment(TSLexer *lexer) {
for (bool has_content = false;; has_content = true) {
lexer->mark_end(lexer);
switch (lexer->lookahead) {
case '"':
case '\\':
case '"':
case '\\':
return has_content;
case '$':
advance(lexer);
if (lexer->lookahead == '{') {
return has_content;
case '$':
advance(lexer);
if (lexer->lookahead == '{') {
return has_content;
} else if (lexer->lookahead != '"' && lexer->lookahead != '\\') {
// Any char following '$' other than '"', '\\' and '{' (which was handled above)
// should be consumed as additional string content.
// This means `$${` doesn't start an interpolation, but `$$${` does.
advance(lexer);
}
break;
// Simply give up on EOF or '\0'.
case '\0':
return false;
default:
} else if (lexer->lookahead != '"' && lexer->lookahead != '\\') {
// Any char following '$' other than '"', '\\' and '{' (which was
// handled above) should be consumed as additional string content. This
// means `$${` doesn't start an interpolation, but `$$${` does.
advance(lexer);
}
break;
// Simply give up on EOF or '\0'.
case '\0':
return false;
default:
advance(lexer);
}
}
}
Expand All @@ -85,36 +82,39 @@ static bool scan_indented_string_fragment(TSLexer *lexer) {
for (bool has_content = false;; has_content = true) {
lexer->mark_end(lexer);
switch (lexer->lookahead) {
case '$':
advance(lexer);
if (lexer->lookahead == '{') {
return has_content;
} else if (lexer->lookahead != '\'') {
// Any char following '$' other than '\'' and '{' (which was handled above)
// should be consumed as additional string content.
// This means `$${` doesn't start an interpolation, but `$$${` does.
advance(lexer);
}
break;
case '\'':
advance(lexer);
if (lexer->lookahead == '\'') {
// Two single quotes always stop current string fragment.
// It can be either an end delimiter '', or escape sequences ''', ''$, ''\<any>
return has_content;
}
break;
// Simply give up on EOF or '\0'.
case '\0':
return false;
default:
case '$':
advance(lexer);
if (lexer->lookahead == '{') {
return has_content;
} else if (lexer->lookahead != '\'') {
// Any char following '$' other than '\'' and '{' (which was handled
// above) should be consumed as additional string content. This means
// `$${` doesn't start an interpolation, but `$$${` does.
advance(lexer);
}
break;
case '\'':
advance(lexer);
if (lexer->lookahead == '\'') {
// Two single quotes always stop current string fragment.
// It can be either an end delimiter '', or escape sequences ''', ''$,
// ''\<any>
return has_content;
}
break;
// Simply give up on EOF or '\0'.
case '\0':
return false;
default:
advance(lexer);
}
}
}

// Reports whether `c` is accepted as a path character.
//
// Accepted characters are the ASCII digits '0'-'9', the ASCII letters
// 'a'-'z' and 'A'-'Z', and the punctuation '-', '+', '_', '.', '/'.
// Every other code point (including '\0' and negative values) yields false.
//
// The function has a single decision path: the earlier text carried two
// back-to-back `return` statements with the same expression, the second of
// which could never execute.
static bool is_path_char(int32_t c) {
  const bool is_digit = c >= '0' && c <= '9';
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  if (is_digit || is_lower || is_upper) {
    return true;
  }

  // Remaining accepted characters are a fixed set of punctuation marks.
  switch (c) {
  case '-':
  case '+':
  case '_':
  case '.':
  case '/':
    return true;
  default:
    return false;
  }
}

static bool scan_path_start(TSLexer *lexer) {
Expand All @@ -124,9 +124,9 @@ static bool scan_path_start(TSLexer *lexer) {
bool have_after_sep = false;
int32_t c = lexer->lookahead;

// unlike string_fragments which are preceded by initial token (i.e. '"')
// and thus will have all leading external whitespace consumed,
// we have no such luxury with the path_start token.
// unlike string_fragments which are preceded by initial token (i.e.
// '"') and thus will have all leading external whitespace consumed, we have
// no such luxury with the path_start token.
//
// so we must skip over any leading whitespace here.
while (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
Expand Down Expand Up @@ -171,18 +171,16 @@ static bool scan_path_fragment(TSLexer *lexer) {
}
}

// External-scanner creation hook. Allocates nothing and always returns NULL.
// NOTE(review): presumably the returned pointer is what tree-sitter later
// passes back as `payload`, and NULL means this scanner keeps no state -
// confirm against the tree-sitter external scanner docs.
void *tree_sitter_nix_external_scanner_create() {
return NULL;
}
// Same definition in the single-line layout produced by the formatting commit.
void *tree_sitter_nix_external_scanner_create() { return NULL; }

bool tree_sitter_nix_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
// This never happens in valid grammar. Only during error recovery, everything becomes valid.
// See: https://github.com/tree-sitter/tree-sitter/issues/1259
const bool *valid_symbols) {
// This never happens in valid grammar. Only during error recovery, everything
// becomes valid. See: https://github.com/tree-sitter/tree-sitter/issues/1259
//
// We should not consume any content as string fragment during error recovery, or we'll break
// more valid grammar below.
// The test 'attrset typing field following string' covers this.
// We should not consume any content as string fragment during error recovery,
// or we'll break more valid grammar below. The test 'attrset typing field
// following string' covers this.
if (valid_symbols[STRING_FRAGMENT] &&
valid_symbols[INDENTED_STRING_FRAGMENT] && valid_symbols[PATH_START] &&
valid_symbols[PATH_FRAGMENT] && valid_symbols[DOLLAR_ESCAPE] &&
Expand All @@ -203,21 +201,22 @@ bool tree_sitter_nix_external_scanner_scan(void *payload, TSLexer *lexer,
}
return scan_indented_string_fragment(lexer);
} else if (valid_symbols[PATH_FRAGMENT] && is_path_char(lexer->lookahead)) {
// path_fragments should be scanned as immediate tokens, with no preceding extras.
// so we assert that the very first token is a path character,
// and otherwise we fall through to the case below.
// example:
// path_fragments should be scanned as immediate tokens, with no preceding
// extras. so we assert that the very first token is a path character, and
// otherwise we fall through to the case below. example:
// a/b${c} d/e${f}
// ^--- note that scanning for the path_fragment will start here.
// this *should* be parsed as a function application.
// so we want to fall through to the path_start case below,
// which will skip the whitespace and correctly scan the following path_start.
// which will skip the whitespace and correctly scan the
// following path_start.
//
// also, we want this above path_start, because wherever there's ambiguity we want to parse another fragment
// instead of starting a new path.
// also, we want this above path_start, because wherever there's ambiguity
// we want to parse another fragment instead of starting a new path.
// example:
// a/b${c}d/e${f}
// if we swap the precedence, we'd effectively parse the above as the following function application:
// if we swap the precedence, we'd effectively parse the above as the
// following function application:
// (a/b${c}) (d/e${f})
return scan_path_fragment(lexer);
} else if (valid_symbols[PATH_START]) {
Expand All @@ -227,10 +226,13 @@ bool tree_sitter_nix_external_scanner_scan(void *payload, TSLexer *lexer,
return false;
}

// External-scanner serialization hook. Neither `payload` nor `buffer` is
// touched; the function always returns 0.
// NOTE(review): presumably the return value is the number of bytes written to
// `buffer`, so 0 means there is no state to save - confirm against the
// tree-sitter external scanner docs.
unsigned tree_sitter_nix_external_scanner_serialize(void *payload, char *buffer) {
// Same signature as re-wrapped by the formatting commit.
unsigned tree_sitter_nix_external_scanner_serialize(void *payload,
char *buffer) {
return 0;
}

// External-scanner deserialization hook. The body is empty: `payload`,
// `buffer` and `length` are all ignored.
// NOTE(review): presumably a no-op because the scanner has no state to
// restore - confirm against the tree-sitter external scanner docs.
void tree_sitter_nix_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { }
// Same definition as re-wrapped by the formatting commit.
void tree_sitter_nix_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {}

// External-scanner teardown hook. The body is empty: `payload` is ignored and
// nothing is freed.
// NOTE(review): presumably a no-op because no payload was ever allocated -
// confirm against the tree-sitter external scanner docs.
void tree_sitter_nix_external_scanner_destroy(void *payload) { }
// Same definition in the layout produced by the formatting commit.
void tree_sitter_nix_external_scanner_destroy(void *payload) {}

0 comments on commit ef6443e

Please sign in to comment.