diff --git a/Cargo.lock b/Cargo.lock index 1c87a04..a51c3d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,8 +1,26 @@ +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ascii" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "atty" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "backtrace" version = "0.3.8" @@ -24,6 +42,11 @@ dependencies = [ "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bitflags" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.2.3" @@ -39,6 +62,20 @@ name = "cfg-if" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "clap" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "combine" version = "3.3.2" @@ -51,13 +88,23 @@ dependencies = 
[ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "dtoa" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ecmascript" -version = "0.1.0" +version = "0.2.0" dependencies = [ + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "combine 3.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -85,6 +132,16 @@ dependencies = [ "synstructure 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "glob" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "itoa" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "lazy_static" version = "1.0.1" @@ -103,16 +160,75 @@ dependencies = [ "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "proc-macro2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quote" version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "quote" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.6 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "redox_syscall" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rustc-demangle" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "serde" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_derive" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "strsim" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "syn" version = "0.11.11" @@ -123,6 +239,16 @@ dependencies = [ "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "syn" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "synom" version = "0.11.3" @@ -140,6 +266,29 @@ dependencies = [ "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "textwrap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unicode-xid" version = "0.0.4" @@ -158,6 +307,11 @@ dependencies = [ "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "void" version = "1.0.2" @@ -183,27 +337,47 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum ascii 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae7d751998c189c1d4468cf0a39bb2eae052a9c58d50ebb3b9591ee3813ad50" +"checksum atty 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2fc4a1aa4c24c0718a250f0681885c1af91419d242f29eb8f2ab28502d80dbd1" "checksum backtrace 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "dbdd17cd962b570302f5297aea8648d5923e22e555c2ed2d8b2e34eca646bf6d" "checksum 
backtrace-sys 0.1.23 (registry+https://github.com/rust-lang/crates.io-index)" = "bff67d0c06556c0b8e6b5f090f0eac52d950d9dfd1d35ba04e4ca3543eaf6a7e" +"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789" "checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9" "checksum cc 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "49ec142f5768efb5b7622aebc3fdbdbb8950a4b9ba996393cb76ef7466e8747d" "checksum cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "405216fd8fe65f718daa7102ea808a946b6ce40c742998fbfd3463645552de18" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum combine 3.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "45d59f1cea36baded7f2261fc5ca7c70dec727ecaa039011b768ceb8a014401f" +"checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd" "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0" "checksum failure 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "934799b6c1de475a012a02dab0ace1ace43789ee4b99bcfbf1a2e3e8ced5de82" "checksum failure_derive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c7cdda555bb90c9bb67a3b670a0f42de8e73f5981524123ad8578aafec8ddb8b" +"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" +"checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606" "checksum lazy_static 1.0.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "e6412c5e2ad9584b0b8e979393122026cdd6d2a80b933f890dcd694ddbe73739" "checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1" "checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d" +"checksum proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "effdb53b25cdad54f8f48843d67398f7ef2e14f12c1b4cb4effc549a6462a4d6" "checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a" +"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035" +"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "76d7ba1feafada44f2d38eed812bd2489a03c0f5abb975799251518b68848649" +"checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920" +"checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124" +"checksum serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "84b8035cabe9b35878adec8ac5fe03d5f6bc97ff6edd7ccb96b44c1276ba390e" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.11.11 
(registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" +"checksum syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)" = "2beff8ebc3658f07512a413866875adddd20f4fd47b2a4e6c9da65cd281baaea" "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" "checksum synstructure 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a761d12e6d8dcb4dcf952a7a89b475e3a9d69e4a69307e01a470977642914bd" +"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" "checksum winapi-i686-pc-windows-gnu 0.4.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" diff --git a/Cargo.toml b/Cargo.toml index c77613c..465aa72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,45 @@ [package] -name = "ecmascript" -version = "0.1.0" authors = ["Nicholas Dujay "] +categories = [ + "parser-implementations", + "parsing", +] description = "A rust implementation of an ECMAScript parser" documentation = "https://docs.rs/ecmascript" homepage = "https://github.com/dat2/ecmascript" -repository = "https://github.com/dat2/ecmascript" -readme = "README.md" -keywords = ["ecmascript", "javascript", "parser"] -categories = [ - "parser-implementations", - "parsing" +keywords = [ + "ecmascript", + "javascript", + "parser", ] license = "MIT" +name = "ecmascript" +readme = "README.md" +repository = "https://github.com/dat2/ecmascript" +version = "0.2.0" -[badges] -travis-ci = { repository = "dat2/ecmascript", branch = "master" } -codecov = { repository = "dat2/ecmascript", branch = "master", service = "github" } -maintenance = { status = "actively-developed" } +[badges.codecov] +branch = "master" +repository = "dat2/ecmascript" +service = "github" -[lib] -name = "ecmascript" +[badges.maintenance] +status = "actively-developed" + +[badges.travis-ci] +branch = "master" +repository = "dat2/ecmascript" [dependencies] +clap = "2.32.0" combine = "3.3.2" failure = "0.1.1" +glob = "0.2.11" lazy_static = "1.0.1" +serde = "1.0.70" +serde_derive = "1.0.70" +serde_json = "1.0.22" unicode-xid = "0.1.0" + +[lib] +name = "ecmascript" diff --git a/README.md b/README.md index 9797a0d..f4842c6 100644 --- a/README.md +++ b/README.md @@ -54,15 +54,15 @@ Docs are hosted on [docs.rs](https://docs.rs/ecmascript/). on it. For example, concatenating modules together, uglifying the variable names, pretty printing uglified code, etc. 
-# Features +# Features (Planned) -* _ECMAScript 2017 v9.0 support_ +* _ECMAScript 2017 v9.0 support_ (WIP) * We are actively developing this library to be up to date! -* _JSX Extended Support_ +* _JSX Extended Support_ (WIP) * JSX is meant to be an additive extension to the language -* _AST Pretty Printer_ +* _AST Pretty Printer_ (WIP) * This supports minification options, such as 0 whitespace -* _AST rewrite rules_ +* _AST rewrite rules_ (WIP) * Eg. Constant folding, translating to older versions of the language, etc. # Testing diff --git a/src/ast.rs b/src/ast.rs index bcbfae7..f093ec2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,57 +1,115 @@ //! This module contains type definitions for the Abstract Syntax elements //! that make up the ECMAScript language. //! -//! The types have been designed to be easily understandable and readable. That means -//! we do not explicitly disallow invalid syntax trees in favour of simplicity. -//! For example, the TaggedTemplate is only allowed to have a TemplateLiteral -//! expression as its quasi, but we do not enforce this in the type definition -//! for the sake of brevity. +//! These types have been translated from the [estree spec](https://github.com/estree/estree/blob/master/es5.md). //! //! The macros `build_ast` and `match_ast` are meant to be the public API of this //! module as they abstract away the types in such a way so that the user of the library //! feels as if they are working with source text almost directly. +/// Position is a line and a column. The line is 1 indexed, and column is 0 indexed. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct Position { + /// The line number in the original source. This is 1 indexed. + pub line: usize, + /// The column number in the original source. This is 0 indexed. 
+ pub column: usize, +} + +impl From<(usize, usize)> for Position { + fn from((line, column): (usize, usize)) -> Position { + Position { line, column } + } +} + +/// A SourceLocation is where the node starts, and ends. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct SourceLocation { + /// The start of the syntax token / element. + pub start: Position, + /// The end of the syntax token / element. + pub end: Position, +} + +impl> From<(P, P)> for SourceLocation { + fn from((start, end): (P, P)) -> SourceLocation { + SourceLocation { + start: start.into(), + end: end.into(), + } + } +} + +/// Id is an identifier in the ecmascript language. +/// eg. `var foo = {};` +/// `foo` is the identifier. +/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-identifier-names). +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct Identifier(pub Option, pub String); + +/// This represents the Literal production of the PrimaryExpression rule. +/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#prod-Literal) +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(untagged)] +pub enum Literal { + /// This is a wrapper around the null literal. + NullLiteral(NullLiteral), + /// This is a wrapper around the boolean literal. + BooleanLiteral(BooleanLiteral), + /// This is a wrapper around the number literal. + NumericLiteral(NumericLiteral), + /// This is a wrapper around the string literal. + StringLiteral(StringLiteral), + /// This is a wrapper around the regexp literal. + RegExpLiteral(RegExpLiteral), +} + /// NullLiteral is the syntax element for `null`. /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-null-literals) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct NullLiteral; /// BooleanLiteral is the syntax element for `true` and `false`. 
/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-boolean-literals) -pub type BooleanLiteral = bool; +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct BooleanLiteral(pub bool); -/// NumberLiteral is the syntax element for numbers. The parser will convert the string +/// NumericLiteral is the syntax element for numbers. The parser will convert the string /// values into an f64 for the sake of simplicity. /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-numeric-literals) -pub type NumberLiteral = f64; +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct NumericLiteral(pub f64); /// StringLiteral is a syntax element with quotes (single or double). /// eg. `'my string literal'` or `"my other string literal"` /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-literals-string-literals) -pub type StringLiteral = String; - -/// Id is an identifier in the ecmascript language. -/// eg. `var foo = {};` -/// `foo` is the identifier. -/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-identifier-names). -pub type Id = String; +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct StringLiteral(pub String); -/// RegexLiteral is the syntax element of a regular expression. +/// RegExpLiteral is the syntax element of a regular expression. /// eg. `/abc[123]/gi` /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-literals-regular-expression-literals) -#[derive(Debug, Clone, PartialEq)] -pub struct RegexLiteral { +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct RegExpLiteral { /// This is the text between the slashes. pub pattern: String, /// This is the text after the slashes. eg the `i` flag is the case insensitive flag. 
pub flags: String, } +// programs + +// functions + +// statements + +// expressions + /// TemplateElement is any text between interpolated expressions inside a template literal. /// eg. ``abc ${} \u{2028}`` /// "abc " and " \u{2028}" would be the TemplateElements for this template literal. /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-template-literal-lexical-components) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct TemplateElement { /// If the template element has any sort of escape sequences (eg. \u{2028}) /// this will represent the evaluated result of that sequence. @@ -60,6 +118,8 @@ pub struct TemplateElement { /// This will store the exact string value, before being evaluted into the unicode /// code points. pub raw: String, + /// This is the source location of the template element + pub loc: Option, } /// Expression is an enumeration of all possible expressions merged into one big enum. @@ -72,26 +132,50 @@ pub struct TemplateElement { /// [Left Hand Side Expressions](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-left-hand-side-expressions) /// [Update Expressions](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-update-expressions) /// [JSX Specification](https://facebook.github.io/jsx/) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(tag = "type")] pub enum Expression { - /// The 'this' keyword is a primary expression. - This, /// An identifier can also be a primary expression. - IdReference(Id), + Identifier { + /// The actual identifier name. + name: String, + /// The source location of the expression. + loc: Option, + }, /// This is all literals minus the regex literal and the template literal. - Literal(ExpressionLiteral), + Literal { + /// This is the value of the literal expression. + value: Literal, + /// This is the location where the expression happens. 
+ loc: Option, + }, + /// The 'this' keyword is a primary expression. + ThisExpression { + /// The source location of the expression. + loc: Option, + }, /// This is an expression created with [] brackets. - ArrayLiteral(Vec), + ArrayExpression { + /// This is the list of elements in the array expression. + elements: Vec, + /// This is the location where the expression starts. + loc: Option, + }, /// This is an expression created by using {} brackets. - ObjectLiteral(Vec), + ObjectExpression { + /// This is the list of properties for the object. + properties: Vec, + /// This is the location where the expression starts. + loc: Option, + }, /// A function expression is a function defined in an expression position. /// Arrow functions are one where the body is a single statement that is an expression /// statement. - Function { + FunctionExpression { /// A function expression can be anonymous, where it has no name. - id: Option, + id: Option, /// The formal parameters to a function. - params: Vec, + params: Vec, /// The body is a list of statements. This can include pragmas. body: Vec, /// This is true if the function was defined with the `async` keyword before the @@ -100,53 +184,111 @@ pub enum Expression { /// This is true if there is a `*` character after the `function` keyword. generator: bool, }, - // Class, - /// A regex literal can be used in expression position. - /// eg (/asd/.test(123)) - RegexLiteral(RegexLiteral), - /// A Template literal expression has many template elements with expressions littered - /// between. - /// - /// When a template literal gets passed to the tagged template, it usually gets split into - /// the quasis (the pieces between the interpolated expressions) as an array for the first - /// argument, and the expressions get spread into the rest of the function call. - /// - /// For the sake of simplicity, we are not representing this in the AST. - TemplateLiteral(Vec), - /// A spread expression is an expression of the form `...()`. 
- Spread(Box), + /// A unary expression is a unary operator in prefix position to the operand. + UnaryExpression { + /// The operator is one that can only take a single operand. + operator: UnaryOperator, + /// This is just for estree. + prefix: bool, + /// The expression is the operand that is passed to the operator. + argument: Box, + }, + /// An update expression is either a postfix or prefix, increment or decrement, operator + /// applied to an operand. + UpdateExpression { + /// The operator is either ++ or -- + operator: UpdateOperator, + /// The argument is another expression, eg. (++(a)) + argument: Box, + /// This tells you if the operator is in prefix or postfix position. + prefix: bool, + }, + /// The binary expression is one of the form (lhs operator rhs). + BinaryExpression { + /// The operator that is infixed between the operands. + operator: BinaryOperator, + /// The left hand side. + left: Box, + /// The right hand side. + right: Box, + }, + /// An assignment operator is one of the form (lhs assigned rhs). This changes the left hand + /// side of the expression by applying an operator to the right hand side and the left hand + /// side to get the new value of the left hand side. + AssignmentExpression { + /// The operator that is between the operands. This is slightly different to the binary + /// expression, as it changes the LHS. The binary operators will return a new value + /// instead of changing the left hand side. + operator: AssignmentOperator, + /// The expression that gets changed in some way. eg. (id = some_new_value) + left: Box, + /// The expression that changes the lhs. + right: Box, + }, + /// The logical expression is a binary expression for logical operators only + LogicalExpression { + /// The operator that is infixed between the operands. + operator: LogicalOperator, + /// The left hand side. + left: Box, + /// The right hand side. + right: Box, + }, /// A member expression is a property access expression. /// Eg. 
`obj.key` or `obj[computed_key]` - Member { - /// The lhs is the object we're trying to access. - lhs: Box, - /// The rhs is the key we're trying to access. It can be computed, or a basic + MemberExpression { + /// The object we're trying to access. + object: Box, + /// The property we're trying to access. It can be computed, or a basic /// IdReference. - rhs: Box, + property: Box, /// This is true if the rhs was written with `[]` notation. computed: bool, }, - /// Super is the `super` keyword, similar to the `this` keyword. - Super, - /// This is the `new.target` expression that was introduced in ES2015. This - /// tells you if the function was called with the `new` operator. - MetaProperty, - /// This is the `new MemberExpression` expression. It will construct the callee - /// and return an object. - New { - /// The callee is the function we are trying to construct. - callee: Box, - /// The arguments is a list of parameters to the function we're trying to construct. - arguments: Vec, + /// The ternary operator. This is of the form (test ? alternate : consequent) + /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-conditional-operator) + ConditionalExpression { + /// The expression before the ?. This must evaluate to a truthy or falsy value. + test: Box, + /// The expression returned if the test expression is truthy. + alternate: Box, + /// The expression returned if the test expression is falsy. + consequent: Box, }, /// This is a regular function call, eg. `myFunction(expr1, expr2)` - Call { + CallExpression { /// The callee is the function we're trying to call. It may be an IIFE (immediately /// invoked function expression) or any other dynamic function. callee: Box, /// The list of parameters to pass to the function. arguments: Vec, }, + /// This is the `new MemberExpression` expression. It will construct the callee + /// and return an object. + NewExpression { + /// The callee is the function we are trying to construct. 
+ callee: Box, + /// The arguments is a list of parameters to the function we're trying to construct. + arguments: Vec, + }, + /// This represents a comma expression, eg. (a, b). This will evaluate the first operand, + /// throw it away, and return the second operand. + /// + /// For a list of operands, it will evaluate all operands, throw them away, and then + /// finally return the last operand. + /// + /// This is mainly useful for side effects, eg. (console.log(expr), expr). + /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-comma-operator) + SequenceExpression { + /// This is the list of expressions separated by a comma. + expressions: Vec, + }, + + /// Super is the `super` keyword, similar to the `this` keyword. + Super, + /// This is the `new.target` expression that was introduced in ES2015. This + /// tells you if the function was called with the `new` operator. + MetaProperty, /// This is an expression where we pass the elements of the template literal to the /// tag function. /// @@ -165,55 +307,7 @@ pub enum Expression { /// AST. quasi: Box, }, - /// An update expression is either a postfix or prefix, increment or decrement, operator - /// applied to an operand. - Update { - /// The operator is either ++ or -- - operator: UpdateOperator, - /// The argument is another expression, eg. (++(a)) - argument: Box, - /// This tells you if the operator is in prefix or postfix position. - prefix: bool, - }, - /// A unary expression is a unary operator in prefix position to the operand. - Unary { - /// The operator is one that can only take a single operand. - operator: UnaryOperator, - /// The expression is the operand that is passed to the operator. - argument: Box, - }, - /// The binary expression is one of the form (lhs operand rhs). - Binary { - /// The operand that is infixed between the operands. - operator: BinaryOperator, - /// The left hand side. - lhs: Box, - /// The right hand side. 
- rhs: Box, - }, - /// The ternary operator. This is of the form (test ? alternate : consequent) - /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-conditional-operator) - Conditional { - /// The expression before the ?. This must evaluate to a truthy or falsy value. - test: Box, - /// The expression returned if the test expression is truthy. - alternate: Box, - /// The expression returned if the test expression is falsy. - consequent: Box, - }, - /// An assignment operator is one of the form (lhs assigned rhs). This changes the left hand - /// side of the expression by applying an operator to the right hand side and the left hand - /// side to get the new value of the left hand side. - Assignment { - /// The operator that is between the operands. This is slightly different to the binary - /// expression, as it changes the LHS. The binary operators will return a new value - /// instead of changing the left hand side. - operator: AssignmentOperator, - /// The expression that gets changed in some way. eg. (id = some_new_value) - lhs: Box, - /// The expression that changes the lhs. - rhs: Box, - }, + // ArrowFunctionExpression /// The yield expression that is only valid inside a generator function. /// It is a syntax error if there is a yield expression in the body of a non generator /// function. @@ -225,50 +319,60 @@ pub enum Expression { /// until the delegate generator completes. delegate: bool, // yield * }, - /// This represents a comma expression, eg. (a, b). This will evaluate the first operand, - /// throw it away, and return the second operand. + // Class, + /// A Template literal expression has many template elements with expressions littered + /// between. /// - /// For a list of operands, it will evaluate all operands, throw them away, and then - /// finally return the last operand. 
+ /// When a template literal gets passed to the tagged template, it usually gets split into + /// the quasis (the pieces between the interpolated expressions) as an array for the first + /// argument, and the expressions get spread into the rest of the function call. /// - /// This is mainly useful for side effects, eg. (console.log(expr), expr). - /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-comma-operator) - Comma(Vec), + /// For the sake of simplicity, we are not representing this in the AST. + TemplateLiteral { + /// Quasis includes all strings parsed by the template literal. + quasis: Vec, + /// All expressions that are interpolated into the final string. + expressions: Vec, + /// This is the location where the expression starts. + loc: Option, + }, /// *NOTE*: This is an extension to the language proposed by facebook. /// The JsxElement is an inlined expression of the form: /// /// The JsxElement must be matched by a closing element, or else it is a syntax error. - JsxElement { + JsxElementExpression { /// The name of the element to construct. name: String, /// The key={value} pairs. attributes: Vec, /// The child elements. children: Vec, + /// The source location of the element. + loc: Option, }, ///*NOTE*: This is an extension to the language proposed by facebook. /// This is an anonymous JsxElement, used when you want to return an array of /// elements without actually wrapping things into an unneeded DOM element. - JsxFragment(Vec), + JsxFragment { + /// The child expressions of the fragment + children: Vec, + /// The source location in code + loc: Option, + }, } -/// This represents the Literal production of the PrimaryExpression rule. -/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#prod-Literal) -#[derive(Debug, Clone, PartialEq)] -pub enum ExpressionLiteral { - /// This is a wrapper around the null literal. - NullLiteral(NullLiteral), - /// This is a wrapper around the boolean literal. 
- BooleanLiteral(BooleanLiteral), - /// This is a wrapper around the number literal. - NumberLiteral(NumberLiteral), - /// This is a wrapper around the string literal. - StringLiteral(StringLiteral), +/// A pattern is any way you can destructure an object or array. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub enum ExpressionListItem { + /// This is just a regular expression. + Expression(Expression), + /// This prevents a spread expression from being in an invalid syntax tree. + Spread(Option, Expression), } /// An object property is a tuple of a key, value, and a tag representing what kind of /// property it is. -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct Property { /// The key can be a computed expression, or an id reference. pub key: Expression, @@ -276,10 +380,18 @@ pub struct Property { pub value: Expression, /// The kind tells us if this is a getter, setter, or basic initializer. pub kind: PropertyKind, + /// This tells us if the property was defined as a shorthand function expression. + pub method: bool, + /// This tells us if the key and value were exactly the same as an Identifier. + pub shorthand: bool, + /// This tells us if the key is more than just a basic literal or Identifier. + pub computed: bool, + /// This tells us where the property is defined. + pub loc: Option, } /// An object property can be a getter, setter, or basic initializer. -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum PropertyKind { /// This just means the value is initialized to the expression. This is the default. Init, @@ -292,10 +404,43 @@ pub enum PropertyKind { Set, } +/// A pattern is any way you can destructure an object or array. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub enum Pattern { + /// This is a regular IdReference pattern. + Identifier(Identifier), + /// This allows you to destructure objects. 
+ Object(Vec), + /// This allows you to destructure arrays. + Array(Vec), + /// This allows you to collect the "rest" of properties or elements + /// in an array into a single parameter. + /// This is only allowed within the Array or Object patterns. + Rest(Identifier), + /// This allows you to set a default value for a pattern. + /// eg. const { x = 1 } + Default { + /// The pattern that you are setting a default for. + /// It is a syntax error for the pattern to be a Rest pattern. + pattern: Box, + /// The value you set the default to. + default: Expression, + }, +} + +/// This is a restricted version of a Property that only allows patterns as the value. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct ObjectPatternProperty { + /// The key can still be an id reference, or computed. + pub key: Expression, + /// The value however is now another pattern. + pub value: Pattern, +} + /// A template literal element can either be the string between backticks and `${` /// or the expression between `${` and `}`. /// This is easier than trying to re-construct the order. -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum TemplateLiteralElement { /// A TemplateElement is the strings between the interpolated expressions. TemplateElement(TemplateElement), @@ -307,7 +452,7 @@ pub enum TemplateLiteralElement { /// then return an updated version of the operand. /// /// If the operator is in postfix position, it returns the old value of the operand. -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum UpdateOperator { /// This will add 1 to the mathematical value of the operand. eg (a++ or ++a) Increment, @@ -317,7 +462,7 @@ pub enum UpdateOperator { /// These operators take 1 operand, and are a prefix of the operand. 
/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-unary-operators) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum UnaryOperator { /// Reverse the sign on the operand. This will do type coercion first. /// eg. (-1) @@ -353,7 +498,7 @@ pub enum UnaryOperator { /// - [Bitwise Operators](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-binary-bitwise-operators) /// - [Logical Operators](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-binary-logical-operators) /// - [Exponentiation Operator](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-exp-operator) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum BinaryOperator { /// The double equal operator that does type coercion. (a == b) EqEq, @@ -416,7 +561,7 @@ pub enum BinaryOperator { /// Assignment operators are ones that signify a chnage to the left hand side of the expression. /// /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-assignment-operators) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum AssignmentOperator { /// The basic assignment statement. This changes the left hand side to become a /// copy of the right hand side. (eg. a = 1) @@ -453,11 +598,25 @@ pub enum AssignmentOperator { BitwiseAndEq, } +/// All the operators that have 2 arguments are merged into one big enum here for simplicity +/// sake. +/// +/// - [Logical Operators](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-binary-logical-operators) +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub enum LogicalOperator { + /// The logical or operator. This works on boolean values rather than numbers. + /// (eg true || false is true) + Or, + /// The logical and operator. This works on boolean values instead of numbers. 
+ /// (eg true && false is false) + And, +} + /// A JSX attribute is either a simple `key={value}` attribute, or a /// spread of an object containing multiple attributes. /// /// [Reference](https://facebook.github.io/jsx/) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub enum JsxAttribute { /// Spread an objects key value pairs into the JSX object as well. JsxSpreadAttribute { @@ -481,18 +640,34 @@ pub enum JsxAttribute { /// For the sake of simplicity, declarations will get merged into this struct as well. /// /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-statements-and-declarations) -#[derive(Debug, Clone, PartialEq)] -pub enum Statement {} +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum Statement { + /// An expression statement. + ExpressionStatement { + /// The expression that the statement contains. + expression: Expression, + /// The source location in code where this statement starts. + loc: Option, + }, +} /// This is the main entry point to the syntax tree. A program is a list of statements, /// and statements include declarations. -#[derive(Debug, Clone, PartialEq)] -pub struct Program { - /// This represents how the source is parsed. A module is parsed in strict mode, which - /// disallows things in the parser level earlier on. - pub source_type: SourceType, - /// The list of statements or declarations made by the source text. - pub body: Vec, +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(tag = "type")] +pub enum Program { + /// There is only one enum possible for program. + #[serde(rename_all = "camelCase")] + Program { + /// The list of statements or declarations made by the source text. + body: Vec, + /// This represents how the source is parsed. A module is parsed in strict mode, which + /// disallows things in the parser level earlier on. 
+ source_type: SourceType, + /// The location of the entire program. + loc: Option, + }, } /// This enum represents whether or not the source code contains an ECMAScript module. @@ -500,7 +675,8 @@ pub struct Program { /// other subtle behaviour differences. /// /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-scripts-and-modules) -#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] pub enum SourceType { /// The source text has no import or export declarations. Script, diff --git a/src/bin/es.rs b/src/bin/es.rs new file mode 100644 index 0000000..fae6874 --- /dev/null +++ b/src/bin/es.rs @@ -0,0 +1,35 @@ +extern crate clap; +extern crate ecmascript; +extern crate failure; +extern crate serde_json; + +use clap::{App, Arg}; +use failure::Error; + +fn main() -> Result<(), Error> { + let matches = App::new("ESTree AST exporter") + .version("0.1") + .author("Nick Dujay ") + .about("Exports ESTree AST from the first argument") + .arg( + Arg::with_name("INPUT") + .help("Sets the input source to parse") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("verbose") + .short("v") + .long("verbose") + .help("Sets the verbosity"), + ) + .get_matches(); + let source = matches.value_of("INPUT").unwrap(); + if matches.is_present("verbose") { + println!("source: {:?}", source); + } + let ast = ecmascript::parse(&source)?; + let json_string = serde_json::to_string_pretty(&ast)?; + println!("{}", json_string); + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index c0c32ed..1a34d1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![deny(missing_docs)] + //! # ecmascript //! `ecmascript` is a crate that helps you parse the ECMAScript 2017 v8.0 language. //! 
It also provides some useful macros to help you construct the AST @@ -9,10 +10,16 @@ extern crate failure; #[macro_use] extern crate lazy_static; extern crate unicode_xid; +#[macro_use] +extern crate serde_derive; + +extern crate serde; #[macro_use] mod macros; pub mod ast; pub mod parser; +#[cfg(test)] +mod parser_unit_test; pub use parser::parse; diff --git a/src/macros.rs b/src/macros.rs index 07c7402..f7ddae4 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -8,10 +8,10 @@ /// # use ecmascript::ast::*; /// let my_wrapper_func = build_ast! { /// [array [ -/// [true], -/// [false], -/// [null], -/// [...[array [ [num 1f64] ]]] +/// [array_item true], +/// [array_item false], +/// [array_item null], +/// [...[array [ [array_item num 1f64] ]]] /// ]] /// }; /// ``` @@ -33,15 +33,17 @@ macro_rules! build_ast { }; // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-lexical-grammar-literals (regex_lit /{$pattern:expr}/{$flags:expr}) => { - RegexLiteral { + RegExpLiteral { pattern: $pattern, flags: $flags, + loc: None, } }; (regex_lit /{$pattern:expr}/) => { - RegexLiteral { + RegExpLiteral { pattern: $pattern, flags: String::new(), + loc: None, } }; (templ_el {$cooked:expr}) => { @@ -51,56 +53,146 @@ macro_rules! 
build_ast { TemplateElement { cooked: $cooked, raw: $raw, + loc: None, } }; // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-expressions (this) => { - Expression::This + Expression::This(None) }; (id $id:expr) => { - Expression::IdReference($id) + Expression::Identifier{ name: $id, loc: None, } }; (null) => { - Expression::Literal(ExpressionLiteral::NullLiteral(NullLiteral)) + Expression::Literal { + value: Literal::NullLiteral(NullLiteral), + loc: None + } }; (true) => { - Expression::Literal(ExpressionLiteral::BooleanLiteral(true)) + Expression::Literal { + value: Literal::BooleanLiteral(BooleanLiteral(true)), + loc: None + } }; (false) => { - Expression::Literal(ExpressionLiteral::BooleanLiteral(false)) + Expression::Literal { + value: Literal::BooleanLiteral(BooleanLiteral(false)), + loc: None + } }; (num $lit:expr) => { - Expression::Literal(ExpressionLiteral::NumberLiteral($lit)) + Expression::Literal { + value: Literal::NumericLiteral(NumericLiteral($lit)), + loc: None + } }; (str $lit:expr) => { - Expression::Literal(ExpressionLiteral::StringLiteral($lit)) + Expression::Literal { + value: Literal::StringLiteral(StringLiteral($lit)), + loc: None + } }; (array [$($elements:tt),*]) => { - Expression::ArrayLiteral(vec![$(build_ast!($elements)),*]) + Expression::ArrayExpression { + loc: None, + elements: vec![$(build_ast!($elements)),*] + } + }; + (array_item $($expression:tt)+) => { + ExpressionListItem::Expression(build_ast!($($expression)+)) }; - (obj [$($properties:tt),+]) => { - Expression::ObjectLiteral(vec![$(build_ast!($params)),+]) + (...$($expression:tt)+) => { + ExpressionListItem::Spread(None, build_ast!($($expression)+)) + }; + (object [$($properties:tt),*]) => { + Expression::ObjectExpression { + loc: None, + properties: vec![$(build_ast!($properties)),*] + } }; ([$($key:tt)+]: [$($value:tt)+]) => { Property { key: build_ast!($($key)+), value: build_ast!($($value)+), kind: PropertyKind::Init, + computed: false, 
+ method: false, + shorthand: false, + loc: None, + } + }; + (get [$($key:tt)+] [$($value:tt)+]) => { + Property { + key: build_ast!($($key)+), + value: build_ast!($($value)+), + kind: PropertyKind::Get, + computed: false, + method: false, + shorthand: false, + loc: None, + } + }; + (set [$($key:tt)+] [$($value:tt)+]) => { + Property { + key: build_ast!($($key)+), + value: build_ast!($($value)+), + kind: PropertyKind::Set, + computed: false, + method: false, + shorthand: false, + loc: None, + } + }; + (function [$($params:tt),*] [$($body:tt),*]) => { + Expression::FunctionExpression { + id: None, + params: vec![$(build_ast!($params)),*], + body: vec![$(build_ast!($body)),*], + generator: false, + async: false + } + }; + (function * [$($params:tt),*] [$($body:tt),*]) => { + Expression::FunctionExpression { + id: None, + params: vec![$(build_ast!($params)),*], + body: vec![$(build_ast!($body)),*], + generator: true, + async: false + } + }; + (async function [$($params:tt),*] [$($body:tt),*]) => { + Expression::FunctionExpression { + id: None, + params: vec![$(build_ast!($params)),*], + body: vec![$(build_ast!($body)),*], + generator: false, + async: true + } + }; + (async function * [$($params:tt),*] [$($body:tt),*]) => { + Expression::FunctionExpression { + id: None, + params: vec![$(build_ast!($params)),*], + body: vec![$(build_ast!($body)),*], + generator: true, + async: true } }; - (function [$($params:tt),+] {$body:expr}) => { - Expression::Function { + (function [$($params:tt),*] {$body:expr}) => { + Expression::FunctionExpression { id: None, - params: vec![$(build_ast!($params)),+], + params: vec![$(build_ast!($params)),*], body: $body, generator: false, async: false } }; - (...[$($expression:tt)+]) => { - Expression::Spread(Box::new(build_ast!($($expression)+))) + (p_id $id:expr) => { + Pattern::Identifier(Identifier(None, $id)) }; - // whole bunch of other stuff between + // whole bunch of other stuff between (call [$($id:tt)+] [$($args:tt)+]) => { 
Expression::Call { callee: Box::new(build_ast!($($id)+)), @@ -115,10 +207,11 @@ macro_rules! build_ast { }; // JSX (<$id:ident />) => { - Expression::JsxElement { + Expression::JsxElementExpression { name: stringify!($id).to_string(), attributes: Vec::new(), - children: Vec::new() + children: Vec::new(), + loc: None } }; /* diff --git a/src/parser.rs b/src/parser.rs index 9754c76..113d71f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,42 +4,42 @@ //! This parser uses the error type from failure to make error interop easier for users. use ast::*; -use combine::error::ParseError; +use combine::easy; use combine::parser::char::{char, crlf, digit, hex_digit, newline, spaces, string}; use combine::parser::choice::{choice, optional}; use combine::parser::combinator::{not_followed_by, try}; use combine::parser::error::unexpected; -use combine::parser::item::{none_of, one_of, satisfy, token, value}; -use combine::parser::repeat::{count, many, many1, skip_until}; +use combine::parser::item::{none_of, one_of, position, satisfy, token, value}; +use combine::parser::repeat::{count, many, many1, sep_end_by, skip_until}; use combine::parser::sequence::between; -use combine::stream::state::State; -use combine::{eof, Parser, Stream}; +use combine::stream::state::{SourcePosition, State}; +use combine::{eof, Parser}; use failure::{self, Error}; use std::collections::HashSet; use unicode_xid::UnicodeXID; +impl From for Position { + fn from(source_position: SourcePosition) -> Self { + Position { + line: source_position.line as usize, + column: source_position.column as usize - 1, + } + } +} + // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-lexical-grammar -#[allow(dead_code)] /// This parser will consume all following whitespace tokens, including line terminators. 
/// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-white-space) -fn ws() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn ws<'a>() -> impl Parser>, Output = ()> { spaces().map(|_| ()) } -#[allow(dead_code)] /// This parser will consume a single line terminator sequence token. This parser is only needed for the /// line_comment parser as it will consume up to a single line terminator token. /// [Reference](https://www.ecma-international.org/ecma-262/9.0/index.html#sec-line-terminators) -fn line_terminator() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn line_terminator<'a>( +) -> impl Parser>, Output = ()> { newline() .or(char('\u{000D}')) .or(char('\u{2028}')) @@ -49,33 +49,21 @@ where } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-comments -#[allow(dead_code)] -fn comment() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn comment<'a>( +) -> impl Parser>, Output = ()> { try(block_comment()).or(line_comment()) } -#[allow(dead_code)] /// This parses a multiline comment, starting with /* and ending with */. /// It will consume the input and return (). 
-fn block_comment() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn block_comment<'a>( +) -> impl Parser>, Output = ()> { (string("/*"), skip_until(try(string("*/"))), string("*/")).map(|_| ()) } -#[allow(dead_code)] /// This parses -fn line_comment() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn line_comment<'a>( +) -> impl Parser>, Output = ()> { ( string("//"), skip_until(line_terminator()), @@ -83,36 +71,23 @@ where ).map(|_| ()) } -#[allow(dead_code)] -fn skip_tokens() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - ws().or(comment()) +fn skip_tokens<'a>( +) -> impl Parser>, Output = ()> { + (ws(), optional(try(comment())), ws()).map(|_| ()) } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-names-and-keywords -#[allow(dead_code)] fn satisfy_id_start(c: char) -> bool { UnicodeXID::is_xid_start(c) || c == '$' || c == '_' } -#[allow(dead_code)] -fn id_start() -> impl Parser -where - I: Stream, - I::Error: ParseError, +fn id_start<'a>() -> impl Parser>, Output = char> { satisfy(satisfy_id_start) } -#[allow(dead_code)] -fn unicode_id_start() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn unicode_id_start<'a>( +) -> impl Parser>, Output = char> { try(unicode_escape_sequence().map(|x| x.0).then(|c| { if satisfy_id_start(c) { value(c).left() @@ -122,27 +97,18 @@ where })).or(id_start()) } -#[allow(dead_code)] fn satisfy_id_continue(c: char) -> bool { // 200c = ZWNJ, 200d = ZWJ UnicodeXID::is_xid_continue(c) || c == '\u{200C}' || c == '\u{200D}' || c == '$' || c == '_' } -#[allow(dead_code)] -fn id_continue() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn id_continue<'a>( +) -> impl Parser>, Output = char> { satisfy(satisfy_id_continue) } -#[allow(dead_code)] -fn unicode_id_continue() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn unicode_id_continue<'a>( +) -> impl Parser>, Output = char> { try(unicode_escape_sequence().map(|x| x.0).then(|c| 
{ if satisfy_id_continue(c) { value(c).left() @@ -153,12 +119,8 @@ where } // TODO strict mode -#[allow(dead_code)] -fn identifier() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn identifier<'a>( +) -> impl Parser>, Output = String> { (unicode_id_start(), many(unicode_id_continue())) .map(|(s, c): (char, String)| s.to_string() + &c) .then(|id| { @@ -181,7 +143,7 @@ where // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-reserved-words lazy_static! { - static ref KEYWORDS: HashSet<&'static str> = { + pub(crate) static ref KEYWORDS: HashSet<&'static str> = { [ "await", "break", @@ -221,9 +183,9 @@ lazy_static! { .cloned() .collect() }; - static ref FUTURE_RESERVED_WORDS: HashSet<&'static str> = + pub(crate) static ref FUTURE_RESERVED_WORDS: HashSet<&'static str> = { ["enum"].iter().cloned().collect() }; - static ref FUTURE_RESERVED_WORDS_STRICT: HashSet<&'static str> = { + pub(crate) static ref FUTURE_RESERVED_WORDS_STRICT: HashSet<&'static str> = { [ "implements", "package", @@ -237,110 +199,34 @@ lazy_static! { }; } -#[cfg(test)] -mod lexical_tests { - use super::*; - - #[test] - fn test_line_comment() { - assert_eq!(comment().parse("//\n"), Ok(((), ""))); - assert_eq!(comment().parse("// hello\n"), Ok(((), ""))); - } - - #[test] - fn test_block_comment() { - assert_eq!(comment().parse("/**/"), Ok(((), ""))); - assert_eq!(comment().parse("/* * */"), Ok(((), ""))); - assert_eq!(comment().parse("/** * **/"), Ok(((), ""))); - assert_eq!(comment().parse("/* hello *\n\t */"), Ok(((), ""))); - } - - #[test] - fn test_identifier() { - // making sure that the unicode_escape_sequence satisifies things - // eg. 
ZWNJ and ZWJ are not allowed as starts - assert!(identifier().parse(r"\u000a").is_err()); - assert!(identifier().parse(r"\u200d").is_err()); - assert!(identifier().parse(r"\u200c").is_err()); - // testing $, _, unicode_escape_sequence as start - assert_eq!(identifier().parse(r"\u24"), Ok(("$".to_string(), ""))); - assert_eq!(identifier().parse(r"_"), Ok(("_".to_string(), ""))); - // testing $, _, ZWNJ, ZWJ, unicode_escape_sequence as continue - assert_eq!(identifier().parse(r"a_"), Ok(("a_".to_string(), ""))); - assert_eq!(identifier().parse(r"a$"), Ok(("a$".to_string(), ""))); - assert_eq!( - identifier().parse(r"_\u200d"), - Ok(("_\u{200d}".to_string(), "")) - ); - assert_eq!( - identifier().parse(r"_\u200c"), - Ok(("_\u{200c}".to_string(), "")) - ); - } - - #[test] - fn test_identifier_reserved_word() { - for &keyword in KEYWORDS.iter() { - assert!(identifier().parse(keyword).is_err()); - } - for &keyword in FUTURE_RESERVED_WORDS.iter() { - assert!(identifier().parse(keyword).is_err()); - } - for &keyword in FUTURE_RESERVED_WORDS_STRICT.iter() { - assert!(identifier().parse(keyword).is_err()); - } - // null literal - assert!(identifier().parse("null").is_err()); - // boolean literal - assert!(identifier().parse("true").is_err()); - assert!(identifier().parse("false").is_err()); - } -} - // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-null-literals -#[allow(dead_code)] -fn null_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn null_literal<'a>( +) -> impl Parser>, Output = NullLiteral> { string("null").map(|_| NullLiteral) } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-boolean-literals -#[allow(dead_code)] -fn boolean_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice(( +pub(crate) fn boolean_literal<'a>( +) -> impl Parser>, Output = BooleanLiteral> { + (choice(( try(string("true")).map(|_| true), string("false").map(|_| false), - )) + ))).map(BooleanLiteral) 
} // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-literals-numeric-literals -#[allow(dead_code)] -fn numeric_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - choice(( +pub(crate) fn numeric_literal<'a>( +) -> impl Parser>, Output = NumericLiteral> { + (choice(( try(binary_integer_literal()), try(octal_integer_literal()), try(hex_integer_literal()), decimal_literal(), - )) + ))).map(NumericLiteral) } -#[allow(dead_code)] -fn decimal_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn decimal_literal<'a>( +) -> impl Parser>, Output = f64> { ( optional(decimal_integer_literal()), optional( @@ -359,12 +245,8 @@ where .map(|s| s.parse::().unwrap()) } -#[allow(dead_code)] -fn decimal_integer_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn decimal_integer_literal<'a>( +) -> impl Parser>, Output = String> { choice(( string("0").skip(not_followed_by(digit())).map(String::from), (one_of("123456789".chars()), many::(digit())) @@ -372,12 +254,8 @@ where )) } -#[allow(dead_code)] -fn exponent_part() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn exponent_part<'a>( +) -> impl Parser>, Output = String> { ( token('e').or(token('E')), optional(token('-').or(token('+'))), @@ -390,12 +268,8 @@ where ) } -#[allow(dead_code)] -fn binary_integer_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn binary_integer_literal<'a>( +) -> impl Parser>, Output = f64> { ( token('0'), token('b').or(token('B')), @@ -403,12 +277,8 @@ where ).map(|(_, _, digits)| i64::from_str_radix(&digits, 2).unwrap() as f64) } -#[allow(dead_code)] -fn octal_integer_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn octal_integer_literal<'a>( +) -> impl Parser>, Output = f64> { ( token('0'), token('o').or(token('O')), @@ -416,12 +286,8 @@ where ).map(|(_, _, digits)| i64::from_str_radix(&digits, 8).unwrap() as f64) } -#[allow(dead_code)] -fn 
hex_integer_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn hex_integer_literal<'a>( +) -> impl Parser>, Output = f64> { ( token('0'), token('x').or(token('X')), @@ -430,21 +296,13 @@ where } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-literals-string-literals -#[allow(dead_code)] -fn string_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - try(double_quote_string()).or(single_quote_string()) +pub(crate) fn string_literal<'a>( +) -> impl Parser>, Output = StringLiteral> { + (try(double_quote_string()).or(single_quote_string())).map(StringLiteral) } -#[allow(dead_code)] -fn double_quote_string() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn double_quote_string<'a>( +) -> impl Parser>, Output = String> { between( token('"'), token('"'), @@ -452,24 +310,16 @@ where ) } -#[allow(dead_code)] -fn double_quote_string_character() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn double_quote_string_character<'a>( +) -> impl Parser>, Output = char> { // U+005C (REVERSE SOLIDUS), U+000D (CARRIAGE RETURN), U+2028 (LINE SEPARATOR), U+2029 (PARAGRAPH SEPARATOR), and U+000A (LINE FEED) escape_sequence().map(|x| x.0).or(none_of( "\u{005c}\u{000D}\u{2028}\u{2029}\u{000A}\"".chars(), )) } -#[allow(dead_code)] -fn single_quote_string() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn single_quote_string<'a>( +) -> impl Parser>, Output = String> { between( token('\''), token('\''), @@ -477,12 +327,8 @@ where ) } -#[allow(dead_code)] -fn single_quote_string_character() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn single_quote_string_character<'a>( +) -> impl Parser>, Output = char> { // U+005C (REVERSE SOLIDUS), U+000D (CARRIAGE RETURN), U+2028 (LINE SEPARATOR), U+2029 (PARAGRAPH SEPARATOR), and U+000A (LINE FEED) escape_sequence() .map(|x| x.0) @@ -492,12 +338,8 @@ where // (char, String) is "cooked" and "raw" // this is for template 
elements, to be able to get access to the raw string // this makes things uglier, but oh well -#[allow(dead_code)] -fn escape_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn escape_sequence<'a>( +) -> impl Parser>, Output = (char, String)> { choice(( try(character_escape_sequence()), try(non_escape_character_sequence()), @@ -507,12 +349,8 @@ where )) } -#[allow(dead_code)] -fn character_escape_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn character_escape_sequence<'a>( +) -> impl Parser>, Output = (char, String)> { token('\\') .and(one_of(r#"'"\bfnrtv"#.chars())) .map(|(t, c)| { @@ -529,12 +367,8 @@ where }) } -#[allow(dead_code)] -fn non_escape_character_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn non_escape_character_sequence<'a>( +) -> impl Parser>, Output = (char, String)> { token('\\') .and(none_of( "'\"\\bfnrtv0123456789xu\r\n\u{2028}\u{2029}".chars(), @@ -542,12 +376,8 @@ where .map(|(t, c)| (c, format!("{}{}", t, c))) } -#[allow(dead_code)] -fn hex_escape_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn hex_escape_sequence<'a>( +) -> impl Parser>, Output = (char, String)> { (token('\\'), token('x'), count::(2, hex_digit())).map(|(t, x, hex_digits)| { let code_point = u32::from_str_radix(&hex_digits, 16).unwrap(); let cooked = ::std::char::from_u32(code_point).unwrap(); @@ -556,12 +386,8 @@ where } // https://www.ecma-international.org/ecma-262/9.0/index.html#prod-UnicodeEscapeSequence -#[allow(dead_code)] -fn unicode_escape_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn unicode_escape_sequence<'a>( +) -> impl Parser>, Output = (char, String)> { ( token('\\'), token('u'), @@ -592,73 +418,53 @@ where } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-literals-regular-expression-literals -#[allow(dead_code)] -fn regex_literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn 
regex_literal_expression<'a>( +) -> impl Parser>, Output = Expression> { + (position(), regex_literal(), position()).map(|(start, value, end)| Expression::Literal { + value: Literal::RegExpLiteral(value), + loc: Some((start, end).into()), + }) +} + +pub(crate) fn regex_literal<'a>( +) -> impl Parser>, Output = RegExpLiteral> { ( between(token('/'), token('/'), regex_body()), many::(id_continue()), - ).map(|(pattern, flags)| RegexLiteral { pattern, flags }) + ).map(|(pattern, flags)| RegExpLiteral { pattern, flags }) } -#[allow(dead_code)] -fn regex_body() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_body<'a>( +) -> impl Parser>, Output = String> { (regex_first_char(), many::(regex_char())).map(|(s, s2): (String, String)| s + &s2) } -#[allow(dead_code)] -fn regex_first_char() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_first_char<'a>( +) -> impl Parser>, Output = String> { try(regex_backslash_sequence()) .or(try(regex_class())) .or(none_of("*/\\[\n\r\u{2028}\u{2029}".chars()).map(|c: char| c.to_string())) } -#[allow(dead_code)] -fn regex_char() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_char<'a>( +) -> impl Parser>, Output = String> { try(regex_backslash_sequence()) .or(try(regex_class())) .or(none_of("/\\[\n\r\u{2028}\u{2029}".chars()).map(|c: char| c.to_string())) } -#[allow(dead_code)] -fn regex_non_terminator() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_non_terminator<'a>( +) -> impl Parser>, Output = char> { none_of("\n\r\u{2028}\u{2029}".chars()) } -#[allow(dead_code)] -fn regex_backslash_sequence() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_backslash_sequence<'a>( +) -> impl Parser>, Output = String> { (token('\\'), regex_non_terminator()).map(|(c, s): (char, char)| c.to_string() + &s.to_string()) } -#[allow(dead_code)] -fn regex_class() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn regex_class<'a>( 
+) -> impl Parser>, Output = String> { ( token('['), many::( @@ -671,55 +477,55 @@ where } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-template-literal-lexical-components -#[allow(dead_code)] -fn template() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn template<'a>( +) -> impl Parser>, Output = TemplateElement> { choice((try(no_substition_template()), template_head())) } -#[allow(dead_code)] -fn no_substition_template() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - token('`'), - token('`'), - many::, _>(template_character()), - ).map(|pairs| { +fn no_substition_template<'a>( +) -> impl Parser>, Output = TemplateElement> { + ( + position(), + between( + token('`'), + token('`'), + many::, _>(template_character()), + ), + position(), + ).map(|(start, pairs, end)| { let cooked = pairs.iter().cloned().map(|x| x.0).collect(); let raw = pairs.iter().cloned().map(|x| x.1).collect(); - TemplateElement { cooked, raw } + TemplateElement { + cooked, + raw, + loc: Some((start, end).into()), + } }) } -#[allow(dead_code)] -fn template_head() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - token('`'), - string("${"), - many::, _>(template_character()), - ).map(|pairs| { +fn template_head<'a>( +) -> impl Parser>, Output = TemplateElement> { + ( + position(), + between( + token('`'), + string("${"), + many::, _>(template_character()), + ), + position(), + ).map(|(start, pairs, end)| { let cooked = pairs.iter().cloned().map(|x| x.0).collect(); let raw = pairs.iter().cloned().map(|x| x.1).collect(); - TemplateElement { cooked, raw } + TemplateElement { + cooked, + raw, + loc: Some((start, end).into()), + } }) } -#[allow(dead_code)] -fn template_character() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn template_character<'a>( +) -> impl Parser>, Output = (char, String)> { choice(( try(token('$').skip(not_followed_by(token('{')))).map(|x: char| (x, 
x.to_string())), try(escape_sequence()), @@ -728,402 +534,242 @@ where )) } -#[allow(dead_code)] -fn template_substition_tail() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn template_substition_tail<'a>( +) -> impl Parser>, Output = TemplateElement> { choice((try(template_middle()), template_tail())) } -#[allow(dead_code)] -fn template_middle() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between( - token('}'), - string("${"), - many::, _>(template_character()), - ).map(|pairs| { +fn template_middle<'a>( +) -> impl Parser>, Output = TemplateElement> { + ( + position(), + between( + token('}'), + string("${"), + many::, _>(template_character()), + ), + position(), + ).map(|(start, pairs, end)| { let cooked = pairs.iter().cloned().map(|x| x.0).collect(); let raw = pairs.iter().cloned().map(|x| x.1).collect(); - TemplateElement { cooked, raw } + TemplateElement { + cooked, + raw, + loc: Some((start, end).into()), + } }) } -#[allow(dead_code)] -fn template_tail() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - token('}') - .with(many::, _>(template_character())) - .map(|pairs| { - let cooked = pairs.iter().cloned().map(|x| x.0).collect(); - let raw = pairs.iter().cloned().map(|x| x.1).collect(); - TemplateElement { cooked, raw } - }) -} - -#[cfg(test)] -mod literal_tests { - use super::*; - - #[test] - fn test_null_literal() { - assert_eq!(null_literal().parse("null"), Ok((NullLiteral, ""))); - } - - #[test] - fn test_boolean_literal() { - assert_eq!(boolean_literal().parse("true"), Ok((true, ""))); - assert_eq!(boolean_literal().parse("false"), Ok((false, ""))); - } - - #[test] - fn test_number_literal() { - // decimal - assert_eq!(numeric_literal().parse("0"), Ok((0f64, ""))); - assert!(numeric_literal().parse("01").is_err()); - assert!(numeric_literal().parse("01.").is_err()); - assert_eq!(numeric_literal().parse("9"), Ok((9f64, ""))); - assert_eq!(numeric_literal().parse("10"), Ok((10f64, ""))); - 
assert_eq!(numeric_literal().parse("0.1"), Ok((0.1f64, ""))); - assert_eq!(numeric_literal().parse(".1"), Ok((0.1f64, ""))); - assert_eq!(numeric_literal().parse("1e1"), Ok((10f64, ""))); - assert_eq!(numeric_literal().parse(".1e1"), Ok((1f64, ""))); - assert_eq!(numeric_literal().parse("1.1e1"), Ok((11f64, ""))); - - // binary - assert_eq!(numeric_literal().parse("0b1010"), Ok((10f64, ""))); - assert_eq!(numeric_literal().parse("0B1010"), Ok((10f64, ""))); - // octal - assert_eq!(numeric_literal().parse("0o123"), Ok((83f64, ""))); - assert_eq!(numeric_literal().parse("0O123"), Ok((83f64, ""))); - // hex - assert_eq!( - numeric_literal().parse("0xDEADBEEF"), - Ok((3735928559f64, "")) - ); - assert_eq!( - numeric_literal().parse("0XDEADBEEF"), - Ok((3735928559f64, "")) - ); - } - - #[test] - fn test_string_literal() { - // empty - assert_eq!(string_literal().parse(r#""""#), Ok((String::new(), ""))); - assert_eq!(string_literal().parse("''"), Ok((String::new(), ""))); - // not allowed chars - for not_allowed_char in "\u{005c}\u{000D}\u{2028}\u{2029}\u{000A}".chars() { - let double_quote_slice: &str = &format!("\"{}\"", not_allowed_char); - let single_quote_slice: &str = &format!("'{}'", not_allowed_char); - assert!(string_literal().parse(double_quote_slice).is_err()); - assert!(string_literal().parse(single_quote_slice).is_err()); - } - // character escape sequences - for escaped_character in r#"'"\bfnrtv"#.chars() { - let double_quote_slice: &str = &format!("\"\\{}\"", escaped_character); - let single_quote_slice: &str = &format!("'\\{}'", escaped_character); - assert!(string_literal().parse(double_quote_slice).is_ok()); - assert!(string_literal().parse(single_quote_slice).is_ok()); - } - // non character escape sequences - assert_eq!(string_literal().parse("\"\\a\""), Ok(("a".to_string(), ""))); - assert_eq!(string_literal().parse("'\\a'"), Ok(("a".to_string(), ""))); - - // hex escape sequence - assert_eq!( - string_literal().parse(r#""\x0A""#), - 
Ok(("\n".to_string(), "")) - ); - assert_eq!( - string_literal().parse(r"'\x0a'"), - Ok(("\n".to_string(), "")) - ); - // unicode escape sequence - assert_eq!( - string_literal().parse(r#""\u2764""#), - Ok(("❤".to_string(), "")) - ); - assert_eq!( - string_literal().parse(r"'\u2764'"), - Ok(("❤".to_string(), "")) - ); - assert_eq!( - string_literal().parse(r#""\u{2764}""#), - Ok(("❤".to_string(), "")) - ); - assert_eq!( - string_literal().parse(r"'\u{2764}'"), - Ok(("❤".to_string(), "")) - ); - assert!(string_literal().parse(r"'\u{110000}'").is_err()); - - // line continuation - for line_continuation_char in "\r\n\u{2028}\u{2029}".chars() { - let double_quote_slice: &str = &format!("\"\\{}\"", line_continuation_char); - let single_quote_slice: &str = &format!("'\\{}'", line_continuation_char); - assert!(string_literal().parse(double_quote_slice).is_err()); - assert!(string_literal().parse(single_quote_slice).is_err()); - } - } - - #[test] - fn test_regex_literal() { - // must be non empty - assert!(regex_literal().parse("//").is_err()); - - // not allowed first chars - for c in "*\\/[".chars() { - let slice: &str = &format!("/{}/", c); - assert!(regex_literal().parse(slice).is_err()); - } - - // backslash as first char - assert_eq!( - regex_literal().parse("/\\a/"), - Ok((build_ast!(regex_lit /{"\\a".to_string()}/), "")) - ); - - // character class as first char - assert_eq!( - regex_literal().parse("/[ab]/"), - Ok((build_ast!(regex_lit /{"[ab]".to_string()}/), "")) - ); - - // not allowed second chars - /* - for c in "\\/[".chars() { - let slice: &str = &format!("/a{}/", c); - assert!(regex_literal().parse(slice).is_err()); - } - */ - - // backslash as second char - assert_eq!( - regex_literal().parse("/a\\a/"), - Ok((build_ast!(regex_lit /{"a\\a".to_string()}/), "")) - ); - - // character class as second char - assert_eq!( - regex_literal().parse("/a[ab]/"), - Ok((build_ast!(regex_lit /{"a[ab]".to_string()}/), "")) - ); - - // character class with unallowed chars 
- /* - for c in "\\/]".chars() { - let slice: &str = &format!("/a[{}]/", c); - assert!(regex_literal().parse(slice).is_err()); - } - */ - - // character class with backslash - assert_eq!( - regex_literal().parse("/a[ab\\]]/"), - Ok((build_ast!(regex_lit /{"a[ab\\]]".to_string()}/), "")) - ); - - // flags - assert_eq!( - regex_literal().parse("/a/f"), - Ok(( - build_ast!(regex_lit / { "a".to_string() } / { "f".to_string() }), - "" - )) - ); - assert_eq!( - regex_literal().parse("/a/fi"), - Ok(( - build_ast!(regex_lit / { "a".to_string() } / { "fi".to_string() }), - "" - )) - ); - assert!(regex_literal().skip(eof()).parse("/a/\\u1234").is_err()); - } - - #[test] - fn test_template_elements() { - // empty - assert_eq!( - template().parse("``"), - Ok((build_ast!(templ_el {String::new()}), "")) - ); - - // no_substitution_template - assert_eq!( - template().parse("`asd`"), - Ok((build_ast!(templ_el {"asd".to_string()}), "")) - ); - - // template_head - assert_eq!( - template().parse("`asd ${eval}`"), - Ok((build_ast!(templ_el {"asd ".to_string()}), "eval}`")) - ); - - // template_middle - assert_eq!( - template_substition_tail().parse("} asd ${eval}`"), - Ok((build_ast!(templ_el {" asd ".to_string()}), "eval}`")) - ); - - // template_tail - assert_eq!( - template_substition_tail().parse("} asd"), - Ok((build_ast!(templ_el {" asd".to_string()}), "")) - ); - - // $ - assert_eq!( - template_character().parse("$123"), - Ok((('$', "$".to_string()), "123")) - ); - // escape sequence - assert_eq!( - template_character().parse("\\n"), - Ok((('\n', "\\n".to_string()), "")) - ); - assert_eq!( - template_character().parse("\\x0A"), - Ok((('\n', "\\x0A".to_string()), "")) - ); - assert_eq!( - template_character().parse("\\u2764"), - Ok((('❤', "\\u2764".to_string()), "")) - ); - assert_eq!( - template_character().parse("\\u{2764}"), - Ok((('❤', "\\u{2764}".to_string()), "")) - ); - // line continuation - for line_continuation_char in "\r\n\u{2028}\u{2029}".chars() { - let slice: 
&str = &line_continuation_char.to_string(); - assert_eq!( - template_character().parse(slice), - Ok(( - (line_continuation_char, line_continuation_char.to_string()), - "" - )) - ); +fn template_tail<'a>( +) -> impl Parser>, Output = TemplateElement> { + ( + position(), + token('}').with(many::, _>(template_character())), + position(), + ).map(|(start, pairs, end)| { + let cooked = pairs.iter().cloned().map(|x| x.0).collect(); + let raw = pairs.iter().cloned().map(|x| x.1).collect(); + TemplateElement { + cooked, + raw, + loc: Some((start, end).into()), } - } + }) } // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-expressions -#[allow(dead_code)] -fn primary_expression() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +pub(crate) fn primary_expression<'a>( +) -> impl Parser>, Output = Expression> { choice(( try(this()), - try(identifier_reference()), + try(identifier_expression()), try(literal()), try(array_literal()), + try(object_literal()), + try(regex_literal_expression()), jsx_element(), )) } -#[allow(dead_code)] -fn this() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - string("this").map(|_| Expression::This) +fn this<'a>( +) -> impl Parser>, Output = Expression> { + (position(), string("this"), position()).map(|(start, _, end)| Expression::ThisExpression { + loc: Some((start, end).into()), + }) } -#[allow(dead_code)] -fn identifier_reference() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - identifier().map(Expression::IdReference) +fn identifier_expression<'a>( +) -> impl Parser>, Output = Expression> { + (position(), identifier(), position()).map(|(start, name, end)| Expression::Identifier { + name, + loc: Some((start, end).into()), + }) +} + +fn literal<'a>( +) -> impl Parser>, Output = Expression> { + ( + position(), + choice(( + try(null_literal()).map(Literal::NullLiteral), + try(boolean_literal()).map(Literal::BooleanLiteral), + 
try(numeric_literal()).map(Literal::NumericLiteral), + try(string_literal()).map(Literal::StringLiteral), + )), + position(), + ).map(|(start, value, end)| Expression::Literal { + value, + loc: Some((start, end).into()), + }) +} + +fn array_literal<'a>( +) -> impl Parser>, Output = Expression> { + ( + position(), + between( + token('[').skip(skip_tokens()), + token(']').skip(skip_tokens()), + elision().with(element_list()).skip(elision()), + ), + position(), + ).map(|(start, elements, end)| Expression::ArrayExpression { + elements, + loc: Some((start, end).into()), + }) +} + +fn elision<'a>() -> impl Parser>, Output = ()> { + many::, _>(token(',')).with(skip_tokens()) } -#[allow(dead_code)] -fn literal() -> impl Parser -where - I: Stream, - I::Error: ParseError, +fn element_list<'a>( +) -> impl Parser>, Output = Vec> { + many( + choice(( + try(assignment_expression()).map(ExpressionListItem::Expression), + spread_element(), + )).skip(elision()), + ) +} + +fn object_literal<'a>( +) -> impl Parser>, Output = Expression> { + ( + position(), + between( + token('{').skip(skip_tokens()), + token('}').skip(skip_tokens()), + sep_end_by( + property_definition().skip(skip_tokens()), + token(',').skip(skip_tokens()), + ), + ), + position(), + ).map(|(start, properties, end)| Expression::ObjectExpression { + loc: Some((start, end).into()), + properties, + }) +} + +fn property_definition<'a>( +) -> impl Parser>, Output = Property> { choice(( - try(null_literal()).map(|n| Expression::Literal(ExpressionLiteral::NullLiteral(n))), - try(boolean_literal()).map(|n| Expression::Literal(ExpressionLiteral::BooleanLiteral(n))), - try(numeric_literal()).map(|n| Expression::Literal(ExpressionLiteral::NumberLiteral(n))), - try(string_literal()).map(|n| Expression::Literal(ExpressionLiteral::StringLiteral(n))), + try(property_initializer()), + try(method_definition()), + try(shorthand_property()), )) } -#[allow(dead_code)] -fn array_literal() -> impl Parser -where - I: Stream, - I::Error: 
ParseError, -{ - between( - token('[').skip(skip_tokens()), - token(']').skip(skip_tokens()), - elision().with(element_list()).skip(elision()), - ).map(Expression::ArrayLiteral) +fn shorthand_property<'a>( +) -> impl Parser>, Output = Property> { + (position(), identifier_expression(), position()).map(|(start, id, end)| Property { + key: id.clone(), + value: id, + kind: PropertyKind::Init, + method: false, + shorthand: true, + computed: false, + loc: Some((start, end).into()), + }) } -#[allow(dead_code)] -fn elision() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - many::, _>(token(',')).with(skip_tokens()) +fn property_initializer<'a>( +) -> impl Parser>, Output = Property> { + ( + position(), + property_name(), + skip_tokens(), + token(':'), + skip_tokens(), + literal(), + position(), + ).map(|(start, (key, computed), _, _, _, value, end)| Property { + key, + value, + kind: PropertyKind::Init, + method: false, + shorthand: false, + computed, + loc: Some((start, end).into()), + }) } -#[allow(dead_code)] -fn element_list() -> impl Parser> -where - I: Stream, - I::Error: ParseError, +fn property_name<'a>( +) -> impl Parser>, Output = (Expression, bool)> { - many(choice((try(assignment_expression()), spread_element())).skip(elision())) + choice(( + try(literal_property_name()).map(|e| (e, false)), + try(computed_property_name()).map(|e| (e, true)), + )) } -#[allow(dead_code)] -fn spread_element() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - string("...") - .with(assignment_expression()) - .map(Box::new) - .map(Expression::Spread) +fn literal_property_name<'a>( +) -> impl Parser>, Output = Expression> { + choice(( + identifier_expression(), + (position(), string_literal(), position()).map(|(start, value, end)| Expression::Literal { + value: Literal::StringLiteral(value), + loc: Some((start, end).into()), + }), + (position(), numeric_literal(), position()).map(|(start, value, end)| { + Expression::Literal { + value: 
Literal::NumericLiteral(value), + loc: Some((start, end).into()), + } + }), + )) +} + +fn computed_property_name<'a>( +) -> impl Parser>, Output = Expression> { + between( + token('[').skip(skip_tokens()), + token(']'), + assignment_expression(), + ) } -#[allow(dead_code)] -fn assignment_expression() -> impl Parser -where - I: Stream, - I::Error: ParseError, +fn spread_element<'a>( +) -> impl Parser>, Output = ExpressionListItem> { + ( + position(), + string("...").with(assignment_expression()), + position(), + ).map(|(start, expression, end)| { + ExpressionListItem::Spread(Some((start, end).into()), expression) + }) +} + +fn assignment_expression<'a>( +) -> impl Parser>, Output = Expression> { yield_expression() } -#[allow(dead_code)] -fn yield_expression() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn yield_expression<'a>( +) -> impl Parser>, Output = Expression> { ( string("yield"), skip_tokens(), @@ -1136,51 +782,48 @@ where } // https://facebook.github.io/jsx/ -#[allow(dead_code)] -fn jsx_element() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn jsx_element<'a>( +) -> impl Parser>, Output = Expression> { choice((try(jsx_self_closing_element()), jsx_matched_element())) } -#[allow(dead_code)] -fn jsx_self_closing_element() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - between(token('<'), string("/>"), identifier()).map(|name| Expression::JsxElement { +fn jsx_self_closing_element<'a>( +) -> impl Parser>, Output = Expression> { + ( + position(), + between(token('<'), string("/>"), identifier()), + position(), + ).map(|(start, name, end)| Expression::JsxElementExpression { name, attributes: Vec::new(), children: Vec::new(), + loc: Some((start, end).into()), }) } -#[allow(dead_code)] -fn jsx_matched_element() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ +fn jsx_matched_element<'a>( +) -> impl Parser>, Output = Expression> { ( + position(), between(token('<'), string(">"), identifier()), 
skip_tokens(), between(string("'), identifier()), - ).then(|(opening_name, _, closing_name)| { + position(), + ).then(|(start, opening_name, _, closing_name, end)| { if opening_name == closing_name { - value(Expression::JsxElement { + value(Expression::JsxElementExpression { name: opening_name, attributes: Vec::new(), children: Vec::new(), + loc: Some((start, end).into()), }).left() } else { unexpected("closing element") - .map(|_| Expression::JsxElement { + .map(|_| Expression::JsxElementExpression { name: String::new(), attributes: Vec::new(), children: Vec::new(), + loc: None, }) .message("closing name is not the same as opening name") .right() @@ -1188,98 +831,193 @@ where }) } -#[cfg(test)] -mod expression_test { - use super::*; +// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-statements-and-declarations +fn statement<'a>( +) -> impl Parser>, Output = Statement> { + // TODO use assignment_expression instead + (position(), primary_expression(), position()).map(|(start, expression, end)| { + Statement::ExpressionStatement { + loc: Some((start, end).into()), + expression, + } + }) +} - #[test] - fn test_this() { - assert_eq!( - primary_expression().parse("this"), - Ok((build_ast!(this), "")) - ); - } +// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-functions-and-classes - #[test] - fn test_identifier_reference() { - assert_eq!( - primary_expression().parse("abc123"), - Ok((build_ast!(id "abc123".to_string()), "")) - ); - } +fn formal_parameters<'a>( +) -> impl Parser>, Output = Vec> { + between( + token('(').skip(skip_tokens()), + token(')'), + value(Vec::new()), + ) +} - #[test] - fn test_literal() { - assert_eq!( - primary_expression().parse("null"), - Ok((build_ast!(null), "")) - ); - assert_eq!( - primary_expression().parse("true"), - Ok((build_ast!(true), "")) - ); - assert_eq!( - primary_expression().parse("false"), - Ok((build_ast!(false), "")) - ); - assert_eq!( - 
primary_expression().parse("123.e1"), - Ok((build_ast!(num 1230f64), "")) - ); - assert_eq!( - primary_expression().parse("'abc'"), - Ok((build_ast!(str "abc".to_string()), "")) - ); - } +fn formal_parameter<'a>( +) -> impl Parser>, Output = Pattern> { + (position(), identifier(), position()) + .map(|(start, id, end)| Identifier(Some((start, end).into()), id)) + .map(Pattern::Identifier) +} - #[test] - fn test_array_literal() { - assert_eq!( - primary_expression().parse("[]"), - Ok((build_ast!(array []), "")) - ); - assert_eq!( - primary_expression().parse("[,,,,]"), - Ok((build_ast!(array []), "")) - ); - assert_eq!( - primary_expression().parse("[,,,,yield,,yield,,,]"), - Ok((build_ast!(array [ [yield], [yield] ]), "")) - ); - assert_eq!( - primary_expression().parse("[,,,...yield,,,]"), - Ok((build_ast!(array [ [...[yield]] ]), "")) - ); - } +fn function_body<'a>( + _yield: bool, + _await: bool, +) -> impl Parser>, Output = Vec> { + between( + token('{').skip(skip_tokens()), + token('}'), + value(Vec::new()), + ) +} - #[test] - fn test_jsx() { - assert_eq!( - primary_expression().parse("
"), - Ok((build_ast!(
), "")) - ); - assert_eq!( - primary_expression().parse("
\n\n
"), - Ok((build_ast!(
), "")) - ); - assert!(primary_expression().parse("
\n\n").is_err()); - } +fn method_definition<'a>( +) -> impl Parser>, Output = Property> { + choice(( + try(getter_method_definition()), + try(setter_method_definition()), + try(generator_method_definition()), + try(async_generator_method_definition()), + try(async_method_definition()), + basic_method_definition(false, false), + )) } -// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-statements-and-declarations +fn basic_method_definition<'a>( + _yield: bool, + _await: bool, +) -> impl Parser>, Output = Property> { + ( + position(), + property_name(), + skip_tokens(), + formal_parameters(), + skip_tokens(), + function_body(_yield, _await), + // lololol + // the value here is just to pass the yield and await into the map below + value(_yield), + value(_await), + position(), + ).map( + |(start, (key, computed), _, params, _, body, _yield, _await, end)| Property { + key, + value: Expression::FunctionExpression { + id: None, + async: _await, + generator: _yield, + body, + params, + }, + kind: PropertyKind::Init, + method: true, + shorthand: false, + computed, + loc: Some((start, end).into()), + }, + ) +} -// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-functions-and-classes +fn generator_method_definition<'a>( +) -> impl Parser>, Output = Property> { + ( + token('*'), + skip_tokens(), + basic_method_definition(true, false), + ).map(|x| x.2) +} + +fn async_method_definition<'a>( +) -> impl Parser>, Output = Property> { + ( + string("async"), + skip_tokens(), + basic_method_definition(false, true), + ).map(|x| x.2) +} + +fn async_generator_method_definition<'a>( +) -> impl Parser>, Output = Property> { + ( + string("async"), + skip_tokens(), + token('*'), + skip_tokens(), + basic_method_definition(true, true), + ).map(|x| x.4) +} + +fn getter_method_definition<'a>( +) -> impl Parser>, Output = Property> { + ( + position(), + string("get").skip(skip_tokens()), + property_name().skip(skip_tokens()), + 
token('(').skip(skip_tokens()), + token(')').skip(skip_tokens()), + function_body(false, false), + position(), + ).map(|(start, _, (key, computed), _, _, body, end)| Property { + key, + value: Expression::FunctionExpression { + id: None, + async: false, + generator: false, + body, + params: Vec::new(), + }, + kind: PropertyKind::Get, + method: false, + shorthand: false, + computed, + loc: Some((start, end).into()), + }) +} + +fn setter_method_definition<'a>( +) -> impl Parser>, Output = Property> { + ( + position(), + string("set").skip(skip_tokens()), + property_name().skip(skip_tokens()), + between( + token('(').skip(skip_tokens()), + token(')').skip(skip_tokens()), + formal_parameter().skip(skip_tokens()), + ), + function_body(false, false), + position(), + ).map(|(start, _, (key, computed), param, body, end)| Property { + key, + value: Expression::FunctionExpression { + id: None, + async: false, + generator: false, + body, + params: vec![param], + }, + kind: PropertyKind::Set, + method: false, + shorthand: false, + computed, + loc: Some((start, end).into()), + }) +} // https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-scripts-and-modules -#[allow(dead_code)] -fn program() -> impl Parser -where - I: Stream, - I::Error: ParseError, -{ - eof().map(|_| Program { - source_type: SourceType::Module, - body: Vec::new(), +fn program<'a>( +) -> impl Parser>, Output = Program> { + ( + position(), + skip_tokens(), + many::, _>(statement().skip(skip_tokens())), + eof(), + position(), + ).map(|(start, _, body, _, end)| Program::Program { + source_type: SourceType::Script, + body, + loc: Some((start, end).into()), }) } diff --git a/src/parser_unit_test.rs b/src/parser_unit_test.rs new file mode 100644 index 0000000..bbe9511 --- /dev/null +++ b/src/parser_unit_test.rs @@ -0,0 +1,974 @@ +use ast::*; +use combine::stream::state::State; +use combine::{eof, Parser}; +use parser::*; + +macro_rules! 
assert_parse_success { + ($parser:ident, $input:expr, $result:expr) => { + assert_eq!( + ($parser(), eof()) + .map(|x| x.0) + .easy_parse(State::new($input)) + .map(|x| x.0), + Ok($result) + ); + }; +} + +macro_rules! assert_parse_failure { + ($parser:ident, $input:expr) => { + assert!(($parser(), eof()).easy_parse(State::new($input)).is_err()); + }; +} + +#[test] +fn test_block_comment() { + assert_parse_success!(comment, "/**/", ()); + assert_parse_success!(comment, "/* * */", ()); + assert_parse_success!(comment, "/** * **/", ()); + assert_parse_success!(comment, "/* hello *\n\t */", ()); +} + +#[test] +fn test_identifier_start_invalid_escape_sequence() { + // making sure that the unicode_escape_sequence satisifies things + // eg. ZWNJ and ZWJ are not allowed as starts + assert_parse_failure!(identifier, r"\u000a"); + assert_parse_failure!(identifier, r"\u200d"); + assert_parse_failure!(identifier, r"\u200c"); +} + +#[test] +fn test_identifier_start_valid() { + // testing $, _, unicode_escape_sequence as start + assert_parse_success!(identifier, r"\u24", "$".to_string()); + assert_parse_success!(identifier, r"_", "_".to_string()); +} + +#[test] +fn test_identifier_continue_valid() { + // testing $, _, ZWNJ, ZWJ, unicode_escape_sequence as continue + assert_parse_success!(identifier, r"a_", "a_".to_string()); + assert_parse_success!(identifier, r"a$", "a$".to_string()); + assert_parse_success!(identifier, r"_\u200d", "_\u{200d}".to_string()); + assert_parse_success!(identifier, r"_\u200c", "_\u{200c}".to_string()); +} + +#[test] +fn test_identifier_reserved_word() { + for &keyword in KEYWORDS.iter() { + assert_parse_failure!(identifier, keyword); + } + for &keyword in FUTURE_RESERVED_WORDS.iter() { + assert_parse_failure!(identifier, keyword); + } + for &keyword in FUTURE_RESERVED_WORDS_STRICT.iter() { + assert_parse_failure!(identifier, keyword); + } + + // null literal + assert_parse_failure!(identifier, "null"); + // boolean literal + 
assert_parse_failure!(identifier, "true"); + assert_parse_failure!(identifier, "false"); +} + +#[test] +fn test_null_literal() { + assert_parse_success!(null_literal, "null", NullLiteral); +} + +#[test] +fn test_boolean_literal() { + assert_parse_success!(boolean_literal, "true", BooleanLiteral(true)); + assert_parse_success!(boolean_literal, "false", BooleanLiteral(false)); +} + +#[test] +fn test_number_literal_decimal() { + // decimal + assert_parse_success!(numeric_literal, "0", NumericLiteral(0f64)); + assert_parse_failure!(numeric_literal, "01"); + assert_parse_failure!(numeric_literal, "01."); + assert_parse_success!(numeric_literal, "9", NumericLiteral(9f64)); + assert_parse_success!(numeric_literal, "10", NumericLiteral(10f64)); + assert_parse_success!(numeric_literal, "0.1", NumericLiteral(0.1f64)); + assert_parse_success!(numeric_literal, ".1", NumericLiteral(0.1f64)); + assert_parse_success!(numeric_literal, "1e1", NumericLiteral(10f64)); + assert_parse_success!(numeric_literal, ".1e1", NumericLiteral(1f64)); + assert_parse_success!(numeric_literal, "1.1e1", NumericLiteral(11f64)); +} + +#[test] +fn test_number_literal_binary() { + // binary + assert_parse_success!(numeric_literal, "0b1010", NumericLiteral(10f64)); + assert_parse_success!(numeric_literal, "0B1010", NumericLiteral(10f64)); +} + +#[test] +fn test_number_literal_octal() { + // octal + assert_parse_success!(numeric_literal, "0o123", NumericLiteral(83f64)); + assert_parse_success!(numeric_literal, "0O123", NumericLiteral(83f64)); +} + +#[test] +fn test_number_literal_hex() { + // hex + assert_parse_success!(numeric_literal, "0XDEADBEEF", NumericLiteral(3735928559f64)); + assert_parse_success!(numeric_literal, "0xDEADBEEF", NumericLiteral(3735928559f64)); +} + +#[test] +fn test_string_literal_empty() { + // empty + assert_parse_success!(string_literal, r#""""#, StringLiteral(String::new())); + assert_parse_success!(string_literal, "''", StringLiteral(String::new())); +} + +#[test] +fn 
test_string_literal_invalid_chars() { + // not allowed chars + for not_allowed_char in "\u{005c}\u{000D}\u{2028}\u{2029}\u{000A}".chars() { + let double_quote_slice: &str = &format!("\"{}\"", not_allowed_char); + let single_quote_slice: &str = &format!("'{}'", not_allowed_char); + assert_parse_failure!(string_literal, double_quote_slice); + assert_parse_failure!(string_literal, single_quote_slice); + } +} + +#[test] +fn test_string_literal_character_escape_sequence() { + // character escape sequences + let escape_chars = r#"'"\bfnrtv"#.chars(); + let escape_char_values = "\'\"\\\u{8}\u{c}\n\r\t\u{b}".chars(); + for (escaped_character, value) in escape_chars.zip(escape_char_values) { + let double_quote_slice: &str = &format!("\"\\{}\"", escaped_character); + let single_quote_slice: &str = &format!("'\\{}'", escaped_character); + assert_parse_success!( + string_literal, + double_quote_slice, + StringLiteral(value.to_string()) + ); + assert_parse_success!( + string_literal, + single_quote_slice, + StringLiteral(value.to_string()) + ); + } + // non character escape sequences + assert_parse_success!(string_literal, "\"\\a\"", StringLiteral("a".to_string())); + assert_parse_success!(string_literal, "'\\a'", StringLiteral("a".to_string())); +} + +#[test] +fn test_string_literal_hex_escape_sequence() { + // hex escape sequence + assert_parse_success!(string_literal, r#""\x0A""#, StringLiteral("\n".to_string())); + assert_parse_success!(string_literal, r#"'\x0A'"#, StringLiteral("\n".to_string())); +} + +#[test] +fn test_string_literal_unicode_escape_sequence() { + // unicode escape sequence + assert_parse_success!( + string_literal, + r#""\u2764""#, + StringLiteral("❤".to_string()) + ); + assert_parse_success!( + string_literal, + r"'\u2764'", + StringLiteral("❤".to_string()) + ); + assert_parse_success!( + string_literal, + r#""\u{2764}""#, + StringLiteral("❤".to_string()) + ); + assert_parse_success!( + string_literal, + r"'\u{2764}'", + StringLiteral("❤".to_string()) + 
); + assert_parse_failure!(string_literal, r"'\u{110000}'"); +} + +#[test] +fn test_string_literal_line_continuation_invalid() { + // line continuation + for line_continuation_char in "\r\n\u{2028}\u{2029}".chars() { + let double_quote_slice: &str = &format!("\"\\{}\"", line_continuation_char); + let single_quote_slice: &str = &format!("'\\{}'", line_continuation_char); + assert_parse_failure!(string_literal, double_quote_slice); + assert_parse_failure!(string_literal, single_quote_slice); + } +} + +#[test] +fn test_regex_literal_empty() { + // must be non empty + assert_parse_failure!(regex_literal, "//"); +} + +#[test] +fn test_regex_literal_start_invalid() { + // not allowed first chars + for c in "*\\/[".chars() { + let slice: &str = &format!("/{}/", c); + assert_parse_failure!(regex_literal, slice); + } +} + +#[test] +fn test_regex_literal_start_backslash() { + // backslash as first char + assert_parse_success!( + regex_literal, + "/\\a/", + RegExpLiteral { + pattern: "\\a".to_string(), + flags: String::new(), + } + ); +} + +#[test] +fn test_regex_literal_start_character_class() { + // character class as first char + assert_parse_success!( + regex_literal, + "/[ab]/", + RegExpLiteral { + pattern: "[ab]".to_string(), + flags: String::new(), + } + ); +} + +#[test] +fn test_regex_literal_continue_backslash() { + // backslash as second char + assert_parse_success!( + regex_literal, + "/a\\a/", + RegExpLiteral { + pattern: "a\\a".to_string(), + flags: String::new(), + } + ); +} + +#[test] +fn test_regex_literal_continue_character_class() { + // character class as second char + assert_parse_success!( + regex_literal, + "/a[ab]/", + RegExpLiteral { + pattern: "a[ab]".to_string(), + flags: String::new(), + } + ); +} + +#[test] +fn test_regex_literal_character_class_backslash() { + // character class with backslash + assert_parse_success!( + regex_literal, + "/a[ab\\]]/", + RegExpLiteral { + pattern: "a[ab\\]]".to_string(), + flags: String::new(), + } + ); +} + 
+// Expects the characters after the closing `/` to be returned as the flags string
+// (`f`, `fi`), and a `\u1234` escape in flag position to be rejected.
+#[test]
+fn test_regex_literal_flags() {
+    // flags
+    assert_parse_success!(
+        regex_literal,
+        "/a/f",
+        RegExpLiteral {
+            pattern: "a".to_string(),
+            flags: "f".to_string(),
+        }
+    );
+    assert_parse_success!(
+        regex_literal,
+        "/a/fi",
+        RegExpLiteral {
+            pattern: "a".to_string(),
+            flags: "fi".to_string(),
+        }
+    );
+    assert_parse_failure!(regex_literal, "/a/\\u1234");
+}
+
+// Expects an empty template (two backticks) to yield empty `raw` and `cooked` strings.
+// NOTE(review): the `loc` tuples in these tests look like ((line, column), (line, column))
+// with 1-based lines and 0-based columns; confirm against the location type.
+#[test]
+fn test_template_element_empty() {
+    assert_parse_success!(
+        template,
+        "``",
+        TemplateElement {
+            raw: String::new(),
+            cooked: String::new(),
+            loc: Some(((1, 0), (1, 2)).into())
+        }
+    );
+}
+
+// Expects a template without substitutions to yield its text as both `raw` and `cooked`,
+// with a `loc` that also covers the backticks.
+#[test]
+fn test_template_element_no_substitution_template() {
+    assert_parse_success!(
+        template,
+        "`asd`",
+        TemplateElement {
+            raw: "asd".to_string(),
+            cooked: "asd".to_string(),
+            loc: Some(((1, 0), (1, 5)).into())
+        }
+    );
+}
+
+// Expects a template head (text up to and including `${`) to yield only the text before
+// `${`, while the expected `loc` end (7) covers the whole input including `${`.
+#[test]
+fn test_template_element_template_head() {
+    assert_parse_success!(
+        template,
+        "`asd ${",
+        TemplateElement {
+            raw: "asd ".to_string(),
+            cooked: "asd ".to_string(),
+            loc: Some(((1, 0), (1, 7)).into())
+        }
+    );
+}
+
+// Expects `template_substition_tail` to accept a middle piece (`}` ... `${`) and yield
+// the text between the delimiters; the expected `loc` covers the delimiters too.
+#[test]
+fn test_template_element_template_middle() {
+    assert_parse_success!(
+        template_substition_tail,
+        "} asd ${",
+        TemplateElement {
+            raw: " asd ".to_string(),
+            cooked: " asd ".to_string(),
+            loc: Some(((1, 0), (1, 8)).into())
+        }
+    );
+}
+
+// Expects `template_substition_tail` to accept the tail piece that follows a closing `}`.
+// NOTE(review): the input shown here has no closing backtick, unlike the other template
+// inputs; it may have been lost in this copy of the patch. Confirm against the original.
+#[test]
+fn test_template_element_template_tail() {
+    // template_tail
+    assert_parse_success!(
+        template_substition_tail,
+        "} asd",
+        TemplateElement {
+            raw: " asd".to_string(),
+            cooked: " asd".to_string(),
+            loc: Some(((1, 0), (1, 5)).into())
+        }
+    );
+}
+
+// Expects `template_character` to return a pair of (decoded character, source text):
+// escapes are decoded in the first element and kept verbatim in the second.
+#[test]
+fn test_template_element_template_character() {
+    // $
+    assert_parse_success!(template_character, "$", ('$', "$".to_string()));
+    // escape sequence
+    assert_parse_success!(template_character, "\\n", ('\n', "\\n".to_string()));
+    assert_parse_success!(template_character, "\\x0A", ('\n', "\\x0A".to_string()));
+    assert_parse_success!(
+        template_character,
+        "\\u2764",
+        ('❤', "\\u2764".to_string())
+    );
+    assert_parse_success!(
+        template_character,
+        "\\u{2764}",
+        ('❤', "\\u{2764}".to_string())
+    );
+    // line continuation
+    // The bare characters CR, LF, U+2028 and U+2029 (no backslash) are expected to be
+    // accepted and returned unchanged in both tuple positions.
+    for line_continuation_char in "\r\n\u{2028}\u{2029}".chars() {
+        let slice: &str = &line_continuation_char.to_string();
+        assert_parse_success!(
+            template_character,
+            slice,
+            (line_continuation_char, line_continuation_char.to_string())
+        );
+    }
+}
+
+// Expects `this` to parse to `Expression::ThisExpression` covering all four characters.
+#[test]
+fn test_primary_expression_this() {
+    assert_parse_success!(
+        primary_expression,
+        "this",
+        Expression::ThisExpression {
+            loc: Some(((1, 0), (1, 4)).into())
+        }
+    );
+}
+
+// Expects a plain identifier to parse to `Expression::Identifier` carrying its name.
+#[test]
+fn test_primary_expression_identifier_reference() {
+    assert_parse_success!(
+        primary_expression,
+        "abc123",
+        Expression::Identifier {
+            loc: Some(((1, 0), (1, 6)).into()),
+            name: "abc123".to_string()
+        }
+    );
+}
+
+// Expects each literal kind (null, booleans, number, string, regex) to parse through
+// `primary_expression` into `Expression::Literal` wrapping the matching `Literal` variant.
+#[test]
+fn test_primary_expression_literal() {
+    assert_parse_success!(
+        primary_expression,
+        "null",
+        Expression::Literal {
+            value: Literal::NullLiteral(NullLiteral),
+            loc: Some(((1, 0), (1, 4)).into())
+        }
+    );
+    assert_parse_success!(
+        primary_expression,
+        "true",
+        Expression::Literal {
+            value: Literal::BooleanLiteral(BooleanLiteral(true)),
+            loc: Some(((1, 0), (1, 4)).into())
+        }
+    );
+    assert_parse_success!(
+        primary_expression,
+        "false",
+        Expression::Literal {
+            value: Literal::BooleanLiteral(BooleanLiteral(false)),
+            loc: Some(((1, 0), (1, 5)).into())
+        }
+    );
+    // `123.e1` has an empty fraction part and an exponent; expected value is 1230.
+    assert_parse_success!(
+        primary_expression,
+        "123.e1",
+        Expression::Literal {
+            value: Literal::NumericLiteral(NumericLiteral(1230f64)),
+            loc: Some(((1, 0), (1, 6)).into())
+        }
+    );
+    assert_parse_success!(
+        primary_expression,
+        "'abc'",
+        Expression::Literal {
+            value: Literal::StringLiteral(StringLiteral("abc".to_string())),
+            loc: Some(((1, 0), (1, 5)).into()),
+        }
+    );
+    assert_parse_success!(
+        primary_expression,
+        "/\\a/",
+        Expression::Literal {
+            value: Literal::RegExpLiteral(RegExpLiteral {
+                pattern: "\\a".to_string(),
+                flags: String::new(),
+            }),
+            loc: Some(((1, 0), (1, 4)).into())
+        }
+    );
+}
+
+// Expects `[]` to parse to an `ArrayExpression` with no elements.
+#[test]
+fn test_primary_expression_array_literal_empty() {
+    assert_parse_success!(
+        primary_expression,
+        "[]",
+        Expression::ArrayExpression {
+            loc: Some(((1, 0), (1, 2)).into()),
+            elements: Vec::new()
+        }
+    );
+}
+
+// Expects an array made only of commas to parse to an `ArrayExpression` whose `elements`
+// is empty, i.e. the holes are not represented in the expected value.
+#[test]
+fn test_primary_expression_array_literal_elision() {
+    assert_parse_success!(
+        primary_expression,
+        "[,,,,]",
+        Expression::ArrayExpression {
+            loc: Some(((1, 0), (1, 6)).into()),
+            elements: Vec::new()
+        }
+    );
+}
+
+// Expects holes mixed with real elements to yield only the real elements, in order.
+// The second case does the same with a spread element, whose own location
+// ((1, 4), (1, 12)) covers `...yield`.
+#[test]
+fn test_primary_expression_array_literal_elision_and_elements() {
+    assert_parse_success!(
+        primary_expression,
+        "[,,,,yield,,yield,,,]",
+        Expression::ArrayExpression {
+            loc: Some(((1, 0), (1, 21)).into()),
+            elements: vec![
+                ExpressionListItem::Expression(Expression::Yield {
+                    argument: None,
+                    delegate: false,
+                }),
+                ExpressionListItem::Expression(Expression::Yield {
+                    argument: None,
+                    delegate: false,
+                }),
+            ],
+        }
+    );
+    assert_parse_success!(
+        primary_expression,
+        "[,,,...yield,,,]",
+        Expression::ArrayExpression {
+            loc: Some(((1, 0), (1, 16)).into()),
+            elements: vec![ExpressionListItem::Spread(
+                Some(((1, 4), (1, 12)).into()),
+                Expression::Yield {
+                    argument: None,
+                    delegate: false,
+                },
+            )],
+        }
+    );
+}
+
+// Expects `{}` to parse to an `ObjectExpression` with no properties.
+#[test]
+fn test_primary_expression_object_literal_empty() {
+    assert_parse_success!(
+        primary_expression,
+        "{}",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 2)).into()),
+            properties: Vec::new()
+        }
+    );
+}
+
+// Expects a shorthand property to produce a `Property` whose key and value are the same
+// identifier (same name, same location) with `shorthand: true`.
+#[test]
+fn test_primary_expression_object_literal_shorthand() {
+    assert_parse_success!(
+        primary_expression,
+        "{ id }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 6)).into()),
+            properties: vec![Property {
+                kind: PropertyKind::Init,
+                key: Expression::Identifier {
+                    loc: Some(((1, 2), (1, 4)).into()),
+                    name: "id".to_string(),
+                },
+                value: Expression::Identifier {
+                    loc: Some(((1, 2), (1, 4)).into()),
+                    name: "id".to_string(),
+                },
+                method: false,
+                shorthand: true,
+                computed: false,
+                loc: Some(((1, 2), (1, 4)).into()),
+            }],
+        }
+    );
+}
+
+// Expects two comma-separated shorthand properties to produce two `Property` entries in
+// source order, each located at its own identifier.
+#[test]
+fn test_primary_expression_object_literal_multiple_properties() {
+    assert_parse_success!(
+        primary_expression,
+        "{ id, id2 }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 11)).into()),
+            properties: vec![
+                Property {
+                    kind: PropertyKind::Init,
+                    key: Expression::Identifier {
+                        loc: Some(((1, 2), (1, 4)).into()),
+                        name: "id".to_string(),
+                    },
+                    value: Expression::Identifier {
+                        loc: Some(((1, 2), (1, 4)).into()),
+                        name: "id".to_string(),
+                    },
+                    method: false,
+                    shorthand: true,
+                    computed: false,
+                    loc: Some(((1, 2), (1, 4)).into()),
+                },
+                Property {
+                    kind: PropertyKind::Init,
+                    key: Expression::Identifier {
+                        loc: Some(((1, 6), (1, 9)).into()),
+                        name: "id2".to_string(),
+                    },
+                    value: Expression::Identifier {
+                        loc: Some(((1, 6), (1, 9)).into()),
+                        name: "id2".to_string(),
+                    },
+                    method: false,
+                    shorthand: true,
+                    computed: false,
+                    loc: Some(((1, 6), (1, 9)).into()),
+                },
+            ],
+        }
+    );
+}
+
+// Same expectation as the two-property case, but the input ends with a trailing comma
+// before the closing brace; only the object's `loc` end changes (12 instead of 11).
+// NOTE(review): despite "semicolon" in the name, the input exercises a trailing comma.
+#[test]
+fn test_primary_expression_object_literal_multiple_properties_ending_semicolon() {
+    assert_parse_success!(
+        primary_expression,
+        "{ id, id2, }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 12)).into()),
+            properties: vec![
+                Property {
+                    kind: PropertyKind::Init,
+                    key: Expression::Identifier {
+                        loc: Some(((1, 2), (1, 4)).into()),
+                        name: "id".to_string(),
+                    },
+                    value: Expression::Identifier {
+                        loc: Some(((1, 2), (1, 4)).into()),
+                        name: "id".to_string(),
+                    },
+                    method: false,
+                    shorthand: true,
+                    computed: false,
+                    loc: Some(((1, 2), (1, 4)).into()),
+                },
+                Property {
+                    kind: PropertyKind::Init,
+                    key: Expression::Identifier {
+                        loc: Some(((1, 6), (1, 9)).into()),
+                        name: "id2".to_string(),
+                    },
+                    value: Expression::Identifier {
+                        loc: Some(((1, 6), (1, 9)).into()),
+                        name: "id2".to_string(),
+                    },
+                    method: false,
+                    shorthand: true,
+                    computed: false,
+                    loc: Some(((1, 6), (1, 9)).into()),
+                },
+            ],
+        }
+    );
+}
+
+// Expects `key: value` to produce a non-shorthand `Init` property whose `loc` runs from
+// the start of the key to the end of the value.
+#[test]
+fn test_primary_expression_object_literal_initializer() {
+    assert_parse_success!(
+        primary_expression,
+        "{ id: true }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 12)).into()),
+            properties: vec![Property {
+                kind: PropertyKind::Init,
+                key: Expression::Identifier {
+                    loc: Some(((1, 2), (1, 4)).into()),
+                    name: "id".to_string(),
+                },
+                value: Expression::Literal {
+                    value: Literal::BooleanLiteral(BooleanLiteral(true)),
+                    loc: Some(((1, 6), (1, 10)).into()),
+                },
+                method: false,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 10)).into()),
+            }],
+        }
+    );
+}
+
+// Expects a string-literal key to be kept as an `Expression::Literal` (quotes included in
+// its location) and the property not to be marked computed.
+#[test]
+fn test_object_literal_initializer_string_literal() {
+    assert_parse_success!(
+        primary_expression,
+        "{ 'id': true }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 14)).into()),
+            properties: vec![Property {
+                kind: PropertyKind::Init,
+                key: Expression::Literal {
+                    value: Literal::StringLiteral(StringLiteral("id".to_string())),
+                    loc: Some(((1, 2), (1, 6)).into()),
+                },
+                value: Expression::Literal {
+                    value: Literal::BooleanLiteral(BooleanLiteral(true)),
+                    loc: Some(((1, 8), (1, 12)).into()),
+                },
+                method: false,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 12)).into()),
+            }],
+        }
+    );
+}
+
+// Expects a numeric-literal key to be kept as a numeric `Expression::Literal` and the
+// property not to be marked computed.
+#[test]
+fn test_object_literal_initializer_numeric_literal() {
+    assert_parse_success!(
+        primary_expression,
+        "{ 0: true }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 11)).into()),
+            properties: vec![Property {
+                kind: PropertyKind::Init,
+                key: Expression::Literal {
+                    value: Literal::NumericLiteral(NumericLiteral(0f64)),
+                    loc: Some(((1, 2), (1, 3)).into()),
+                },
+                value: Expression::Literal {
+                    value: Literal::BooleanLiteral(BooleanLiteral(true)),
+                    loc: Some(((1, 5), (1, 9)).into()),
+                },
+                method: false,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 9)).into()),
+            }],
+        }
+    );
+}
+
+// Expects a bracketed key to set `computed: true`, with the key being the expression
+// inside the brackets; the property `loc` starts at the opening bracket (column 2).
+#[test]
+fn test_object_literal_initializer_computed() {
+    assert_parse_success!(
+        primary_expression,
+        "{ [yield]: true }",
+        Expression::ObjectExpression {
+            loc: Some(((1, 0), (1, 17)).into()),
+            properties: vec![Property {
+                kind: PropertyKind::Init,
+                key: Expression::Yield {
+                    argument: None,
+                    delegate: false,
+                },
+                value: Expression::Literal {
+                    value: Literal::BooleanLiteral(BooleanLiteral(true)),
+                    loc: Some(((1, 11), (1, 15)).into()),
+                },
+                method: false,
+                shorthand: false,
+                computed: true,
+                loc: Some(((1, 2), (1, 15)).into()),
+            }],
+        }
+    );
+}
+
+// Expects a method definition to produce an `Init` property with `method: true` whose
+// value is an anonymous `FunctionExpression` with no params and an empty body.
+// NOTE(review): the input as shown is 16 characters but the expected object `loc` ends at
+// column 17; a run of spaces inside the literal (e.g. between the inner braces) may have
+// been collapsed in this copy of the patch. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition() {
+    assert_parse_success!(
+        primary_expression,
+        "{ method() { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "method".to_string(),
+                    loc: Some(((1, 2), (1, 8)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![],
+                    body: vec![],
+                    async: false,
+                    generator: false,
+                },
+                kind: PropertyKind::Init,
+                method: true,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 15)).into()),
+            }],
+            loc: Some(((1, 0), (1, 17)).into()),
+        }
+    );
+}
+
+// Expects a `*` prefix to set `generator: true` on the function value; the expected
+// property `loc` starts at the method name (column 4), not at the `*`.
+// NOTE(review): input is 18 characters as shown, expected `loc` ends at 19; possibly
+// collapsed whitespace in the literal. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition_generator() {
+    assert_parse_success!(
+        primary_expression,
+        "{ * method() { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "method".to_string(),
+                    loc: Some(((1, 4), (1, 10)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![],
+                    body: vec![],
+                    async: false,
+                    generator: true,
+                },
+                kind: PropertyKind::Init,
+                method: true,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 4), (1, 17)).into()),
+            }],
+            loc: Some(((1, 0), (1, 19)).into()),
+        }
+    );
+}
+
+// Expects an `async` prefix to set `async: true` on the function value; the expected
+// property `loc` starts at the method name (column 8), not at `async`.
+// NOTE(review): input is 22 characters as shown, expected `loc` ends at 23; possibly
+// collapsed whitespace in the literal. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition_async() {
+    assert_parse_success!(
+        primary_expression,
+        "{ async method() { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "method".to_string(),
+                    loc: Some(((1, 8), (1, 14)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![],
+                    body: vec![],
+                    async: true,
+                    generator: false,
+                },
+                kind: PropertyKind::Init,
+                method: true,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 8), (1, 21)).into()),
+            }],
+            loc: Some(((1, 0), (1, 23)).into()),
+        }
+    );
+}
+
+// Expects `async *` to set both `async: true` and `generator: true`; the expected
+// property `loc` again starts at the method name (column 10).
+// NOTE(review): input is 24 characters as shown, expected `loc` ends at 25; possibly
+// collapsed whitespace in the literal. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition_async_generator() {
+    assert_parse_success!(
+        primary_expression,
+        "{ async * method() { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "method".to_string(),
+                    loc: Some(((1, 10), (1, 16)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![],
+                    body: vec![],
+                    async: true,
+                    generator: true,
+                },
+                kind: PropertyKind::Init,
+                method: true,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 10), (1, 23)).into()),
+            }],
+            loc: Some(((1, 0), (1, 25)).into()),
+        }
+    );
+}
+
+// Expects a getter to produce `PropertyKind::Get` with `method: false`; unlike the
+// method cases above, the expected property `loc` starts at the `get` keyword (column 2).
+// NOTE(review): input is 17 characters as shown, expected `loc` ends at 18; possibly
+// collapsed whitespace in the literal. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition_getter() {
+    assert_parse_success!(
+        primary_expression,
+        "{ get key() { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "key".to_string(),
+                    loc: Some(((1, 6), (1, 9)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![],
+                    body: vec![],
+                    async: false,
+                    generator: false,
+                },
+                kind: PropertyKind::Get,
+                method: false,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 16)).into()),
+            }],
+            loc: Some(((1, 0), (1, 18)).into()),
+        }
+    );
+}
+
+// Expects a setter to produce `PropertyKind::Set` with `method: false` and its single
+// parameter as a `Pattern::Identifier`; the property `loc` starts at the `set` keyword.
+// NOTE(review): input is 22 characters as shown, expected `loc` ends at 23; possibly
+// collapsed whitespace in the literal. Confirm against the original file.
+#[test]
+fn test_object_literal_method_definition_setter() {
+    assert_parse_success!(
+        primary_expression,
+        "{ set key(value) { } }",
+        Expression::ObjectExpression {
+            properties: vec![Property {
+                key: Expression::Identifier {
+                    name: "key".to_string(),
+                    loc: Some(((1, 6), (1, 9)).into()),
+                },
+                value: Expression::FunctionExpression {
+                    id: None,
+                    params: vec![Pattern::Identifier(Identifier(
+                        Some(((1, 10), (1, 15)).into()),
+                        "value".to_string(),
+                    ))],
+                    body: vec![],
+                    async: false,
+                    generator: false,
+                },
+                kind: PropertyKind::Set,
+                method: false,
+                shorthand: false,
+                computed: false,
+                loc: Some(((1, 2), (1, 21)).into()),
+            }],
+            loc: Some(((1, 0), (1, 23)).into()),
+        }
+    );
+}
+
+// Expects a self-closing JSX element to parse to `JsxElementExpression` with the tag
+// name and no attributes or children.
+// NOTE(review): the input literal below contains only a line break in this copy, while
+// the expectation names a "div" element spanning columns 0..6. The markup (presumably a
+// six-character self-closing div tag) appears to have been stripped; restore it from the
+// original file before relying on this test.
+#[test]
+fn test_primary_expression_jsx_self_closing() {
+    assert_parse_success!(
+        primary_expression,
+        "
",
+        Expression::JsxElementExpression {
+            attributes: Vec::new(),
+            children: Vec::new(),
+            name: "div".to_string(),
+            loc: Some(((1, 0), (3, 6)).into())
+        }
+    );
+}
+
+// Expects an opening/closing tag pair separated by two line feeds to parse to a "div"
+// element ending on line 3, and (second assertion) a non-matching pair to be rejected.
+// NOTE(review): as above, the tags themselves are missing from both input literals in
+// this copy; only the `\n\n` between them survived. The closing tag used in the failure
+// case cannot be determined from here. Restore both from the original file.
+#[test]
+fn test_primary_expression_jsx_opening_closing_match() {
+    assert_parse_success!(
+        primary_expression,
+        "
\n\n
",
+        Expression::JsxElementExpression {
+            attributes: Vec::new(),
+            children: Vec::new(),
+            name: "div".to_string(),
+            loc: Some(((1, 0), (3, 6)).into())
+        }
+    );
+    assert_parse_failure!(primary_expression, "
\n\n");
+}