Skip to content

Commit

Permalink
refactor(md/codeBlock): use parser module; export parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
pskfyi authored and AustinArey committed Jun 8, 2023
1 parent 1cc011f commit 94b75a3
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 62 deletions.
37 changes: 23 additions & 14 deletions md/codeBlock/fenced.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { regexp } from "../../parser/regexp.ts";
import { line } from "../../parser/named.ts";
import { Text } from "../../string/Text.ts";
import { mostConsecutive } from "../../string/sequence.ts";
import type { Pretty } from "../../ts/types.ts";
import * as infoString from "./infoString.ts";
import { FENCED_CODE_BLOCK_REGEX } from "./regex.ts";

export type FencedCodeBlockDetails = Pretty<
& {
Expand Down Expand Up @@ -58,24 +59,32 @@ export function create(
return fence + _infoString + "\n" + code + "\n" + fence;
}

export function parse(codeBlock: string): FencedCodeBlockDetails {
const match = codeBlock.match(FENCED_CODE_BLOCK_REGEX);
const { fence, infoString: _infoString, code = "" } = match?.groups ?? {};
const { lang, meta } = infoString.parse(_infoString);
const type = "fenced" as const;
const fence = regexp(/^(?<fence>`{3,}|~{3,})([\s\S]*?)(\r?\n|\r)^\k<fence>/m)
.groups(1, 2)
.toObject("fence", "data");

export const parser = fence
.into(({ fence, data }) => {
const [{ lang, meta }, cursor] = infoString.parser.parse(data);
const code = cursor.remainder;
const type = "fenced" as const;

const data: FencedCodeBlockDetails = { type, fence, code };
const details: FencedCodeBlockDetails = { type, fence, code };

if (lang) data.lang = lang;
if (meta) data.meta = meta;
if (lang) details.lang = lang;
if (meta) details.meta = meta;

return data;
return details;
})
.named("md.codeBlock.fenced");

/**
 * Parses a single fenced code block.
 *
 * Runs the module's `parser` over `codeBlock` and returns the first element
 * of the tuple it produces, i.e. the parsed details.
 *
 * @param codeBlock The text of the fenced code block, fences included.
 * @returns The details extracted from the block.
 */
export function parse(codeBlock: string): FencedCodeBlockDetails {
  const [details] = parser.parse(codeBlock);

  return details;
}

export function findAll(markdown: string): FencedCodeBlockSearchResult[] {
const text = new Text(markdown);
const regex = new RegExp(FENCED_CODE_BLOCK_REGEX, "gm");
const [results] = parser.node.or(line.ignore).zeroOrMore
.parse(markdown);

return [...markdown.matchAll(regex)]
.map((match) => [parse(match[0]), text.locationAt(match.index ?? 0)]);
return results.map(({ value: details, start }) => [details, start]);
}
2 changes: 1 addition & 1 deletion md/codeBlock/findAll.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ it("gets blocks & locations", () =>
findAll("foo\n\n bar\n\n```baz\nqux\n```"),
[
[
{ type: "indented", code: "bar" },
{ type: "indented", code: "bar\n" },
{ column: 1, line: 3, offset: 5 },
],
[
Expand Down
14 changes: 8 additions & 6 deletions md/codeBlock/findAll.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import { FencedCodeBlockSearchResult, findAll as fenced } from "./fenced.ts";
import { line } from "../../parser/named.ts";
import { FencedCodeBlockSearchResult, parser as fenced } from "./fenced.ts";
import {
findAll as indented,
IndentedCodeBlockSearchResult,
parser as indented,
} from "./indented.ts";

/**
 * A single search hit for either supported kind of code block:
 * a fenced block result or an indented block result.
 */
export type SearchResult =
  FencedCodeBlockSearchResult | IndentedCodeBlockSearchResult;

export function findAll(markdown: string): SearchResult[] {
return [
...indented(markdown),
...fenced(markdown),
];
const [results] = fenced.node.or(indented.node).or(line.ignore).zeroOrMore
.parse(markdown);

return results
.map(({ value: details, start }) => [details, start] as SearchResult);
}
21 changes: 16 additions & 5 deletions md/codeBlock/indented.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,21 @@ describe("parse", () => {

describe("result.code", () => {
test("single lines", () => assertParse(" foo", "foo"));
test("extra indents", () => assertParse(" foo", "foo"));
test("multiple lines", () => assertParse(" foo\n bar", "foo\nbar"));
test("extra indents", () => assertParse(" foo", " foo"));
test("multiple lines", () => {
assertParse(" foo\n bar", "foo\nbar");
assertParse(" foo\n bar\n baz", "foo\nbar\nbaz");
});
test("mixed indents", () => assertParse(" foo\n bar", " foo\nbar"));
test("blank lines", () => assertParse(" X\n\n\n Y", "X\n\n\nY"));
test("blank lines", () => {
assertParse(" X\n\n\n Y", "X\n\n\nY");
assertParse(" X\n \n Y", "X\n \nY");
});
});

test("invalid cases", () => {
assertThrows(() => parse(" \n X"));
assertThrows(() => parse(" \n X"));
});
});

Expand All @@ -38,11 +49,11 @@ test("findAll", () =>
),
[
[
{ code: "foo\n\nbar", type },
{ code: "foo\n\nbar\n", type },
{ column: 1, line: 2, offset: 3 },
],
[
{ code: " quux", type },
{ code: " quux\n", type },
{ column: 1, line: 9, offset: 36 },
],
],
Expand Down
38 changes: 21 additions & 17 deletions md/codeBlock/indented.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { Text } from "../../string/Text.ts";
import { regexp } from "../../parser/regexp.ts";
import { string } from "../../parser/string.ts";
import { line, newline, whitespace } from "../../parser/named.ts";
import type { Text } from "../../string/Text.ts";
import { indent } from "../../string/indent.ts";
import { INDENTED_CODE_BLOCK_REGEX } from "./regex.ts";

export type IndentedCodeBlockDetails = {
type: "indented";
Expand All @@ -11,22 +13,24 @@ export function create(code: string): string {
return indent(code, 4);
}

export function parse(codeBlock: string): IndentedCodeBlockDetails {
const lines = codeBlock.split("\n");
const regex = /^ {4,}/;
const indent = Math.min(...lines
.map((line) => line.match(regex)?.[0].length || Infinity));
const indented = string(" ").ignore;
const indentedNonBlankLine = indented.and(line.nonBlank);

if (indent === Infinity) throw new TypeError("Invalid indented code block");
const upToFourSpaces = regexp(/^ {0,4}/).match.ignore;
const blankLines = indented.and(whitespace.inline)
.or(upToFourSpaces.and(newline))
.zeroOrMore.join();

const code = lines
.map((line) => line.slice(indent).trimEnd())
.join("\n")
.replace(/(^\n+|\n+$)/g, "");
const firstLine = indentedNonBlankLine;
const subsequentLines = blankLines.and(indentedNonBlankLine).join()
.zeroOrMore.join();

const type = "indented" as const;
export const parser = firstLine.and(subsequentLines).join()
.into((code) => ({ type: "indented" as const, code }))
.named("md.codeBlock.indented");

return { type, code };
/**
 * Parses a single indented code block.
 *
 * Runs the module's `parser` over `codeBlock` and returns the first element
 * of the tuple it produces, i.e. the parsed details.
 *
 * @param codeBlock The text of the indented code block.
 * @returns The details extracted from the block.
 */
export function parse(codeBlock: string): IndentedCodeBlockDetails {
  const [details] = parser.parse(codeBlock);

  return details;
}

export type IndentedCodeBlockSearchResult = [
Expand All @@ -35,8 +39,8 @@ export type IndentedCodeBlockSearchResult = [
];

export function findAll(markdown: string): IndentedCodeBlockSearchResult[] {
const text = new Text(markdown);
const [results] = parser.node.or(line.ignore).zeroOrMore
.parse(markdown);

return [...markdown.matchAll(INDENTED_CODE_BLOCK_REGEX)]
.map((match) => [parse(match[0]), text.locationAt(match.index ?? 0)]);
return results.map(({ value: details, start }) => [details, start]);
}
24 changes: 18 additions & 6 deletions md/codeBlock/infoString.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { splitOnFirst } from "../../string/splitOn.ts";
import { regexp } from "../../parser/regexp.ts";
import { line } from "../../parser/named.ts";

export class InfoStringError extends TypeError {
constructor(infoString: string) {
Expand Down Expand Up @@ -54,11 +55,22 @@ export function stringify({ lang, meta }: Info = {}): string {
: lang || "";
}

/* See https://spec.commonmark.org/0.30/#info-string */
export function parse(infoString: string): Info {
if (!infoString) return {};
const lang = regexp(/^\S+/).match; // one or more non-whitespace characters
const meta = line; // the rest of the line

export const parser = lang.optional.and(meta.optional)
.into(([lang, meta]) => {
const info: Info = {};
meta = meta?.trim();

const [lang, meta] = splitOnFirst(" ", infoString);
if (lang) info.lang = lang;
if (meta) info.meta = meta;

return { lang, meta };
return info;
})
.named("md.codeBlock.infoString");

/**
 * Parses a code block info string.
 *
 * Runs the module's `parser` over `infoString` and returns the first element
 * of the tuple it produces.
 *
 * See https://spec.commonmark.org/0.30/#info-string
 *
 * @param infoString The info string to parse.
 * @returns The parsed info.
 */
export function parse(infoString: string): Info {
  const [info] = parser.parse(infoString);

  return info;
}
13 changes: 0 additions & 13 deletions md/codeBlock/regex.ts

This file was deleted.

0 comments on commit 94b75a3

Please sign in to comment.