diff --git a/cellbuf/wrap.go b/cellbuf/wrap.go
new file mode 100644
index 00000000..59a2a337
--- /dev/null
+++ b/cellbuf/wrap.go
@@ -0,0 +1,203 @@
+package cellbuf
+
+import (
+	"bytes"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/charmbracelet/x/ansi"
+)
+
+// Wrap returns a string that is wrapped to the specified limit while keeping
+// track of the ANSI styles and hyperlinks in the string: whenever a newline
+// is written, the active style and hyperlink are reset before it and applied
+// again after it. It tries to wrap the string at word boundaries, but will
+// break words if necessary.
+//
+// The breakpoints string is a list of characters that are considered
+// breakpoints for word wrapping. A hyphen (-) is always considered a
+// breakpoint.
+//
+// Note: only single-byte (ASCII) characters are tested against breakpoints,
+// so a multi-byte rune listed there never acts as a breakpoint.
+//
+// Wrap returns "" for an empty string and returns s unchanged when limit is
+// less than 1.
+func Wrap(s string, limit int, breakpoints string) string {
+	if len(s) == 0 {
+		return ""
+	}
+
+	if limit < 1 {
+		return s
+	}
+
+	p := ansi.GetParser()
+	defer ansi.PutParser(p)
+
+	var (
+		buf             bytes.Buffer // wrapped output
+		word            bytes.Buffer // pending word, including escape sequences
+		space           bytes.Buffer // pending whitespace
+		style, curStyle Style        // style read so far; style as of the last flushed word
+		link, curLink   Link         // link read so far; link as of the last flushed word
+		curWidth        int          // width accounted to the current line
+		wordLen         int          // width of the pending word
+	)
+
+	// addSpace flushes the pending whitespace to the output, counting one
+	// cell per byte.
+	addSpace := func() {
+		curWidth += space.Len()
+		buf.Write(space.Bytes())
+		space.Reset()
+	}
+
+	// addWord flushes the pending whitespace and word to the output and
+	// records the style and link in effect at that point.
+	addWord := func() {
+		if word.Len() == 0 {
+			return
+		}
+
+		curLink = link
+		curStyle = style
+
+		addSpace()
+		curWidth += wordLen
+		buf.Write(word.Bytes())
+		word.Reset()
+		wordLen = 0
+	}
+
+	// addNewline writes a line break, resetting the current style and
+	// hyperlink before it and restoring them after it, and discards any
+	// pending whitespace.
+	addNewline := func() {
+		if !curStyle.Empty() {
+			buf.WriteString(ansi.ResetStyle)
+		}
+		if !curLink.Empty() {
+			buf.WriteString(ansi.ResetHyperlink())
+		}
+		buf.WriteByte('\n')
+		if !curLink.Empty() {
+			buf.WriteString(ansi.SetHyperlink(curLink.URL, curLink.Params))
+		}
+		if !curStyle.Empty() {
+			buf.WriteString(curStyle.Sequence())
+		}
+		curWidth = 0
+		space.Reset()
+	}
+
+	// flushSpace handles pending whitespace when no visible word is pending:
+	// it is written out if it still fits on the current line and dropped
+	// otherwise. It does nothing while a word is pending.
+	flushSpace := func() {
+		if wordLen != 0 {
+			return
+		}
+
+		if curWidth+space.Len() > limit {
+			curWidth = 0
+		} else {
+			// preserve whitespaces
+			buf.Write(space.Bytes())
+		}
+		space.Reset()
+	}
+
+	var state byte
+	for len(s) > 0 {
+		seq, width, n, newState := ansi.DecodeSequence(s, state, p)
+		switch width {
+		case 0:
+			// Zero-width sequences: tab, newline and escape sequences.
+			if ansi.Equal(seq, "\t") {
+				addWord()
+				space.WriteString(seq)
+				break
+			} else if ansi.Equal(seq, "\n") {
+				flushSpace()
+				addWord()
+				addNewline()
+				break
+			} else if ansi.HasCsiPrefix(seq) && p.Command() == 'm' {
+				// SGR style sequence [ansi.SGR]
+				ReadStyle(p.Params(), &style)
+			} else if ansi.HasOscPrefix(seq) && p.Command() == 8 {
+				// Hyperlink sequence [ansi.SetHyperlink]
+				ReadLink(p.Data(), &link)
+			}
+
+			// Every zero-width sequence other than tab and newline is
+			// appended to the pending word.
+			word.WriteString(seq)
+		default:
+			if len(seq) == 1 {
+				// ASCII
+				r, _ := utf8.DecodeRuneInString(seq)
+				if unicode.IsSpace(r) {
+					addWord()
+					space.WriteRune(r)
+					break
+				} else if r == '-' || runeContainsAny(r, breakpoints) {
+					// Keep the breakpoint on the current line when it fits;
+					// otherwise it is handled like any other character.
+					addSpace()
+					if curWidth+wordLen+width <= limit {
+						addWord()
+						buf.WriteString(seq)
+						curWidth += width
+						break
+					}
+				}
+			}
+
+			if wordLen+width > limit {
+				// Hardwrap the word if it's too long
+				addWord()
+			}
+
+			word.WriteString(seq)
+			wordLen += width
+
+			if curWidth+wordLen+space.Len() > limit {
+				addNewline()
+			}
+		}
+
+		s = s[n:]
+		state = newState
+	}
+
+	flushSpace()
+	addWord()
+
+	if !curLink.Empty() {
+		buf.WriteString(ansi.ResetHyperlink())
+	}
+	if !curStyle.Empty() {
+		buf.WriteString(ansi.ResetStyle)
+	}
+
+	return buf.String()
+}
+
+// runeContainsAny reports whether r occurs in s. The argument is searched in
+// place rather than being converted to a new []rune on every call.
+func runeContainsAny[T string | []rune](r rune, s T) bool {
+	switch v := any(s).(type) {
+	case string:
+		return strings.ContainsRune(v, r)
+	case []rune:
+		for _, c := range v {
+			if c == r {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/cellbuf/wrap_test.go b/cellbuf/wrap_test.go
new file mode 100644
index 00000000..543d3ca2
--- /dev/null
+++ b/cellbuf/wrap_test.go
@@ -0,0 +1,165 @@
+package cellbuf
+
+import (
+	"testing"
+)
+
+// wrapCases pairs inputs for Wrap with the limit to wrap at and the expected
+// output. Runs of spaces inside the strings are significant.
+var wrapCases = []struct {
+	name     string
+	input    string
+	expected string
+	width    int
+}{
+	{
+		name:     "simple",
+		input:    "I really \x1B[38;2;249;38;114mlove the\x1B[0m Go language!",
+		expected: "I really \x1B[38;2;249;38;114mlove\x1b[m\n\x1B[38;2;249;38;114mthe\x1B[0m Go\nlanguage!",
+		width:    14,
+	},
+	{
+		name:     "passthrough",
+		input:    "hello world",
+		expected: "hello world",
+		width:    11,
+	},
+	{
+		name:     "asian",
+		input:    "こんにち",
+		expected: "こんに\nち",
+		width:    7,
+	},
+	{
+		name:     "emoji",
+		input:    "😃👰🏻‍♀️🫧",
+		expected: "😃\n👰🏻‍♀️\n🫧",
+		width:    2,
+	},
+	{
+		name:     "long style",
+		input:    "\x1B[38;2;249;38;114ma really long string\x1B[0m",
+		expected: "\x1B[38;2;249;38;114ma really\x1b[m\n\x1B[38;2;249;38;114mlong\x1b[m\n\x1B[38;2;249;38;114mstring\x1B[0m",
+		width:    10,
+	},
+	{
+		name:     "long style nbsp",
+		input:    "\x1B[38;2;249;38;114ma really\u00a0long string\x1B[0m",
+		expected: "\x1b[38;2;249;38;114ma\x1b[m\n\x1b[38;2;249;38;114mreally\u00a0lon\x1b[m\n\x1b[38;2;249;38;114mg string\x1b[0m",
+		width:    10,
+	},
+	{
+		name:     "longer",
+		input:    "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog.",
+		expected: "the quick brown\nfoxxxxxxxxxxxxxx\nxx jumped over\nthe lazy dog.",
+		width:    16,
+	},
+	{
+		name:     "longer asian",
+		input:    "猴 猴 猴猴 猴猴猴猴猴猴猴猴猴 猴猴猴 猴猴 猴’ 猴猴 猴.",
+		expected: "猴 猴 猴猴\n猴猴猴猴猴猴猴猴\n猴 猴猴猴 猴猴\n猴’ 猴猴 猴.",
+		width:    16,
+	},
+	{
+		name:     "long input",
+		input:    "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-on-the-rocks.",
+		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/animal-like-law-\non-the-rocks.",
+		width:    76,
+	},
+	{
+		name:     "long input2",
+		input:    "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-operating-system.",
+		expected: "Rotated keys for a-good-offensive-cheat-code-incorporated/crypto-line-\noperating-system.",
+		width:    76,
+	},
+	{
+		name:     "hyphen breakpoint",
+		input:    "a-good-offensive-cheat-code",
+		expected: "a-good-\noffensive-\ncheat-code",
+		width:    10,
+	},
+	{
+		name:     "exact",
+		input:    "\x1b[91mfoo\x1b[0m",
+		expected: "\x1b[91mfoo\x1b[0m",
+		width:    3,
+	},
+	{
+		// XXX: Should we preserve spaces on text wrapping?
+		name:     "extra space",
+		input:    "foo ",
+		expected: "foo",
+		width:    3,
+	},
+	{
+		name:     "extra space style",
+		input:    "\x1b[mfoo \x1b[m",
+		expected: "\x1b[mfoo\x1b[m",
+		width:    3,
+	},
+	{
+		name:     "paragraph with styles",
+		input:    "Lorem ipsum dolor \x1b[1msit\x1b[m amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \x1b[31mUt enim\x1b[m ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea \x1b[38;5;200mcommodo consequat\x1b[m. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. \x1b[1;2;33mExcepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\x1b[m",
+		expected: "Lorem ipsum dolor \x1b[1msit\x1b[m amet,\nconsectetur adipiscing elit,\nsed do eiusmod tempor\nincididunt ut labore et dolore\nmagna aliqua. \x1b[31mUt enim\x1b[m ad minim\nveniam, quis nostrud\nexercitation ullamco laboris\nnisi ut aliquip ex ea \x1b[38;5;200mcommodo\x1b[m\n\x1b[38;5;200mconsequat\x1b[m. Duis aute irure\ndolor in reprehenderit in\nvoluptate velit esse cillum\ndolore eu fugiat nulla\npariatur. \x1b[1;2;33mExcepteur sint\x1b[m\n\x1b[1;2;33moccaecat cupidatat non\x1b[m\n\x1b[1;2;33mproident, sunt in culpa qui\x1b[m\n\x1b[1;2;33mofficia deserunt mollit anim\x1b[m\n\x1b[1;2;33mid est laborum.\x1b[m",
+		width:    30,
+	},
+	{"hyphen break", "foo-bar", "foo-\nbar", 5},
+	{"double space", "f  bar foobaz", "f  bar\nfoobaz", 6},
+	{"passthrough", "foobar\n ", "foobar\n ", 0},
+	{"pass", "foo", "foo", 3},
+	{"toolong", "foobarfoo", "foob\narfo\no", 4},
+	{"white space", "foo bar foo", "foo\nbar\nfoo", 4},
+	{"broken_at_spaces", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
+	{"hyphen", "foob-foobar", "foob\n-foo\nbar", 4},
+	{"wide_emoji_breakpoint", "foo🫧 foobar", "foo\n🫧\nfoob\nar", 4},
+	{"space_breakpoint", "foo --bar", "foo --bar", 9},
+	{"simple", "foo bars foobars", "foo\nbars\nfoob\nars", 4},
+	{"limit", "foo bar", "foo\nbar", 5},
+	{"remove white spaces", "foo    \nb   ar   ", "foo\nb\nar", 4},
+	{"white space trail width", "foo\nb\t a\n bar", "foo\nb\t a\n bar", 4},
+	{"explicit_line_break", "foo bar foo\n", "foo\nbar\nfoo\n", 4},
+	{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", "\nfoo\nbar\n\n\nfoo\n", 4},
+	{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo  \nbar    ", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar", 6},
+	{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7},
+	{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", "\x1b[38;2;249;38;114m(\x1b[0m\x1b[38;2;248;248;242mjust\x1b[m\n\x1b[38;2;248;248;242manother\x1b[m\n\x1b[38;2;248;248;242mtest\x1b[38;2;249;38;114m)\x1b[0m", 7},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\x1b]8;;\x07\n\x1b]8;;https://example.com\x07สวัสดีสวัสดี\x1b]8;;\x1b\\", 8},
+	{"tab", "foo\tbar", "foo\nbar", 3},
+	{"wrapped styles example", "", "", 10},
+}
+
+// TestWrap runs every entry of wrapCases through Wrap without additional
+// breakpoints and compares the result with the expected output.
+func TestWrap(t *testing.T) {
+	for i, tc := range wrapCases {
+		t.Run(tc.name, func(t *testing.T) {
+			output := Wrap(tc.input, tc.width, "")
+			if output != tc.expected {
+				t.Errorf("case %d, input %q, expected %q, got %q", i+1, tc.input, tc.expected, output)
+			}
+		})
+	}
+}
+
+// TestWrapBreakpoints checks that a character passed in breakpoints allows a
+// line break after it, and that the same character is plain text otherwise.
+func TestWrapBreakpoints(t *testing.T) {
+	cases := []struct {
+		name        string
+		input       string
+		breakpoints string
+		expected    string
+		width       int
+	}{
+		{"custom breakpoint", "foo/bar", "/", "foo/\nbar", 5},
+		{"no custom breakpoint", "foo/bar", "", "foo/b\nar", 5},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			output := Wrap(tc.input, tc.width, tc.breakpoints)
+			if output != tc.expected {
+				t.Errorf("input %q, breakpoints %q, expected %q, got %q", tc.input, tc.breakpoints, tc.expected, output)
+			}
+		})
+	}
+}