Skip to content

Commit

Permalink
[bugfix] Use custom bluemonday policy to disallow inline img tags (#2100
Browse files Browse the repository at this point in the history
)
  • Loading branch information
tsmethurst authored Aug 11, 2023
1 parent 3aedd93 commit dc96562
Show file tree
Hide file tree
Showing 17 changed files with 244 additions and 79 deletions.
30 changes: 27 additions & 3 deletions internal/ap/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package ap
import (
"github.com/superseriousbusiness/activity/pub"
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/gotosocial/internal/text"
)

/*
Expand Down Expand Up @@ -126,7 +127,8 @@ func NormalizeIncomingActivityObject(activity pub.Activity, rawJSON map[string]i
}

// NormalizeIncomingContent replaces the Content of the given item
// with the raw 'content' value from the raw json object map.
// with the sanitized version of the raw 'content' value from the
// raw json object map.
//
// noop if there was no content in the json object map or the
// content was not a plain string.
Expand All @@ -145,6 +147,14 @@ func NormalizeIncomingContent(item WithSetContent, rawJSON map[string]interface{
return
}

// Content should be HTML encoded by default:
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
//
// TODO: sanitize differently based on mediaType.
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
content = text.SanitizeToHTML(content)
content = text.MinifyHTML(content)

// Set normalized content property from the raw string;
// this replaces any existing content property on the item.
contentProp := streams.NewActivityStreamsContentProperty()
Expand All @@ -154,7 +164,8 @@ func NormalizeIncomingContent(item WithSetContent, rawJSON map[string]interface{

// NormalizeIncomingAttachments normalizes all attachments (if any) of the given
// item, replacing the 'name' (aka content warning) field of each attachment
// with the raw 'name' value from the raw json object map.
// with the raw 'name' value from the raw json object map, and doing sanitization
// on the result.
//
// noop if there are no attachments; noop if attachment is not a format
// we can understand.
Expand Down Expand Up @@ -212,7 +223,8 @@ func NormalizeIncomingAttachments(item WithAttachment, rawJSON map[string]interf
}

// NormalizeIncomingSummary replaces the Summary of the given item
// with the raw 'summary' value from the raw json object map.
// with the sanitized version of the raw 'summary' value from the
// raw json object map.
//
// noop if there was no summary in the json object map or the
// summary was not a plain string.
Expand All @@ -229,6 +241,11 @@ func NormalizeIncomingSummary(item WithSetSummary, rawJSON map[string]interface{
return
}

// Summary should be HTML encoded:
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-summary
summary = text.SanitizeToHTML(summary)
summary = text.MinifyHTML(summary)

// Set normalized summary property from the raw string; this
// will replace any existing summary property on the item.
summaryProp := streams.NewActivityStreamsSummaryProperty()
Expand All @@ -254,6 +271,13 @@ func NormalizeIncomingName(item WithSetName, rawJSON map[string]interface{}) {
return
}

// Name *must not* include any HTML markup:
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-name
//
// todo: We probably want to update this to allow
// *escaped* HTML markup, but for now just nuke it.
name = text.SanitizeToPlaintext(name)

// Set normalized name property from the raw string; this
// will replace any existing name property on the item.
nameProp := streams.NewActivityStreamsNameProperty()
Expand Down
16 changes: 8 additions & 8 deletions internal/ap/normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (suite *NormalizeTestSuite) getStatusableWithMultipleAttachments() (vocab.A
"type": "Document",
"mediaType": "image/jpeg",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg",
"name": "danger: #cute but will claw you :("
"name": "image of a cat & there's a note saying: <danger: #cute but will claw you :(>"
}
]
}`)
Expand Down Expand Up @@ -192,7 +192,7 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
)

ap.NormalizeIncomingActivityObject(create, map[string]interface{}{"object": rawNote})
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
}

func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() {
Expand Down Expand Up @@ -224,7 +224,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
Expand Down Expand Up @@ -265,7 +265,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
Expand Down Expand Up @@ -304,7 +304,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
},
{
"mediaType": "image/jpeg",
"name": "danger: #cute%20but%20will%20claw%20you%20:(",
"name": "image of a cat \u0026amp; there's a note saying: \u0026lt;danger: #cute but will claw you :(\u0026gt;",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}
Expand All @@ -326,7 +326,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
"attachment": [
{
"mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
Expand All @@ -343,7 +343,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
},
{
"mediaType": "image/jpeg",
"name": "danger: #cute but will claw you :(",
"name": "image of a cat \u0026 there's a note saying:",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
}
Expand Down Expand Up @@ -380,7 +380,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableSummary() {
suite.Equal(`warning: #WEIRD%20%23SUMMARY%20;;;;a;;a;asv%20%20%20%20khop8273987(*%5E&%5E)`, ap.ExtractSummary(statusable))

ap.NormalizeIncomingSummary(statusable, rawAccount)
suite.Equal(`warning: #WEIRD #SUMMARY ;;;;a;;a;asv khop8273987(*^&^)`, ap.ExtractSummary(statusable))
suite.Equal(`warning: #WEIRD #SUMMARY ;;;;a;;a;asv khop8273987(*^&^)`, ap.ExtractSummary(statusable))
}

func (suite *NormalizeTestSuite) TestNormalizeStatusableName() {
Expand Down
2 changes: 1 addition & 1 deletion internal/api/client/statuses/statuscreate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ type StatusCreateTestSuite struct {
const (
statusWithLinksAndTags = "#test alright, should be able to post #links with fragments in them now, let's see........\n\nhttps://docs.gotosocial.org/en/latest/user_guide/posts/#links\n\n#gotosocial\n\n(tobi remember to pull the docker image challenge)"
statusMarkdown = "# Title\n\n## Smaller title\n\nThis is a post written in [markdown](https://www.markdownguide.org/)\n\n<img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\"/>"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\">"
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p>"
)

// Post a new status with some custom visibility settings
Expand Down
2 changes: 1 addition & 1 deletion internal/processing/account/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (p *Processor) Create(
Username: form.Username,
Email: form.Email,
Password: form.Password,
Reason: text.SanitizePlaintext(reason),
Reason: text.SanitizeToPlaintext(reason),
PreApproved: !config.GetAccountsApprovalRequired(), // Mark as approved if no approval required.
SignUpIP: form.IP,
Locale: form.Locale,
Expand Down
8 changes: 4 additions & 4 deletions internal/processing/account/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
}

// Parse new display name (always from plaintext).
account.DisplayName = text.SanitizePlaintext(displayName)
account.DisplayName = text.SanitizeToPlaintext(displayName)

// If display name has changed, account emojis may have also changed.
emojisChanged = true
Expand Down Expand Up @@ -110,8 +110,8 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form

// Sanitize raw field values.
fieldRaw := &gtsmodel.Field{
Name: text.SanitizePlaintext(name),
Value: text.SanitizePlaintext(value),
Name: text.SanitizeToPlaintext(name),
Value: text.SanitizeToPlaintext(value),
}
fieldsRaw = append(fieldsRaw, fieldRaw)
}
Expand Down Expand Up @@ -255,7 +255,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
if err := validate.CustomCSS(customCSS); err != nil {
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
account.CustomCSS = text.SanitizePlaintext(customCSS)
account.CustomCSS = text.SanitizeToPlaintext(customCSS)
}

if form.EnableRSS != nil {
Expand Down
4 changes: 2 additions & 2 deletions internal/processing/admin/domainblock.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ func (p *Processor) DomainBlockCreate(
ID: id.NewULID(),
Domain: domain,
CreatedByAccountID: account.ID,
PrivateComment: text.SanitizePlaintext(privateComment),
PublicComment: text.SanitizePlaintext(publicComment),
PrivateComment: text.SanitizeToPlaintext(privateComment),
PublicComment: text.SanitizeToPlaintext(publicComment),
Obfuscate: &obfuscate,
SubscriptionID: subscriptionID,
}
Expand Down
8 changes: 4 additions & 4 deletions internal/processing/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
return nil, gtserror.NewErrorBadRequest(err, fmt.Sprintf("site title invalid: %s", err))
}
updatingColumns = append(updatingColumns, "title")
instance.Title = text.SanitizePlaintext(*form.Title) // don't allow html in site title
instance.Title = text.SanitizeToPlaintext(*form.Title) // don't allow html in site title
}

// validate & update site contact account if it's set on the form
Expand Down Expand Up @@ -215,7 +215,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
updatingColumns = append(updatingColumns, "short_description")
instance.ShortDescription = text.SanitizeHTML(*form.ShortDescription) // html is OK in site description, but we should sanitize it
instance.ShortDescription = text.SanitizeToHTML(*form.ShortDescription) // html is OK in site description, but we should sanitize it
}

// validate & update site description if it's set on the form
Expand All @@ -224,7 +224,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
updatingColumns = append(updatingColumns, "description")
instance.Description = text.SanitizeHTML(*form.Description) // html is OK in site description, but we should sanitize it
instance.Description = text.SanitizeToHTML(*form.Description) // html is OK in site description, but we should sanitize it
}

// validate & update site terms if it's set on the form
Expand All @@ -233,7 +233,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
updatingColumns = append(updatingColumns, "terms")
instance.Terms = text.SanitizeHTML(*form.Terms) // html is OK in site terms, but we should sanitize it
instance.Terms = text.SanitizeToHTML(*form.Terms) // html is OK in site terms, but we should sanitize it
}

var updateInstanceAccount bool
Expand Down
2 changes: 1 addition & 1 deletion internal/processing/media/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, media
var updatingColumns []string

if form.Description != nil {
attachment.Description = text.SanitizePlaintext(*form.Description)
attachment.Description = text.SanitizeToPlaintext(*form.Description)
updatingColumns = append(updatingColumns, "description")
}

Expand Down
2 changes: 1 addition & 1 deletion internal/processing/status/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, appli
Local: &local,
AccountID: account.ID,
AccountURI: account.URI,
ContentWarning: text.SanitizePlaintext(form.SpoilerText),
ContentWarning: text.SanitizeToPlaintext(form.SpoilerText),
ActivityStreamsType: ap.ObjectNote,
Sensitive: &sensitive,
CreatedWithApplicationID: application.ID,
Expand Down
7 changes: 2 additions & 5 deletions internal/text/emojionly.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,10 @@ func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMe
result.HTML = htmlContentBytes.String()

// clean anything dangerous out of the HTML
result.HTML = SanitizeHTML(result.HTML)
result.HTML = SanitizeToHTML(result.HTML)

// shrink ray
result.HTML, err = m.String("text/html", result.HTML)
if err != nil {
log.Errorf(ctx, "error minifying HTML: %s", err)
}
result.HTML = MinifyHTML(result.HTML)

return result
}
7 changes: 2 additions & 5 deletions internal/text/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,10 @@ func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionF
result.HTML = htmlContentBytes.String()

// clean anything dangerous out of the HTML
result.HTML = SanitizeHTML(result.HTML)
result.HTML = SanitizeToHTML(result.HTML)

// shrink ray
result.HTML, err = m.String("text/html", result.HTML)
if err != nil {
log.Errorf(ctx, "error minifying HTML: %s", err)
}
result.HTML = MinifyHTML(result.HTML)

return result
}
2 changes: 1 addition & 1 deletion internal/text/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const (
withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
withHashtagExpected = "<h1>Title</h1><p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>"
mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a <a href=\"https://example.org\">link</a>.\n\nHere's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\">"
mdWithHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p><p>Here's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\" crossorigin=\"anonymous\"></p>"
mdWithHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p><p>Here's an image:</p>"
mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: <script>alert(ahhhh)</script>"
mdWithCheekyHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a cheeky little script:</p>"
mdWithHashtagInitial = "#welcome #Hashtag"
Expand Down
21 changes: 21 additions & 0 deletions internal/text/minify.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package text

import (
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
Expand All @@ -31,3 +32,23 @@ var m = func() *minify.M {
})
return m
}()

// MinifyHTML minifies the given string
// under the assumption that it's HTML.
//
// If input is not HTML encoded, this
// function will try to do minimization
// anyway, but this may produce unexpected
// results.
//
// If an error occurs during minimization,
// it will be logged and the original string
// returned unmodified.
func MinifyHTML(in string) string {
out, err := m.String("text/html", in)
if err != nil {
log.Error(nil, err)
}

return out
}
7 changes: 2 additions & 5 deletions internal/text/plain.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,10 @@ func (f *formatter) fromPlain(
result.HTML = htmlContentBytes.String()

// Clean anything dangerous out of resulting HTML.
result.HTML = SanitizeHTML(result.HTML)
result.HTML = SanitizeToHTML(result.HTML)

// Shrink ray!
var err error
if result.HTML, err = m.String("text/html", result.HTML); err != nil {
log.Errorf(ctx, "error minifying HTML: %s", err)
}
result.HTML = MinifyHTML(result.HTML)

return result
}
Expand Down
Loading

0 comments on commit dc96562

Please sign in to comment.