From dc2438035d86f2928bba1fe08715cacee8603b17 Mon Sep 17 00:00:00 2001
From: Karasiq
Date: Fri, 18 Sep 2020 19:38:06 +0300
Subject: [PATCH] HTML sanitizer improvements

- img, video and form are dropped from the unconditional remove-elements list.
- img/video elements are now kept only when the host of their src attribute
  equals, or is a subdomain of, an entry in img-allowed-hosts; every other
  img/video element (including ones whose src has no host at all) is removed.
- Images inside .well and .modal-body are forced to 100% width.

---
 .../src/main/resources/reference.conf         |  3 ++-
 .../markdown/HtmlMetadataParser.scala         | 22 ++++++++++++++++++--
 project/WebDeps.scala                         |  4 ++++
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/metadata/markdown/src/main/resources/reference.conf b/metadata/markdown/src/main/resources/reference.conf
index c0c521c9..6cc2636e 100644
--- a/metadata/markdown/src/main/resources/reference.conf
+++ b/metadata/markdown/src/main/resources/reference.conf
@@ -19,7 +19,8 @@ shadowcloud.metadata {
       extensions = [html, htm, xhtml]
       mimes = [text/html]
       size-limit = 20M
-      remove-elements = [img, video, link, script, style, button, form, input, textarea, frame, iframe, svg, select]
+      remove-elements = [link, script, style, button, input, textarea, frame, iframe, svg, select]
+      img-allowed-hosts = [localhost, 127.0.0.1]
     }
   }
 }
diff --git a/metadata/markdown/src/main/scala/com/karasiq/shadowcloud/metadata/markdown/HtmlMetadataParser.scala b/metadata/markdown/src/main/scala/com/karasiq/shadowcloud/metadata/markdown/HtmlMetadataParser.scala
index 1baab918..b2cf0bd4 100644
--- a/metadata/markdown/src/main/scala/com/karasiq/shadowcloud/metadata/markdown/HtmlMetadataParser.scala
+++ b/metadata/markdown/src/main/scala/com/karasiq/shadowcloud/metadata/markdown/HtmlMetadataParser.scala
@@ -1,5 +1,7 @@
 package com.karasiq.shadowcloud.metadata.markdown
 
+import java.net.URI
+
 import akka.NotUsed
 import akka.stream.scaladsl.Flow
 import akka.util.ByteString
@@ -12,6 +14,7 @@ import com.typesafe.config.Config
 import org.jsoup.Jsoup
 
 import scala.collection.JavaConverters._
+import scala.util.Try
 
 object HtmlMetadataParser {
   def apply(config: Config): HtmlMetadataParser = new HtmlMetadataParser(config)
@@ -19,9 +22,10 @@ object HtmlMetadataParser {
 
 class HtmlMetadataParser(config: Config) extends MetadataParser {
   protected object settings {
-    val parserConfig = MetadataParserConfig(config)
-    val sizeLimit = config.getBytesInt("size-limit")
+    val parserConfig   = MetadataParserConfig(config)
+    val sizeLimit      = config.getBytesInt("size-limit")
     val removeElements = config.getStringList("remove-elements").asScala
+    val allowedHosts   = config.getStringList("img-allowed-hosts").asScala
   }
 
   def canParse(name: String, mime: String): Boolean = {
@@ -36,6 +40,20 @@ class HtmlMetadataParser(config: Config) extends MetadataParser {
       .map { bytes ⇒
         val html = Jsoup.parse(bytes.utf8String).body()
 
+        html
+          .select("img, video")
+          .asScala
+          .filterNot { e ⇒
+            // getHost is null for a relative, opaque (data:) or empty src; drop it to avoid an NPE below
+            val srcHost = Option(e.attr("src"))
+              .flatMap(url ⇒ Try(new URI(url).getHost).toOption.flatMap(Option(_)))
+
+            srcHost exists { host ⇒
+              settings.allowedHosts.exists(ah ⇒ host == ah || host.endsWith("." + ah))
+            }
+          }
+          .foreach(_.remove())
+
         html
           .select(settings.removeElements.mkString(", "))
           .asScala
diff --git a/project/WebDeps.scala b/project/WebDeps.scala
index c57a7295..dc265655 100644
--- a/project/WebDeps.scala
+++ b/project/WebDeps.scala
@@ -30,6 +30,10 @@ object WebDeps {
         |.sc-main-container {
         |  margin-bottom: 100px;
         |}
+        |
+        |.well img, .modal-body img {
+        |  width: 100% !important;
+        |}
       """.stripMargin
   }