Skip to content

Commit

Permalink
HTML sanitizer improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Karasiq committed Sep 18, 2020
1 parent 5193779 commit dc24380
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
3 changes: 2 additions & 1 deletion metadata/markdown/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ shadowcloud.metadata {
extensions = [html, htm, xhtml]
mimes = [text/html]
size-limit = 20M
remove-elements = [img, video, link, script, style, button, form, input, textarea, frame, iframe, svg, select]
remove-elements = [link, script, style, button, input, textarea, frame, iframe, svg, select]
img-allowed-hosts = [localhost, 127.0.0.1]
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.karasiq.shadowcloud.metadata.markdown

import java.net.URI

import akka.NotUsed
import akka.stream.scaladsl.Flow
import akka.util.ByteString
Expand All @@ -12,16 +14,18 @@ import com.typesafe.config.Config
import org.jsoup.Jsoup

import scala.collection.JavaConverters._
import scala.util.Try

/** Companion factory for [[HtmlMetadataParser]]. */
object HtmlMetadataParser {

  /** Builds a new parser instance from the given configuration.
    *
    * @param config configuration handed unchanged to the parser's constructor
    * @return a freshly constructed [[HtmlMetadataParser]]
    */
  def apply(config: Config): HtmlMetadataParser = {
    new HtmlMetadataParser(config)
  }
}

class HtmlMetadataParser(config: Config) extends MetadataParser {
/** Parser settings read once from the supplied `config`.
  *
  * Each value is declared exactly once; repeating `parserConfig` / `sizeLimit`
  * would be a duplicate-definition compile error.
  */
protected object settings {
  // Generic parser configuration built from the same config section.
  val parserConfig = MetadataParserConfig(config)
  // Value of the `size-limit` key, read as a byte count.
  val sizeLimit = config.getBytesInt("size-limit")
  // Names of the elements selected for removal from the parsed document (`remove-elements` key).
  val removeElements = config.getStringList("remove-elements").asScala
  // Hosts that <img>/<video> sources may point at (`img-allowed-hosts` key).
  val allowedHosts = config.getStringList("img-allowed-hosts").asScala
}

def canParse(name: String, mime: String): Boolean = {
Expand All @@ -36,6 +40,19 @@ class HtmlMetadataParser(config: Config) extends MetadataParser {
.map { bytes
val html = Jsoup.parse(bytes.utf8String).body()

// Remove every <img>/<video> whose `src` does not resolve to an allowed host
// (an exact match or a subdomain of an entry in `settings.allowedHosts`).
//
// `URI.getHost` returns null for relative, opaque (e.g. `data:`) and empty URIs, and a
// malformed URI makes the constructor throw. Both cases are folded into `None` so the
// host comparison never dereferences null; such elements count as "not allowed" and are removed.
html
  .select("img, video")
  .asScala
  .filterNot { element =>
    val srcHost = Option(element.attr("src"))
      .filter(_.nonEmpty)
      .flatMap(url => Try(new URI(url).getHost).toOption)
      .flatMap(host => Option(host))

    srcHost.exists { host =>
      settings.allowedHosts.exists(allowed => host == allowed || host.endsWith("." + allowed))
    }
  }
  .foreach(_.remove())

html
.select(settings.removeElements.mkString(", "))
.asScala
Expand Down
4 changes: 4 additions & 0 deletions project/WebDeps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ object WebDeps {
|.sc-main-container {
| margin-bottom: 100px;
|}
|
|.well img, .modal-body img {
| width: 100% !important;
|}
""".stripMargin
}

Expand Down

0 comments on commit dc24380

Please sign in to comment.