Skip to content

Commit

Permalink
Add tool to repair broken maven-metadata [#455]
Browse files Browse the repository at this point in the history
Without atomic deploys, we end up with deploys that don't complete
successfully, and so leave maven-metadata.xml files in a state that is
inconsistent with the versions actually deployed. Once the metadata gets
into this state, the problem worsens, since aether will deploy the pom and
jar files, then will fail to update the metadata file if its sum isn't
correct.

This adds a command-line tool that can scan the repo to report on or
repair metadata files that have a version list that doesn't match the
actual versions, or have invalid/missing sums.

This tool should only be needed until atomic deploys are implemented,
which will prevent the inconsistencies from occurring.
  • Loading branch information
tobias committed Feb 1, 2016
1 parent 7e915e4 commit eab4740
Show file tree
Hide file tree
Showing 16 changed files with 268 additions and 3 deletions.
Empty file.
Empty file.
Empty file.
Empty file.
13 changes: 13 additions & 0 deletions dev-resources/bad-metadata/foo/bar/maven-metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>foo</groupId>
<artifactId>bar</artifactId>
<versioning>
<release>0.3.0</release>
<versions>
<version>0.2.0</version>
<version>0.3.0</version>
</versions>
<lastUpdated>20151220023146</lastUpdated>
</versioning>
</metadata>
Empty file.
12 changes: 12 additions & 0 deletions dev-resources/bad-metadata/foo/baz/maven-metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>foo</groupId>
<artifactId>baz</artifactId>
<versioning>
<release>0.1.0</release>
<versions>
<version>0.1.0</version>
</versions>
<lastUpdated>20151220023146</lastUpdated>
</versioning>
</metadata>
Empty file.
12 changes: 12 additions & 0 deletions dev-resources/bad-metadata/foo/biscuit/maven-metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>foo</groupId>
<artifactId>biscuit</artifactId>
<versioning>
<release>0.1.0</release>
<versions>
<version>0.1.0</version>
</versions>
<lastUpdated>20151220023146</lastUpdated>
</versioning>
</metadata>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4645007dcb6fc71d380ba8cb0af32c3f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2003a468928567dc1f2a4f6d07cc7b2abb5762be
3 changes: 2 additions & 1 deletion project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
[duct/hikaricp-component "0.1.0"]
[duct "0.4.4"]
[meta-merge "0.1.1"]
[ring-jetty-component "0.3.0"]]
[ring-jetty-component "0.3.0"]
[digest "1.4.4"]]
:plugins [[supersport "1"]]
:main ^:skip-aot clojars.main
:target-path "target/%s/"
Expand Down
51 changes: 51 additions & 0 deletions src/clojars/file_utils.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
(ns clojars.file-utils
(:require [clojure.java.io :as io]
[digest :as d]))

(defn sum-file
  "Builds the File in which the `type` sum of `file` is kept: a sibling of
  `file` named `<file name>.<type>` (e.g. `foo.jar.md5` for `:md5`)."
  [file type]
  (let [target   (io/file file)
        sum-name (str (.getName target) "." (name type))]
    (io/file (.getParentFile target) sum-name)))

(defn- create-sum
  "Applies the digest fn `f` to `file` and writes the result into the
  `type` sum file that sits next to `file`."
  [f file type]
  (let [source (io/file file)
        digest (f source)]
    (spit (sum-file source type) digest)))

;; Lookup table from sum type keyword to the `digest` function that
;; computes a sum of that type.
(def ^:private sum-generators
{:md5 d/md5
:sha1 d/sha-1})

(defn create-md5-sum
  "Writes the md5 sum of `file` to its `.md5` sum file."
  [file]
  (let [md5-of (:md5 sum-generators)]
    (create-sum md5-of file :md5)))

(defn create-sha1-sum
  "Writes the sha1 sum of `file` to its `.sha1` sum file."
  [file]
  (let [sha1-of (:sha1 sum-generators)]
    (create-sum sha1-of file :sha1)))

(defn create-sums
  "Writes both the md5 and the sha1 sum files for `file`, md5 first."
  [file]
  (doseq [create! [create-md5-sum create-sha1-sum]]
    (create! file)))

(defn valid-sum?
  "Checks to see if a sum of type `type` exists and is valid for `file`.

  The recorded sum is compared leniently so that a correct sum file is not
  reported as invalid because of formatting: surrounding whitespace (such
  as a trailing newline) is dropped, only the first whitespace-separated
  token is used (some tools append the file name after the sum), and hex
  digit case is ignored.

  Returns false when the sum file does not exist or when `type` has no
  entry in `sum-generators`."
  [file type]
  (let [sig-file  (sum-file file type)
        generator (sum-generators type)]
    (boolean
      (and generator
           (.exists sig-file)
           (let [^String expected (generator (io/file file))
                 ;; an empty sum file yields \"\" here, which never matches
                 recorded (first (.split (.trim ^String (slurp sig-file)) "\\s+"))]
             (.equalsIgnoreCase expected recorded))))))

(defn valid-sums?
  "True only when `file` has both an md5 and a sha1 sum file and each of
  them is valid for `file`. Stops at the first sum that fails."
  [file]
  (every? (fn [sig-type] (valid-sum? file sig-type))
          [:md5 :sha1]))
12 changes: 10 additions & 2 deletions src/clojars/maven.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
(:import org.apache.maven.model.io.xpp3.MavenXpp3Reader
org.apache.maven.artifact.repository.metadata.io.xpp3.MetadataXpp3Reader
java.io.IOException
(org.apache.maven.model Scm Model License)))
(org.apache.maven.model Scm Model License)
(org.apache.maven.artifact.repository.metadata Metadata)
(org.apache.maven.artifact.repository.metadata.io.xpp3 MetadataXpp3Writer)))

(defn without-nil-values
"Prunes a map of pairs that have nil values."
Expand Down Expand Up @@ -61,12 +63,18 @@

(def pom-to-map (comp model-to-map read-pom))

(defn read-metadata
(defn ^Metadata read-metadata
  "Parses the maven-metadata file at `file` and returns the resulting maven
  Metadata object. The underlying reader is closed before returning."
  [file]
  (let [parser (MetadataXpp3Reader.)]
    (with-open [in (io/reader file)]
      (.read parser in))))

(defn write-metadata
  "Serializes `metadata` to `file`, closing the underlying writer when done."
  [^Metadata metadata file]
  (let [serializer (MetadataXpp3Writer.)]
    (with-open [out (io/writer file)]
      (.write serializer out metadata))))

(defn snapshot-version
"Get snapshot version from maven-metadata.xml used in pom filename"
[file]
Expand Down
90 changes: 90 additions & 0 deletions src/clojars/tools/repair_metadata.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
(ns clojars.tools.repair-metadata
(:require [clojars.maven :as mvn]
[clojars.file-utils :as futil]
[clojure.java.io :as io])
(:import (java.io File)
(org.apache.commons.io FileUtils)
(org.apache.maven.artifact.repository.metadata Metadata)
(java.text SimpleDateFormat)
(java.util Date))
(:gen-class))

(defn find-bad-metadata
  "Scans `repo` for release-level maven-metadata.xml files that disagree
  with what is on disk.

  Returns a lazy seq with one map per bad metadata file:
    :file              the maven-metadata.xml File
    :metadata          the parsed Metadata object
    :group-id          groupId taken from the metadata
    :artifact-id       artifactId taken from the metadata
    :version-dirs      the version directories found beside the file
    :missing-versions? true when the versions listed in the metadata differ
                       from the names of the version directories
    :invalid-sums?     true when `futil/valid-sums?` fails for the file

  Metadata files that cannot be parsed are reported on stdout and skipped."
  [repo]
  ;; This is gross, mainly because maven uses maven-metadata.xml files for three things:
  ;; * data for SNAPSHOTS (within the x.x.x-SNAPSHOT dir)
  ;; * data for plugins (within the group dir)
  ;; * data for release versions (within the artifact dir)
  ;; and we only care about the last one, so have to filter out the other two
  (for [^File f (file-seq (io/file repo))
        :when (= "maven-metadata.xml" (.getName f))
        ;; ignore metadata within SNAPSHOT dir
        :when (not (.endsWith (.getName (.getParentFile f)) "SNAPSHOT"))
        :let [^Metadata metadata (try
                                   (mvn/read-metadata f)
                                   (catch Exception _
                                     ;; println returns nil, so the entry is
                                     ;; dropped by the :when below
                                     (println "Failed to read" f)))]
        :when metadata
        ;; ignore plugin metadata files
        :when (not (seq (.getPlugins metadata)))
        :let [parent (.getParentFile f)
              ;; Version dirs are direct children of the artifact dir. Only
              ;; look one level down: walking the whole subtree would pick up
              ;; the version dirs of artifacts nested below this one (when
              ;; this dir is also a group) and report them as versions here.
              version-dirs (for [^File dir (.listFiles parent)
                                 :when (.isDirectory dir)
                                 ;; filter out artifacts where the current dir is a group as well as an artifact
                                 :when (or (.endsWith (.getName dir) "SNAPSHOT")
                                           (not (.exists (io/file dir "maven-metadata.xml"))))]
                             dir)
              versions (set (map #(.getName ^File %) version-dirs))
              ;; metadata without a <versioning> element lists no versions
              versioning (.getVersioning metadata)
              listed-versions (if versioning
                                (set (.getVersions versioning))
                                #{})
              missing-versions? (not= versions listed-versions)
              invalid-sums? (not (futil/valid-sums? f))]
        :when (or missing-versions? invalid-sums?)]
    {:file f
     :metadata metadata
     :group-id (.getGroupId metadata)
     :artifact-id (.getArtifactId metadata)
     :version-dirs version-dirs
     :missing-versions? missing-versions?
     :invalid-sums? invalid-sums?}))

;; Formats dates in the `yyyyMMddHHmmss` layout used for <lastUpdated> in
;; maven-metadata.xml. Maven defines that timestamp as UTC, so the formatter
;; is pinned to UTC instead of following the JVM's default time zone.
(def date-formatter
  (doto (SimpleDateFormat. "yyyyMMddHHmmss")
    (.setTimeZone (java.util.TimeZone/getTimeZone "UTC"))))

(defn backup-metadata
  "Copies the metadata `file`, plus whichever of its sha1/md5 sum files
  exist, into `backup-dir`/<group-id>/<artifact-id>/<timestamp>, creating
  that directory as needed."
  [backup-dir {:keys [file group-id artifact-id]}]
  (let [stamp      (.format date-formatter (Date.))
        to-dir     (io/file backup-dir group-id artifact-id stamp)
        candidates [file (futil/sum-file file :sha1) (futil/sum-file file :md5)]]
    (.mkdirs to-dir)
    (doseq [candidate candidates
            :when (.exists candidate)]
      (FileUtils/copyFileToDirectory candidate to-dir))))

(defn repair-versions
  "Rewrites the versioning section of `metadata` from `version-dirs` and
  writes the result to `file`.

  The version list becomes the directory names ordered by last-modified
  time (oldest first), `release` becomes the most recently modified
  non-SNAPSHOT version, and `lastUpdated` becomes the modification time of
  the most recently modified version directory.

  Degenerate inputs are handled instead of throwing: metadata without a
  versioning section gets a new one, `release` is cleared when only
  SNAPSHOT versions exist, and `lastUpdated` is left untouched when there
  are no version directories."
  [{:keys [file metadata version-dirs]}]
  (let [^Metadata md metadata
        ^org.apache.maven.artifact.repository.metadata.Versioning versioning
        (or (.getVersioning md)
            (let [fresh (org.apache.maven.artifact.repository.metadata.Versioning.)]
              (.setVersioning md fresh)
              fresh))
        sorted-dirs (sort-by #(.lastModified ^File %) version-dirs)
        latest-release (->> sorted-dirs
                            (remove #(.endsWith (.getName ^File %) "SNAPSHOT"))
                            last)]
    ;; remove existing versions (working from a copy of the list, presumably
    ;; because removeVersion mutates it), then write dir versions in
    ;; last-modified order
    (run! #(.removeVersion versioning %) (into [] (.getVersions versioning)))
    (run! #(.addVersion versioning %) (map #(.getName ^File %) sorted-dirs))
    ;; set release to latest !snapshot; nil when there is none
    (.setRelease versioning (some-> ^File latest-release .getName))
    ;; set lastUpdated to latest version, if there is one
    (when-let [^File newest (last sorted-dirs)]
      (.setLastUpdated versioning
                       (.format ^SimpleDateFormat date-formatter
                                (Date. (.lastModified newest)))))

    ;; write new file
    (mvn/write-metadata md file)))

(defn repair-metadata
  "Repairs one entry produced by `find-bad-metadata`: backs the current
  files up under `backup-dir`, rewrites the version list when it was flagged
  as out of date, and finally regenerates both sum files."
  [backup-dir {:keys [file missing-versions?] :as data}]
  (backup-metadata backup-dir data)
  ;; only touch the metadata file itself when its version list is wrong
  (cond-> data
    missing-versions? repair-versions)
  (futil/create-sums file))

(defn -main
  "Command-line entry point. Expects exactly three arguments: the repo path,
  the backup path and the action. With the action \":repair\" every bad
  metadata entry is repaired; with any other action each entry is printed.
  Prints a usage line when the argument count is wrong."
  [& args]
  (let [[repo backup-dir action] args
        repair? (= ":repair" action)]
    (if (= 3 (count args))
      (doseq [md (find-bad-metadata repo)]
        (if repair?
          (repair-metadata (io/file backup-dir) md)
          (prn md)))
      (println "Usage: repo-path backup-path (:repair|:report)"))))
76 changes: 76 additions & 0 deletions test/clojars/test/unit/tools/repair_metadata.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
(ns clojars.test.unit.tools.repair-metadata
(:require [clojars.tools.repair-metadata :as rmd]
[clojars.maven :as mvn]
[clojure.test :refer :all]
[clojure.java.io :as io]
digest
[clojars.file-utils :as futil])
(:import (org.apache.commons.io FileUtils)))

;; Directory holding the scratch copy of the "bad-metadata" resource tree.
;; nil outside of a test; bound per test by the :each fixture.
(def ^:dynamic *tmp-repo* nil)

;; Per-test fixture: creates a temp dir named "bad-metadata<millis>", copies
;; the "bad-metadata" resource tree into it, runs the test with *tmp-repo*
;; bound to that dir, and always deletes the dir afterwards.
(use-fixtures :each
(fn [f]
(binding [*tmp-repo* (doto (io/file (FileUtils/getTempDirectory)
(str "bad-metadata" (System/currentTimeMillis)))
.mkdirs)]
(FileUtils/copyDirectory (io/file (io/resource "bad-metadata")) *tmp-repo*)
(try
(f)
(finally
(FileUtils/deleteDirectory *tmp-repo*))))))

(defn metadata-for-artifact
  "Returns the first entry of `mds` whose :artifact-id equals `artifact-id`,
  or nil when there is none."
  [mds artifact-id]
  (->> mds
       (filter (fn [md] (= artifact-id (:artifact-id md))))
       first))

;; Scans the scratch repo and expects exactly two bad entries:
;; * bar - wrong version list and invalid sums
;; * baz - correct version list but invalid sums
;; biscuit must not be reported at all.
(deftest find-bad-metadata-does-the-right-thing
(let [mds (rmd/find-bad-metadata *tmp-repo*)
bar (metadata-for-artifact mds "bar")
baz (metadata-for-artifact mds "baz")]
(is (= 2 (count mds)))
(is bar)
(is (:missing-versions? bar))
(is (:invalid-sums? bar))
(is baz)
(is (not (:missing-versions? baz)))
(is (:invalid-sums? baz))
(is (nil? (metadata-for-artifact mds "biscuit")))))

;; Repairs the "bar" entry and checks that a backup of maven-metadata.xml was
;; made, that the rewritten metadata has the expected release and version
;; list, and that the regenerated sums are valid. The backup dir is a temp dir
;; named "bad-metadata-backup<millis>" and is always deleted afterwards.
;; NOTE(review): the expected version order presumably relies on the
;; modification times of the fixture's version dirs surviving the copy into
;; the scratch repo - confirm.
(deftest repair-metadata-corrects-versions
(let [backup-dir (doto (io/file (FileUtils/getTempDirectory)
(str "bad-metadata-backup" (System/currentTimeMillis)))
.mkdirs)
bar-file (io/file *tmp-repo* "foo/bar/maven-metadata.xml")]
(try
(rmd/repair-metadata backup-dir (metadata-for-artifact (rmd/find-bad-metadata *tmp-repo*) "bar"))
(testing "makes a backup"
(is (= 1 (count (filter #(= "maven-metadata.xml" (.getName %)) (file-seq backup-dir))))))

(testing "creates the correct metadata"
(let [md (mvn/read-metadata bar-file)
versioning (.getVersioning md)]
(is (= "0.4.0" (.getRelease versioning)))
(is (= ["0.1.0" "0.2.0" "0.4.0" "0.5.0-SNAPSHOT"] (.getVersions versioning)))))

(testing "writes correct sums"
(is (futil/valid-sums? bar-file)))

(finally
(FileUtils/deleteDirectory backup-dir)))))

;; Repairs the "baz" entry and checks that a backup of maven-metadata.xml was
;; made and that the regenerated sums are valid. The backup dir is a temp dir
;; named "bad-metadata-backup<millis>" and is always deleted afterwards.
(deftest repair-metadata-corrects-sums
(let [backup-dir (doto (io/file (FileUtils/getTempDirectory)
(str "bad-metadata-backup" (System/currentTimeMillis)))
.mkdirs)
baz-file (io/file *tmp-repo* "foo/baz/maven-metadata.xml")]
(try
(rmd/repair-metadata backup-dir (metadata-for-artifact (rmd/find-bad-metadata *tmp-repo*) "baz"))
(testing "makes a backup"
(is (= 1 (count (filter #(= "maven-metadata.xml" (.getName %)) (file-seq backup-dir))))))

(testing "writes correct sums"
(is (futil/valid-sums? baz-file)))

(finally
(FileUtils/deleteDirectory backup-dir)))))

0 comments on commit eab4740

Please sign in to comment.