Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: skip experimental versions on scrape #352

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,15 @@ If a version date prior to `2021-11-08` resolves to `2021-09-14~experimental`, y

If a version date after `2021-11-08` matches `2021-09-14~experimental`, let's say a request for `2021-12-10~experimental`, then you should see it as it would appear after the non-breaking change, `2021-11-08_13_14_15.spec.yaml`.

# Phase-out of experimental versions

We are phasing out support for experimental versions. Starting now, Vervet Underground will no longer scrape version dates that should not be publicly documented. This means that:

- Vervet Underground's scraper will no longer scrape experimental versions newer than `2024-09-08`.
- Older versions will continue to be scraped and documented as usual; we have opted to keep them so as not to break any existing integrations.

This change ensures that our API documentation remains clear and focused on stable, reliable versions that are ready for public use. It helps maintain the integrity and quality of our publicly available APIs, ensuring that consumers are not exposed to experimental and potentially unstable features.

# Roadmap

## Minimum Viable
Expand Down
28 changes: 20 additions & 8 deletions internal/scraper/gcs_scraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestGCSScraper(t *testing.T) {
tests := []struct {
service, version, digest string
}{
{"petfood", "2021-09-01", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"petfood", "2021-09-01~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"animals", "2021-10-16", "sha256:hcv2i7awT6CcSCecw9WrYBokFyzYNVaQArGgqHqdj7s="},
}

Expand Down Expand Up @@ -61,9 +61,15 @@ func TestGCSScraper(t *testing.T) {

// Version digests now known to storage
for _, test := range tests {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
if !scraper.IsPubliclyDocumented(test.version) {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsFalse, qt.Commentf("publicly undocumented version %s should not be included", test.version))
} else {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
}
}

vi, err := st.VersionIndex(ctx)
Expand Down Expand Up @@ -95,7 +101,7 @@ func TestGCSScraperCollation(t *testing.T) {
tests := []struct {
service, version, digest string
}{
{"petfood", "2021-09-01", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"petfood", "2021-09-01~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"animals", "2021-10-16", "sha256:hcv2i7awT6CcSCecw9WrYBokFyzYNVaQArGgqHqdj7s="},
}

Expand Down Expand Up @@ -125,9 +131,15 @@ func TestGCSScraperCollation(t *testing.T) {

// Version digests now known to storage
for _, test := range tests {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
if !scraper.IsPubliclyDocumented(test.version) {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsFalse, qt.Commentf("Publicly undocumented version %s should not be included", test.version))
} else {
ok, err := st.HasVersion(ctx, test.service, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
}
}

vi, err := st.VersionIndex(ctx)
Expand Down
28 changes: 20 additions & 8 deletions internal/scraper/s3_scraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestS3Scraper(t *testing.T) {
tests := []struct {
name, version, digest string
}{
{"petfood", "2021-09-01", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"petfood", "2021-09-01~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"animals", "2021-10-16", "sha256:hcv2i7awT6CcSCecw9WrYBokFyzYNVaQArGgqHqdj7s="},
}

Expand Down Expand Up @@ -57,9 +57,15 @@ func TestS3Scraper(t *testing.T) {

// Version digests now known to storage
for _, test := range tests {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
if !scraper.IsPubliclyDocumented(test.version) {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsFalse, qt.Commentf("publicly undocumented version %s should not be included", test.version))
} else {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
}
}

vi, err := st.VersionIndex(ctx)
Expand Down Expand Up @@ -90,7 +96,7 @@ func TestS3ScraperCollation(t *testing.T) {
tests := []struct {
name, version, digest string
}{{
"petfood", "2021-09-01", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U=",
"petfood", "2021-09-01~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U=",
}, {
"animals", "2021-10-16", "sha256:hcv2i7awT6CcSCecw9WrYBokFyzYNVaQArGgqHqdj7s=",
}}
Expand All @@ -117,9 +123,15 @@ func TestS3ScraperCollation(t *testing.T) {

// Version digests now known to storage
for _, test := range tests {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
if !scraper.IsPubliclyDocumented(test.version) {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsFalse, qt.Commentf("publicly undocumented version %s should not be included", test.version))
} else {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
}
}

vi, err := st.VersionIndex(ctx)
Expand Down
25 changes: 23 additions & 2 deletions internal/scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,10 @@ func (s *Scraper) scrape(ctx context.Context, scrapeTime time.Time, svc service)
}

for i := range versions {
// TODO: we might run this concurrently per live service pod if/when
// we're more k8s aware, but we won't do that yet.
// Skip if the version is not publicly documented
if !IsPubliclyDocumented(versions[i]) {
continue
}

// Skip if it's a legacy api using the default legacy version.
if isLegacyVersion(versions[i]) {
Expand Down Expand Up @@ -290,3 +292,22 @@ func isLegacyVersion(version string) bool {
// This default version predates vervet's creation date.
return version == "2021-01-01"
}

// IsPubliclyDocumented reports whether version counts as publicly documented.
//
// Any version that is not experimental is always publicly documented. An
// experimental version is publicly documented only when the text preceding
// its first "~" compares lexically less than or equal to "2024-09-08".
func IsPubliclyDocumented(version string) bool {
	// Latest date for which experimental versions are still documented.
	const cutoff = "2024-09-08"

	if !IsExperimentalVersion(version) {
		return true
	}

	// Keep only the portion before the first "~"; the comparison below is a
	// plain string comparison, which orders YYYY-MM-DD dates chronologically.
	datePart := version
	if tilde := strings.IndexByte(version, '~'); tilde >= 0 {
		datePart = version[:tilde]
	}
	return datePart <= cutoff
}

// IsExperimentalVersion reports whether version ends with the
// "~experimental" stability suffix.
func IsExperimentalVersion(version string) bool {
	const experimentalSuffix = "~experimental"
	return strings.HasSuffix(version, experimentalSuffix)
}
31 changes: 20 additions & 11 deletions internal/scraper/scraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,19 @@ import (
var (
t0 = time.Date(2021, time.December, 3, 20, 49, 51, 0, time.UTC)
collatedPaths = map[string]int{
"2021-09-01": 1,
"2021-09-16": 2,
"2021-10-01": 3,
"2021-10-16": 4,
"2021-09-01~experimental": 1, // publicly documented version
"2021-09-16": 2,
"2021-10-01": 3,
"2021-10-16": 4,
"2024-09-09~experimental": 5, // publicly undocumented version
}

petfood = &testService{
versions: []string{"2021-09-01", "2021-09-16"},
versions: []string{"2021-09-01~experimental", "2021-09-16", "2024-09-09~experimental"},
contents: map[string]string{
"2021-09-01": `{"paths":{"/crickets": {"get": {}}}}`,
"2021-09-16": `{"paths":{"/crickets": {"get": {}}, "/kibble": {"get": {}}}}`,
"2021-09-01~experimental": `{"paths":{"/crickets": {"get": {}}}}`,
"2021-09-16": `{"paths":{"/crickets": {"get": {}}, "/kibble": {"get": {}}}}`,
"2024-09-09~experimental": `{"paths":{"/newexperiment": {"get": {}}}}`,
},
}
animals = &testService{
Expand Down Expand Up @@ -85,7 +87,8 @@ func TestScraper(t *testing.T) {
tests := []struct {
name, version, digest string
}{
{"petfood", "2021-09-01", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"petfood", "2021-09-01~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"petfood", "2024-09-09~experimental", "sha256:zCgJaPeR8R21wsAlYn46xO6NE3XJiyFtLnYrP4DpM3U="},
{"animals", "2021-10-16", "sha256:hcv2i7awT6CcSCecw9WrYBokFyzYNVaQArGgqHqdj7s="},
}

Expand Down Expand Up @@ -126,9 +129,15 @@ func TestScraper(t *testing.T) {

// Version digests now known to storage
for _, test := range tests {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
if !scraper.IsPubliclyDocumented(test.version) {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsFalse, qt.Commentf("publicly undocumented version %s should not be included", test.version))
} else {
ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
c.Assert(err, qt.IsNil)
c.Assert(ok, qt.IsTrue)
}
}

vi, err := st.VersionIndex(ctx)
Expand Down