From 3e7cbf6a2c02ae6c3f7ca3f4106bb08a85802e3f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 22 Jun 2026 12:43:28 +0200 Subject: [PATCH] Add regression tests for the docs-schema generation path The docs schema (jsonschema_for_docs.json) is no longer generated or checked in on main, so nothing here exercised the `--docs` build path; a regression in it would only surface as missing/incorrect fields in the docs published to the docgen branch at release time. Extract the in-memory schema build into a git-free `buildSchema` (sinceVersion stamping, which shells out to git tags, stays in the generator), then add two fast unit tests against it: - docs mode omits the interpolation-pattern transform (plain field types, not the runtime `${...}` unions). - the sinceVersion key round-trips: every key flattenSchema emits is applied by addSinceVersionToSchema, reaching $defs and not just root properties. This guards the two walks against drifting apart. Behavior-preserving: regenerating jsonschema.json produces no diff. Co-authored-by: Isaac --- bundle/internal/schema/main.go | 81 ++++++++++++++++++----------- bundle/internal/schema/main_test.go | 81 +++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 31 deletions(-) diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 3f468852a83..e7e83df94d7 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -235,6 +235,45 @@ func configTypeGraph() (*typeGraph, error) { } func generateSchema(workdir, outputFile, cliJSONFile string, docsMode bool) { + s, err := buildSchema(workdir, cliJSONFile, docsMode) + if err != nil { + log.Fatal(err) + } + + // In docs mode, add sinceVersion annotations by analyzing git history. + // This relies on git tags, so it lives in the generator rather than in + // buildSchema, which stays git-free and testable. 
+ if docsMode { + sinceVersions, err := computeSinceVersions() + if err != nil { + fmt.Printf("Warning: could not compute sinceVersion annotations: %v\n", err) + } else { + addSinceVersionToSchema(&s, sinceVersions) + } + } + + b, err := json.MarshalIndent(s, "", " ") + if err != nil { + log.Fatal(err) + } + + // Write the schema descriptions to the output file. + err = os.WriteFile(outputFile, b, 0o644) + if err != nil { + log.Fatal(err) + } +} + +// buildSchema generates the in-memory bundle JSON schema from the bundle Go +// types and the cli.json spec, and rewrites the annotations file in workdir +// (adding placeholders for new fields and dropping stale ones). +// +// When docsMode is true the interpolation-pattern transform is omitted, so the +// published docs schema shows plain field types instead of the `${...}` +// reference unions the runtime schema needs for autocomplete. sinceVersion +// annotations require git history and are applied by the caller, not here, so +// this stays git-free and testable. +func buildSchema(workdir, cliJSONFile string, docsMode bool) (jsonschema.Schema, error) { annotationsPath := filepath.Join(workdir, "annotations.yml") // The cli.json schema graph is keyed by SDK type name (e.g. @@ -242,25 +281,25 @@ func generateSchema(workdir, outputFile, cliJSONFile string, docsMode bool) { // those keys directly. 
doc, err := clijson.Parse(cliJSONFile) if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } if len(doc.Schemas) == 0 { - log.Fatalf("no schemas found in %s", cliJSONFile) + return jsonschema.Schema{}, fmt.Errorf("no schemas found in %s", cliJSONFile) } extracted, err := newParser(doc.Schemas).extractAnnotations(reflect.TypeFor[config.Root]()) if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } graph, err := configTypeGraph() if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } fromFile, unknown, err := loadAnnotationsFile(annotationsPath, graph) if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } for _, k := range unknown { fmt.Printf("Dropping annotation at `%s`: no matching field in the bundle configuration\n", k) @@ -274,7 +313,7 @@ func generateSchema(workdir, outputFile, cliJSONFile string, docsMode bool) { a, err := newAnnotationHandler(extracted, fromFile) if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } transforms := []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ @@ -291,6 +330,9 @@ func generateSchema(workdir, outputFile, cliJSONFile string, docsMode bool) { // Generate the JSON schema from the bundle Go struct. s, err := jsonschema.FromType(reflect.TypeFor[config.Root](), transforms) + if err != nil { + return jsonschema.Schema{}, err + } // AdditionalProperties is set to an empty schema to allow non-typed keys used as yaml-anchors // Example: @@ -300,34 +342,11 @@ func generateSchema(workdir, outputFile, cliJSONFile string, docsMode bool) { // <<: *some_anchor s.AdditionalProperties = jsonschema.Schema{} - if err != nil { - log.Fatal(err) - } - // Overwrite the input annotation file, adding missing annotations err = a.syncWithMissingAnnotations(annotationsPath, graph) if err != nil { - log.Fatal(err) + return jsonschema.Schema{}, err } - // In docs mode, add sinceVersion annotations by analyzing git history. 
- if docsMode { - sinceVersions, err := computeSinceVersions() - if err != nil { - fmt.Printf("Warning: could not compute sinceVersion annotations: %v\n", err) - } else { - addSinceVersionToSchema(&s, sinceVersions) - } - } - - b, err := json.MarshalIndent(s, "", " ") - if err != nil { - log.Fatal(err) - } - - // Write the schema descriptions to the output file. - err = os.WriteFile(outputFile, b, 0o644) - if err != nil { - log.Fatal(err) - } + return s, nil } diff --git a/bundle/internal/schema/main_test.go b/bundle/internal/schema/main_test.go index c1f0f76bfc3..a0a2fa0c7f0 100644 --- a/bundle/internal/schema/main_test.go +++ b/bundle/internal/schema/main_test.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "encoding/json" "io" "os" "path" @@ -11,6 +12,7 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/libs/dyn/yamlloader" + "github.com/databricks/cli/libs/jsonschema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -96,3 +98,82 @@ func TestNoDetachedAnnotations(t *testing.T) { require.NoError(t, err) assert.Empty(t, unknown, "Detached annotations found; run `./task generate-schema` to drop them") } + +// buildTestSchema generates the in-memory bundle schema the same way the +// generator does, against a throwaway copy of the committed annotations file +// (buildSchema rewrites it in place). 
+func buildTestSchema(t *testing.T, docsMode bool) jsonschema.Schema { + t.Helper() + workdir := t.TempDir() + require.NoError(t, copyFile("annotations.yml", path.Join(workdir, "annotations.yml"))) + s, err := buildSchema(workdir, cliJSONPath, docsMode) + require.NoError(t, err) + return s +} + +func mustMarshalSchema(t *testing.T, s jsonschema.Schema) string { + t.Helper() + b, err := json.Marshal(s) + require.NoError(t, err) + return string(b) +} + +// The docs schema is no longer generated or checked in on main; it is built +// only on release and published to the docgen branch. These tests exercise the +// docsMode build path so a regression in it fails CI here instead of surfacing +// as missing/incorrect fields in the published docs. + +// Docs mode must drop the interpolation-pattern transform, so the published +// docs schema shows plain field types rather than the runtime `${...}` unions. +func TestBuildDocsSchemaOmitsInterpolationPatterns(t *testing.T) { + docs := buildTestSchema(t, true) + runtime := buildTestSchema(t, false) + + require.NotEmpty(t, docs.Properties, "docs schema has no root properties") + require.NotEmpty(t, docs.Definitions, "docs schema has no $defs") + + // Derive the marker from the generator's own helper so the assertion can't + // drift from what it emits. json.Marshal yields the quoted, escaped regex + // exactly as it appears in the schema; strip the surrounding quotes. + encoded, err := json.Marshal(interpolationPattern("bundle")) + require.NoError(t, err) + marker := string(encoded[1 : len(encoded)-1]) + + assert.Contains(t, mustMarshalSchema(t, runtime), marker, "runtime schema should contain interpolation patterns") + assert.NotContains(t, mustMarshalSchema(t, docs), marker, "docs schema must omit interpolation patterns") +} + +// computeSinceVersions emits keys via flattenSchema; addSinceVersionToSchema +// consumes the same key format. 
This feeds every such key a sentinel version +// and asserts it lands on both root properties and nested $defs, guarding the +// two walks against drifting apart, which would silently drop x-since-version +// from the published docs schema. +func TestDocsSchemaSinceVersionRoundTrip(t *testing.T) { + s := buildTestSchema(t, true) + + var raw map[string]any + require.NoError(t, json.Unmarshal([]byte(mustMarshalSchema(t, s)), &raw)) + fields := flattenSchema(raw) + require.NotEmpty(t, fields) + + const sentinel = "v9.9.9" + sinceVersions := make(map[string]string, len(fields)) + for key := range fields { + sinceVersions[key] = sentinel + } + + addSinceVersionToSchema(&s, sinceVersions) + + // Every root property key is in fields, so all must be stamped. + require.NotEmpty(t, s.Properties) + for name, prop := range s.Properties { + assert.Equal(t, sentinel, prop.SinceVersion, "root property %q missing x-since-version", name) + } + + // $defs are stamped by walkDefinitions. If its type assertions stop matching + // the schema structure, nested fields silently keep an empty version, so + // assert the stamp reached well beyond the root properties. + needle := `"x-since-version":"` + sentinel + `"` + stamped := strings.Count(mustMarshalSchema(t, s), needle) + assert.Greater(t, stamped, len(s.Properties), "x-since-version should reach $defs fields, not only root properties") +}