Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 1 | // Copyright 2020 CUE Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package jsonschema |
| 16 | |
| 17 | import ( |
| 18 | "net/url" |
| 19 | "path" |
Marcel van Lohuizen | b5b7521 | 2020-05-19 15:11:03 +0200 | [diff] [blame] | 20 | "strconv" |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 21 | "strings" |
| 22 | |
Marcel van Lohuizen | 845df05 | 2020-07-26 13:15:45 +0200 | [diff] [blame] | 23 | "cuelang.org/go/cue" |
Marcel van Lohuizen | 435989a | 2020-05-06 18:43:58 +0200 | [diff] [blame] | 24 | "cuelang.org/go/cue/ast" |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 25 | "cuelang.org/go/cue/errors" |
| 26 | "cuelang.org/go/cue/token" |
Marcel van Lohuizen | b7083ff | 2020-05-12 11:38:19 +0200 | [diff] [blame] | 27 | "cuelang.org/go/internal" |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 28 | ) |
| 29 | |
| 30 | func (d *decoder) parseRef(p token.Pos, str string) []string { |
| 31 | u, err := url.Parse(str) |
| 32 | if err != nil { |
| 33 | d.addErr(errors.Newf(p, "invalid JSON reference: %s", err)) |
| 34 | return nil |
| 35 | } |
| 36 | |
| 37 | if u.Host != "" || u.Path != "" { |
| 38 | d.addErr(errors.Newf(p, "external references (%s) not supported", str)) |
| 39 | // TODO: handle |
| 40 | // host: |
| 41 | // If the host corresponds to a package known to cue, |
| 42 | // load it from there. It would prefer schema converted to |
| 43 | // CUE, although we could consider loading raw JSON schema |
| 44 | // if present. |
| 45 | // If not present, advise the user to run cue get. |
| 46 | // path: |
| 47 | // Look up on file system or relatively to authority location. |
| 48 | return nil |
| 49 | } |
| 50 | |
| 51 | if !path.IsAbs(u.Fragment) { |
| 52 | d.addErr(errors.Newf(p, "anchors (%s) not supported", u.Fragment)) |
| 53 | // TODO: support anchors |
| 54 | return nil |
| 55 | } |
| 56 | |
| 57 | // NOTE: Go bug?: url.URL has no raw representation of the fragment. This |
| 58 | // means that %2F gets translated to `/` before it can be split. This, in |
| 59 | // turn, means that field names cannot have a `/` as name. |
| 60 | |
Marcel van Lohuizen | b5b7521 | 2020-05-19 15:11:03 +0200 | [diff] [blame] | 61 | return splitFragment(u) |
| 62 | } |
| 63 | |
| 64 | // resolveURI parses a URI from n and resolves it in the current context. |
| 65 | // To resolve it in the current context, it looks for the closest URI from |
| 66 | // an $id in the parent scopes and the uses the URI resolution to get the |
| 67 | // new URI. |
| 68 | // |
| 69 | // This method is used to resolve any URI, including those from $id and $ref. |
| 70 | func (s *state) resolveURI(n cue.Value) *url.URL { |
| 71 | str, ok := s.strValue(n) |
| 72 | if !ok { |
| 73 | return nil |
| 74 | } |
| 75 | |
| 76 | u, err := url.Parse(str) |
| 77 | if err != nil { |
| 78 | s.addErr(errors.Newf(n.Pos(), "invalid JSON reference: %s", err)) |
| 79 | return nil |
| 80 | } |
| 81 | |
| 82 | for { |
| 83 | if s.id != nil { |
| 84 | u = s.id.ResolveReference(u) |
| 85 | break |
| 86 | } |
| 87 | if s.up == nil { |
| 88 | break |
| 89 | } |
| 90 | s = s.up |
| 91 | } |
| 92 | |
| 93 | return u |
| 94 | } |
| 95 | |
| 96 | const topSchema = "_schema" |
| 97 | |
| 98 | // makeCUERef converts a URI into a CUE reference for the current location. |
| 99 | // The returned identifier (or first expression in a selection chain), is |
| 100 | // hardwired to point to the resolved value. This will allow astutil.Sanitize |
| 101 | // to automatically unshadow any shadowed variables. |
| 102 | func (s *state) makeCUERef(n cue.Value, u *url.URL) ast.Expr { |
| 103 | a := splitFragment(u) |
| 104 | |
| 105 | switch fn := s.cfg.Map; { |
| 106 | case fn != nil: |
| 107 | // TODO: This block is only used in case s.cfg.Map is set, which is |
| 108 | // currently only used for OpenAPI. Handling should be brought more in |
| 109 | // line with JSON schema. |
| 110 | a, err := fn(n.Pos(), a) |
| 111 | if err != nil { |
| 112 | s.addErr(errors.Newf(n.Pos(), "invalid reference %q: %v", u, err)) |
| 113 | return nil |
| 114 | } |
| 115 | if len(a) == 0 { |
| 116 | // TODO: should we allow inserting at root level? |
| 117 | s.addErr(errors.Newf(n.Pos(), |
| 118 | "invalid empty reference returned by map for %q", u)) |
| 119 | return nil |
| 120 | } |
| 121 | sel, ok := a[0].(ast.Expr) |
| 122 | if !ok { |
| 123 | sel = &ast.BadExpr{} |
| 124 | } |
| 125 | for _, l := range a[1:] { |
| 126 | switch x := l.(type) { |
| 127 | case *ast.Ident: |
| 128 | sel = &ast.SelectorExpr{X: sel, Sel: x} |
| 129 | |
| 130 | case *ast.BasicLit: |
| 131 | sel = &ast.IndexExpr{X: sel, Index: x} |
| 132 | } |
| 133 | } |
| 134 | return sel |
| 135 | } |
| 136 | |
| 137 | var ident *ast.Ident |
| 138 | |
| 139 | for ; ; s = s.up { |
| 140 | if s.up == nil { |
| 141 | switch { |
| 142 | case u.Host == "" && u.Path == "", |
| 143 | s.id != nil && s.id.Host == u.Host && s.id.Path == u.Path: |
| 144 | if len(a) == 0 { |
| 145 | // refers to the top of the file. We will allow this by |
| 146 | // creating a helper schema as such: |
| 147 | // _schema: {...} |
| 148 | // _schema |
| 149 | // This is created at the finalization stage if |
| 150 | // hasSelfReference is set. |
| 151 | s.hasSelfReference = true |
| 152 | |
| 153 | ident = ast.NewIdent(topSchema) |
| 154 | ident.Node = s.obj |
| 155 | return ident |
| 156 | } |
| 157 | |
| 158 | ident, a = s.getNextIdent(n, a) |
| 159 | |
| 160 | case u.Host != "": |
| 161 | // Reference not found within scope. Create an import reference. |
| 162 | |
| 163 | // TODO: allow the configuration to specify a map from |
| 164 | // URI domain+paths to CUE packages. |
| 165 | |
| 166 | // TODO: currently only $ids that are in scope can be |
| 167 | // referenced. We could consider doing an extra pass to record |
| 168 | // all '$id's in a file to be able to link to them even if they |
| 169 | // are not in scope. |
| 170 | p := u.Path |
| 171 | |
| 172 | base := path.Base(p) |
| 173 | if !ast.IsValidIdent(base) { |
| 174 | if strings.HasSuffix(base, ".json") { |
| 175 | base = base[:len(base)-len(".json")] |
| 176 | } |
| 177 | if !ast.IsValidIdent(base) { |
| 178 | // Find something more clever to do there. For now just |
| 179 | // pick "schema" as the package name. |
| 180 | base = "schema" |
| 181 | } |
| 182 | p += ":" + base |
| 183 | } |
| 184 | |
| 185 | ident = ast.NewIdent(base) |
| 186 | ident.Node = &ast.ImportSpec{Path: ast.NewString(u.Host + p)} |
| 187 | |
| 188 | default: |
| 189 | // Just a path, not sure what that means. |
| 190 | s.errf(n, "unknown domain for reference %q", u) |
| 191 | return nil |
| 192 | } |
| 193 | break |
| 194 | } |
| 195 | |
| 196 | if s.id == nil { |
| 197 | continue |
| 198 | } |
| 199 | |
| 200 | if s.id.Host == u.Host && s.id.Path == u.Path { |
| 201 | if len(a) == 0 { |
| 202 | if len(s.idRef) == 0 { |
| 203 | // This is a reference to either root or a schema for which |
| 204 | // we do not yet support references. See Issue #386. |
| 205 | if s.up.up != nil { |
| 206 | s.errf(n, "cannot refer to internal schema %q", u) |
| 207 | return nil |
| 208 | } |
| 209 | |
| 210 | // This is referring to the root scope. There is a dummy |
| 211 | // state above the root state that we need to update. |
| 212 | s = s.up |
| 213 | |
| 214 | // refers to the top of the file. We will allow this by |
| 215 | // creating a helper schema as such: |
| 216 | // _schema: {...} |
| 217 | // _schema |
| 218 | // This is created at the finalization stage if |
| 219 | // hasSelfReference is set. |
| 220 | s.hasSelfReference = true |
| 221 | ident = ast.NewIdent(topSchema) |
| 222 | ident.Node = s.obj |
| 223 | return ident |
| 224 | } |
| 225 | |
| 226 | x := s.idRef[0] |
| 227 | if !x.isDef && !ast.IsValidIdent(x.name) { |
| 228 | s.errf(n, "referring to field %q not supported", x.name) |
| 229 | return nil |
| 230 | } |
| 231 | e := ast.NewIdent(x.name) |
| 232 | if len(s.idRef) == 1 { |
| 233 | return e |
| 234 | } |
| 235 | return newSel(e, s.idRef[1]) |
| 236 | } |
| 237 | ident, a = s.getNextIdent(n, a) |
| 238 | ident.Node = s.obj |
| 239 | break |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | return s.newSel(ident, n, a) |
| 244 | } |
| 245 | |
| 246 | // getNextSelector translates a JSON Reference path into a CUE path by consuming |
| 247 | // the first path elements and returning the corresponding CUE label. |
| 248 | func (s *state) getNextSelector(v cue.Value, a []string) (l label, tail []string) { |
| 249 | switch elem := a[0]; elem { |
| 250 | case "$defs", "definitions": |
| 251 | if len(a) == 1 { |
| 252 | s.errf(v, "cannot refer to %s section: must refer to one of its elements", a[0]) |
| 253 | return label{}, nil |
| 254 | } |
| 255 | |
| 256 | if name := "#" + a[1]; ast.IsValidIdent(name) { |
| 257 | return label{name, true}, a[2:] |
| 258 | } |
| 259 | |
| 260 | return label{"#", true}, a[1:] |
| 261 | |
| 262 | case "properties": |
| 263 | if len(a) == 1 { |
| 264 | s.errf(v, "cannot refer to %s section: must refer to one of its elements", a[0]) |
| 265 | return label{}, nil |
| 266 | } |
| 267 | |
| 268 | return label{a[1], false}, a[2:] |
| 269 | |
| 270 | default: |
| 271 | return label{elem, false}, a[1:] |
| 272 | |
| 273 | case "additionalProperties", |
| 274 | "patternProperties", |
| 275 | "items", |
| 276 | "additionalItems": |
| 277 | // TODO: as a temporary workaround, include the schema verbatim. |
| 278 | // TODO: provide definitions for these in CUE. |
| 279 | s.errf(v, "referring to field %q not yet supported", elem) |
| 280 | |
| 281 | // Other known fields cannot be supported. |
| 282 | return label{}, nil |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | // newSel converts a JSON Reference path and initial CUE identifier to |
| 287 | // a CUE selection path. |
| 288 | func (s *state) newSel(e ast.Expr, v cue.Value, a []string) ast.Expr { |
| 289 | for len(a) > 0 { |
| 290 | var label label |
| 291 | label, a = s.getNextSelector(v, a) |
| 292 | e = newSel(e, label) |
| 293 | } |
| 294 | return e |
| 295 | } |
| 296 | |
| 297 | // newSel converts label to a CUE index and creates an expression to index |
| 298 | // into e. |
| 299 | func newSel(e ast.Expr, label label) ast.Expr { |
| 300 | if label.isDef { |
| 301 | return ast.NewSel(e, label.name) |
| 302 | |
| 303 | } |
| 304 | if ast.IsValidIdent(label.name) && !internal.IsDefOrHidden(label.name) { |
| 305 | return ast.NewSel(e, label.name) |
| 306 | } |
| 307 | return &ast.IndexExpr{X: e, Index: ast.NewString(label.name)} |
| 308 | } |
| 309 | |
| 310 | func (s *state) setField(lab label, f *ast.Field) { |
| 311 | x := s.getRef(lab) |
| 312 | x.field = f |
| 313 | s.setRef(lab, x) |
| 314 | x = s.getRef(lab) |
| 315 | } |
| 316 | |
| 317 | func (s *state) getRef(lab label) refs { |
| 318 | if s.fieldRefs == nil { |
| 319 | s.fieldRefs = make(map[label]refs) |
| 320 | } |
| 321 | x, ok := s.fieldRefs[lab] |
| 322 | if !ok { |
| 323 | if lab.isDef || |
| 324 | (ast.IsValidIdent(lab.name) && !internal.IsDefOrHidden(lab.name)) { |
| 325 | x.ident = lab.name |
| 326 | } else { |
| 327 | x.ident = "_X" + strconv.Itoa(s.decoder.numID) |
| 328 | s.decoder.numID++ |
| 329 | } |
| 330 | s.fieldRefs[lab] = x |
| 331 | } |
| 332 | return x |
| 333 | } |
| 334 | |
| 335 | func (s *state) setRef(lab label, r refs) { |
| 336 | s.fieldRefs[lab] = r |
| 337 | } |
| 338 | |
| 339 | // getNextIdent gets the first CUE reference from a JSON Reference path and |
| 340 | // converts it to a CUE identifier. |
| 341 | func (s *state) getNextIdent(v cue.Value, a []string) (resolved *ast.Ident, tail []string) { |
| 342 | lab, a := s.getNextSelector(v, a) |
| 343 | |
| 344 | x := s.getRef(lab) |
| 345 | ident := ast.NewIdent(x.ident) |
| 346 | x.refs = append(x.refs, ident) |
| 347 | s.setRef(lab, x) |
| 348 | |
| 349 | return ident, a |
| 350 | } |
| 351 | |
| 352 | // linkReferences resolves identifiers to relevant nodes. This allows |
| 353 | // astutil.Sanitize to unshadow nodes if necessary. |
| 354 | func (s *state) linkReferences() { |
| 355 | for _, r := range s.fieldRefs { |
| 356 | if r.field == nil { |
| 357 | // TODO: improve error message. |
| 358 | s.errf(cue.Value{}, "reference to non-existing value %q", r.ident) |
| 359 | continue |
| 360 | } |
| 361 | |
| 362 | // link resembles the link value. See astutil.Resolve. |
| 363 | var link ast.Node |
| 364 | |
| 365 | ident, ok := r.field.Label.(*ast.Ident) |
| 366 | if ok && ident.Name == r.ident { |
| 367 | link = r.field.Value |
| 368 | } else if len(r.refs) > 0 { |
| 369 | r.field.Label = &ast.Alias{ |
| 370 | Ident: ast.NewIdent(r.ident), |
| 371 | Expr: r.field.Label.(ast.Expr), |
| 372 | } |
| 373 | link = r.field |
| 374 | } |
| 375 | |
| 376 | for _, i := range r.refs { |
| 377 | i.Node = link |
| 378 | } |
| 379 | } |
| 380 | } |
| 381 | |
// splitFragment splits the fragment part of a URI into path components. A
// single leading "/" and all trailing "/" are dropped before splitting on
// "/". The result is nil if no components remain.
//
// A fragment that does not start with "/" is split as is; its first
// character is preserved.
//
// TODO: this requires RawFragment introduced in go1.15 to function properly.
// As for now, CUE still uses go1.12.
func splitFragment(u *url.URL) []string {
	// Only strip the first character if it really is the leading slash of an
	// absolute path, rather than dropping it unconditionally.
	frag := strings.TrimPrefix(u.Fragment, "/")
	frag = strings.TrimRight(frag, "/")
	if frag == "" {
		return nil
	}
	return strings.Split(frag, "/")
}
| 397 | |
Marcel van Lohuizen | 435989a | 2020-05-06 18:43:58 +0200 | [diff] [blame] | 398 | func (d *decoder) mapRef(p token.Pos, str string, ref []string) []ast.Label { |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 399 | fn := d.cfg.Map |
| 400 | if fn == nil { |
| 401 | fn = jsonSchemaRef |
| 402 | } |
| 403 | a, err := fn(p, ref) |
| 404 | if err != nil { |
| 405 | if str == "" { |
| 406 | str = "#/" + strings.Join(ref, "/") |
| 407 | } |
| 408 | d.addErr(errors.Newf(p, "invalid reference %q: %v", str, err)) |
| 409 | return nil |
| 410 | } |
| 411 | if len(a) == 0 { |
| 412 | // TODO: should we allow inserting at root level? |
| 413 | if str == "" { |
| 414 | str = "#/" + strings.Join(ref, "/") |
| 415 | } |
| 416 | d.addErr(errors.Newf(p, |
| 417 | "invalid empty reference returned by map for %q", str)) |
| 418 | return nil |
| 419 | } |
| 420 | return a |
| 421 | } |
| 422 | |
Marcel van Lohuizen | 435989a | 2020-05-06 18:43:58 +0200 | [diff] [blame] | 423 | func jsonSchemaRef(p token.Pos, a []string) ([]ast.Label, error) { |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 424 | // TODO: technically, references could reference a |
| 425 | // non-definition. We disallow this case for the standard |
| 426 | // JSON Schema interpretation. We could detect cases that |
| 427 | // are not definitions and then resolve those as literal |
| 428 | // values. |
| 429 | if len(a) != 2 || (a[0] != "definitions" && a[0] != "$defs") { |
| 430 | return nil, errors.Newf(p, |
| 431 | // Don't mention the ability to use $defs, as this definition seems |
| 432 | // to already have been withdrawn from the JSON Schema spec. |
| 433 | "$ref must be of the form #/definitions/...") |
| 434 | } |
Marcel van Lohuizen | 435989a | 2020-05-06 18:43:58 +0200 | [diff] [blame] | 435 | name := a[1] |
| 436 | if ast.IsValidIdent(name) && |
| 437 | name != rootDefs[1:] && |
Marcel van Lohuizen | b7083ff | 2020-05-12 11:38:19 +0200 | [diff] [blame] | 438 | !internal.IsDefOrHidden(name) { |
Marcel van Lohuizen | 435989a | 2020-05-06 18:43:58 +0200 | [diff] [blame] | 439 | return []ast.Label{ast.NewIdent("#" + name)}, nil |
| 440 | } |
| 441 | return []ast.Label{ast.NewIdent(rootDefs), ast.NewString(name)}, nil |
Marcel van Lohuizen | 671b956 | 2020-03-10 12:42:45 +0100 | [diff] [blame] | 442 | } |