mirror of https://gitee.com/namelin2022/ollama
Browse Source
This package provides a way to convert JSON schemas to equivalent EBNF. It is intended to be a replacement for llama.cpp's schema_to_grammar. This is still an early version and does not yet support all JSON schema features. The to-do list includes: - minimum/maximum constraints on integer types - minLength/maxLength constraints on string types - defs and refs (branch: bmizerany/grammar)
23 changed files with 1330 additions and 22 deletions
@ -0,0 +1,22 @@ |
|||
//go:build go1.24
|
|||
|
|||
package grammar |
|||
|
|||
import "testing" |
|||
|
|||
func BenchmarkFromSchema(b *testing.B) { |
|||
for tt := range testCases(b) { |
|||
b.Run("", func(b *testing.B) { |
|||
s := []byte(tt.schema) |
|||
|
|||
b.ReportAllocs() |
|||
for b.Loop() { |
|||
_, err := FromSchema(nil, s) |
|||
if err != nil { |
|||
b.Fatalf("GrammarFromSchema: %v", err) |
|||
} |
|||
} |
|||
}) |
|||
return |
|||
} |
|||
} |
|||
@ -0,0 +1,227 @@ |
|||
package grammar |
|||
|
|||
import ( |
|||
"bytes" |
|||
"encoding/json" |
|||
"fmt" |
|||
"iter" |
|||
"strconv" |
|||
|
|||
"github.com/ollama/ollama/grammar/jsonschema" |
|||
) |
|||
|
|||
// jsonTerms is the shared preamble of terminal and base rules emitted at
// the top of every generated grammar. The JSON rules follow RFC 7159.
//
// Fix: the exponent sign in the "exp" rule is now optional ("1e5" is valid
// JSON per RFC 7159/8259; the original required a "+" or "-").
//
// NOTE(review): the "escape" rule's bracketed form looks unusual for an
// escape production (no leading backslash) — confirm against the grammar
// dialect consumed downstream.
const jsonTerms = `
# Unicode
#
# Unicode characters can be specified directly in the grammar, for example
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
# (\UXXXXXXXX).
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}

# JSON grammar from RFC 7159
null ::= "null"
object ::= "{" (kv ("," kv)*)? "}"
array ::= "[" (value ("," value)*)? "]"
kv ::= string ":" value
integer ::= "0" | [1-9] [0-9]*
number ::= "-"? integer frac? exp?
frac ::= "." [0-9]+
exp ::= ("e" | "E") ("+" | "-")? [0-9]+
string ::= "\"" char* "\""
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
char ::= [^"\\] | escape
space ::= (" " | "\t" | "\n" | "\r")*
hex ::= [0-9] | [a-f] | [A-F]
boolean ::= "true" | "false"
value ::= object | array | string | number | boolean | "null"

# User-defined
`
|||
|
|||
// FromSchema generates a grammar from a JSON schema.
|
|||
func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) { |
|||
var s *jsonschema.Schema |
|||
if err := json.Unmarshal(jsonSchema, &s); err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
var g builder |
|||
|
|||
// "root" is the only rule that is guaranteed to exist, so we start
|
|||
// with its length for padding, and then adjust it as we go.
|
|||
g.pad = len("root") |
|||
for id := range dependencies("root", s) { |
|||
g.pad = max(g.pad, len(id)) |
|||
} |
|||
|
|||
g.b.WriteString(jsonTerms) |
|||
|
|||
ids := make(map[*jsonschema.Schema]string) |
|||
for id, s := range dependencies("root", s) { |
|||
ids[s] = id |
|||
g.define(id) |
|||
if err := fromSchema(&g, ids, s); err != nil { |
|||
return nil, err |
|||
} |
|||
} |
|||
g.define("root") |
|||
if err := fromSchema(&g, ids, s); err != nil { |
|||
return nil, err |
|||
} |
|||
g.define("") // finalize the last rule
|
|||
return g.b.Bytes(), nil |
|||
} |
|||
|
|||
// fromSchema appends the grammar production for schema s to the current
// rule in g. ids maps each subschema to the name of the rule it was
// defined under (see FromSchema), so nested schemas are referenced by
// rule name rather than expanded inline.
func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
	switch typ := s.EffectiveType(); typ {
	case "array":
		if len(s.PrefixItems) == 0 && s.Items == nil {
			// Unconstrained array: defer to the shared "array" rule.
			g.u("array")
		} else {
			g.q("[")
			// Fixed tuple positions first.
			for i, s := range s.PrefixItems {
				if i > 0 {
					g.q(",")
				}
				g.u(ids[s])
			}
			// Then zero-or-more trailing items when the tuple is open.
			if s.Items != nil {
				g.u("(")
				if len(s.PrefixItems) > 0 {
					g.q(",")
				}
				g.u(ids[s.Items])
				g.u(")*")
			}
			g.q("]")
		}
	case "object":
		if len(s.Properties) == 0 {
			// No declared properties: defer to the shared "object" rule.
			g.u("object")
		} else {
			// NOTE(review): every property is emitted as required, in
			// declaration order; the schema's "required" list is not
			// consulted — confirm that is the intended contract.
			g.q("{")
			for i, p := range s.Properties {
				name := ids[p]
				if i > 0 {
					g.q(",")
				}
				g.q(p.Name)
				g.q(":")
				g.u(name)
			}
			g.q("}")
		}
	case "number":
		buildConstrainedNumber(g, s)
	case "string":
		if len(s.Enum) == 0 {
			g.u("string")
		} else {
			// Enum: an alternation over the raw JSON encoding of each
			// allowed value, quoted as a terminal.
			g.u("(")
			for i, e := range s.Enum {
				if i > 0 {
					g.q("|")
				}
				g.q(string(e))
			}
			g.u(")")
		}
	case "boolean", "value", "null", "integer":
		// These map directly onto base rules defined in jsonTerms.
		g.u(typ)
	default:
		return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
	}
	return nil
}
|||
|
|||
// dependencies returns a sequence of all child dependencies of the schema in
// post-order.
//
// The first value is the id/pointer to the dependency, and the second value
// is the schema.
func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
	return func(yield func(string, *jsonschema.Schema) bool) {
		// Object properties: each child id is "<parent>_<index>".
		// Children are yielded before the property itself (post-order).
		for i, p := range s.Properties {
			id := fmt.Sprintf("%s_%d", id, i)
			for did, d := range dependencies(id, p) {
				if !yield(did, d) {
					return
				}
			}
			if !yield(id, p) {
				return
			}
		}
		// Tuple (prefixItems) entries.
		//
		// NOTE(review): this id drops the parent prefix ("tuple_%d"
		// instead of "%s_tuple_%d" as the Items branch below uses), so
		// tuples at different nesting depths could collide — confirm
		// against the expected ids in testdata/schemas.txt.
		for i, p := range s.PrefixItems {
			id := fmt.Sprintf("tuple_%d", i)
			for did, d := range dependencies(id, p) {
				id := fmt.Sprintf("%s_%s", id, did)
				if !yield(id, d) {
					return
				}
			}
			if !yield(id, p) {
				return
			}
		}
		// Open-array item schema, numbered after any tuple positions.
		if s.Items != nil {
			id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
			for did, d := range dependencies(id, s.Items) {
				if !yield(did, d) {
					return
				}
			}
			if !yield(id, s.Items) {
				return
			}
		}
	}
}
|||
|
|||
// builder accumulates the text of a grammar, one rule at a time.
type builder struct {
	b bytes.Buffer // accumulated grammar text

	// pad is the column width used to left-align rule names so that the
	// "::=" of every rule lines up.
	pad int

	// rules counts the rules started so far; define uses it to know
	// whether a previous rule must be terminated with ";\n".
	rules int

	// items counts items appended to the current rule; reset by define.
	// NOTE(review): nothing in this package ever increments items —
	// confirm whether the field is vestigial.
	items int
}
|||
|
|||
// define terminates the current rule, if any, and then either starts a new
|
|||
// rule or does nothing else if the name is empty.
|
|||
func (b *builder) define(name string) { |
|||
if b.rules > 0 { |
|||
b.b.WriteString(";\n") |
|||
} |
|||
if name == "" { |
|||
return |
|||
} |
|||
fmt.Fprintf(&b.b, "% -*s", b.pad, name) |
|||
b.b.WriteString(" ::=") |
|||
b.rules++ |
|||
b.items = 0 |
|||
} |
|||
|
|||
// quote appends a terminal to the current rule.
|
|||
func (b *builder) q(s string) { |
|||
if b.items > 0 { |
|||
b.b.WriteString(" ") |
|||
} |
|||
b.b.WriteString(" ") |
|||
b.b.WriteString(strconv.Quote(s)) |
|||
} |
|||
|
|||
// u appends a non-terminal to the current rule.
|
|||
func (b *builder) u(s string) { |
|||
if b.items > 0 { |
|||
b.b.WriteString(" ") |
|||
} |
|||
b.b.WriteString(" ") |
|||
b.b.WriteString(s) |
|||
} |
|||
|
|||
func buildConstrainedNumber(b *builder, s *jsonschema.Schema) { |
|||
if s.Minimum == 0 && s.Maximum == 0 { |
|||
b.u("TODO") |
|||
} else { |
|||
b.u("number") |
|||
} |
|||
} |
|||
@ -0,0 +1,75 @@ |
|||
package grammar |
|||
|
|||
import ( |
|||
"bufio" |
|||
"cmp" |
|||
"iter" |
|||
"strings" |
|||
"testing" |
|||
|
|||
_ "embed" |
|||
|
|||
"github.com/ollama/ollama/grammar/internal/diff" |
|||
) |
|||
|
|||
func TestFromSchema(t *testing.T) { |
|||
for tt := range testCases(t) { |
|||
t.Run(tt.name, func(t *testing.T) { |
|||
g, err := FromSchema(nil, []byte(tt.schema)) |
|||
if err != nil { |
|||
t.Fatalf("FromSchema: %v", err) |
|||
} |
|||
got := string(g) |
|||
got = strings.TrimPrefix(got, jsonTerms) |
|||
if got != tt.want { |
|||
t.Logf("schema:\n%s", tt.schema) |
|||
t.Fatal(string(diff.Diff("got", []byte(got), "want", []byte(tt.want)))) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
|
|||
// testCase is one schema→grammar golden case parsed from
// testdata/schemas.txt.
type testCase struct {
	name   string // from a preceding "#" comment, or "" when unnamed
	schema string // the JSON schema input (a single line)
	want   string // expected grammar rules, newline-terminated
}
|||
|
|||
// tests holds the raw contents of testdata/schemas.txt, embedded at build
// time; see testCases for the format.
//
//go:embed testdata/schemas.txt
var tests string
|||
|
|||
// testCases yields the golden cases parsed from tests
// (testdata/schemas.txt). Format: an optional "#"-comment names the next
// test, the following non-blank line is the JSON schema, and the lines
// after it — until a blank line or the next "#" comment — are the expected
// grammar.
func testCases(t testing.TB) iter.Seq[testCase] {
	t.Helper()
	return func(yield func(testCase) bool) {
		t.Helper()
		sc := bufio.NewScanner(strings.NewReader(tests))
		name := ""
		for sc.Scan() {
			line := strings.TrimSpace(sc.Text())
			if line == "" {
				// Blank line: any pending name no longer applies.
				name = ""
				continue
			}
			if line[0] == '#' {
				// First comment wins as the test name.
				name = cmp.Or(name, strings.TrimSpace(line[1:]))
				continue
			}
			// Current line is the schema; collect the expected grammar
			// until a blank line or the next comment.
			s := sc.Text()
			g := ""
			for sc.Scan() {
				line = strings.TrimSpace(sc.Text())
				if line == "" || line[0] == '#' {
					break
				}
				g += sc.Text() + "\n"
			}
			if !yield(testCase{name, s, g}) {
				return
			}
			// The line that ended the grammar may be the next test's
			// name comment (or "" for a blank line).
			name = strings.TrimSpace(strings.TrimPrefix(line, "#"))
		}
		if err := sc.Err(); err != nil {
			t.Fatalf("error reading tests: %v", err)
		}
	}
}
|||
@ -0,0 +1,261 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package diff |
|||
|
|||
import ( |
|||
"bytes" |
|||
"fmt" |
|||
"sort" |
|||
"strings" |
|||
) |
|||
|
|||
// A pair is a pair of values tracked for both the x and y side of a diff.
// It is typically a pair of line indexes: x indexes the old text, y the new.
type pair struct{ x, y int }
|||
|
|||
// Diff returns an anchored diff of the two texts old and new
// in the “unified diff” format. If old and new are identical,
// Diff returns a nil slice (no output).
//
// Unix diff implementations typically look for a diff with
// the smallest number of lines inserted and removed,
// which can in the worst case take time quadratic in the
// number of lines in the texts. As a result, many implementations
// either can be made to run for a long time or cut off the search
// after a predetermined amount of work.
//
// In contrast, this implementation looks for a diff with the
// smallest number of “unique” lines inserted and removed,
// where unique means a line that appears just once in both old and new.
// We call this an “anchored diff” because the unique lines anchor
// the chosen matching regions. An anchored diff is usually clearer
// than a standard diff, because the algorithm does not try to
// reuse unrelated blank lines or closing braces.
// The algorithm also guarantees to run in O(n log n) time
// instead of the standard O(n²) time.
//
// Some systems call this approach a “patience diff,” named for
// the “patience sorting” algorithm, itself named for a solitaire card game.
// We avoid that name for two reasons. First, the name has been used
// for a few different variants of the algorithm, so it is imprecise.
// Second, the name is frequently interpreted as meaning that you have
// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
// when in fact the algorithm is faster than the standard one.
func Diff(oldName string, old []byte, newName string, new []byte) []byte {
	if bytes.Equal(old, new) {
		return nil
	}
	// Split into lines; each element keeps its trailing newline (see lines).
	x := lines(old)
	y := lines(new)

	// Print diff header.
	var out bytes.Buffer
	fmt.Fprintf(&out, "diff %s %s\n", oldName, newName)
	fmt.Fprintf(&out, "--- %s\n", oldName)
	fmt.Fprintf(&out, "+++ %s\n", newName)

	// Loop over matches to consider,
	// expanding each match to include surrounding lines,
	// and then printing diff chunks.
	// To avoid setup/teardown cases outside the loop,
	// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
	// in the sequence of matches.
	var (
		done  pair     // printed up to x[:done.x] and y[:done.y]
		chunk pair     // start lines of current chunk
		count pair     // number of lines from each side in current chunk
		ctext []string // lines for current chunk
	)
	for _, m := range tgs(x, y) {
		if m.x < done.x {
			// Already handled scanning forward from earlier match.
			continue
		}

		// Expand matching lines as far as possible,
		// establishing that x[start.x:end.x] == y[start.y:end.y].
		// Note that on the first (or last) iteration we may (or definitely do)
		// have an empty match: start.x==end.x and start.y==end.y.
		start := m
		for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] {
			start.x--
			start.y--
		}
		end := m
		for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] {
			end.x++
			end.y++
		}

		// Emit the mismatched lines before start into this chunk.
		// (No effect on first sentinel iteration, when start = {0,0}.)
		for _, s := range x[done.x:start.x] {
			ctext = append(ctext, "-"+s)
			count.x++
		}
		for _, s := range y[done.y:start.y] {
			ctext = append(ctext, "+"+s)
			count.y++
		}

		// If we're not at EOF and have too few common lines,
		// the chunk includes all the common lines and continues.
		const C = 3 // number of context lines
		if (end.x < len(x) || end.y < len(y)) &&
			(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) {
			for _, s := range x[start.x:end.x] {
				ctext = append(ctext, " "+s)
				count.x++
				count.y++
			}
			done = end
			continue
		}

		// End chunk with common lines for context.
		if len(ctext) > 0 {
			n := end.x - start.x
			if n > C {
				n = C
			}
			for _, s := range x[start.x : start.x+n] {
				ctext = append(ctext, " "+s)
				count.x++
				count.y++
			}
			done = pair{start.x + n, start.y + n}

			// Format and emit chunk.
			// Convert line numbers to 1-indexed.
			// Special case: empty file shows up as 0,0 not 1,0.
			if count.x > 0 {
				chunk.x++
			}
			if count.y > 0 {
				chunk.y++
			}
			fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
			// Each line in ctext already ends in "\n" (see lines).
			for _, s := range ctext {
				out.WriteString(s)
			}
			count.x = 0
			count.y = 0
			ctext = ctext[:0]
		}

		// If we reached EOF, we're done.
		if end.x >= len(x) && end.y >= len(y) {
			break
		}

		// Otherwise start a new chunk.
		chunk = pair{end.x - C, end.y - C}
		for _, s := range x[chunk.x:end.x] {
			ctext = append(ctext, " "+s)
			count.x++
			count.y++
		}
		done = end
	}

	return out.Bytes()
}
|||
|
|||
// lines splits x into its lines, each retaining its trailing newline.
// If the file does not end in a newline, the final line gets one supplied,
// followed by the BSD/GNU diff warning about the missing newline
// (including the leading backslash).
func lines(x []byte) []string {
	parts := strings.SplitAfter(string(x), "\n")
	last := len(parts) - 1
	if parts[last] != "" {
		// No trailing newline: attach the warning to the final line.
		parts[last] += "\n\\ No newline at end of file\n"
		return parts
	}
	// Trailing newline present: drop the empty tail element.
	return parts[:last]
}
|||
|
|||
// tgs returns the pairs of indexes of the longest common subsequence
// of unique lines in x and y, where a unique line is one that appears
// once in x and once in y.
//
// The longest common subsequence algorithm is as described in
// Thomas G. Szymanski, “A Special Case of the Maximal Common
// Subsequence Problem,” Princeton TR #170 (January 1975),
// available at https://research.swtch.com/tgs170.pdf.
func tgs(x, y []string) []pair {
	// Count the number of times each string appears in a and b.
	// We only care about 0, 1, many, counted as 0, -1, -2
	// for the x side and 0, -4, -8 for the y side.
	// Using negative numbers now lets us distinguish positive line numbers later.
	m := make(map[string]int)
	for _, s := range x {
		if c := m[s]; c > -2 {
			m[s] = c - 1
		}
	}
	for _, s := range y {
		if c := m[s]; c > -8 {
			m[s] = c - 4
		}
	}

	// Now unique strings can be identified by m[s] = -1+-4.
	//
	// Gather the indexes of those strings in x and y, building:
	// xi[i] = increasing indexes of unique strings in x.
	// yi[i] = increasing indexes of unique strings in y.
	// inv[i] = index j such that x[xi[i]] = y[yi[j]].
	var xi, yi, inv []int
	for i, s := range y {
		if m[s] == -1+-4 {
			m[s] = len(yi)
			yi = append(yi, i)
		}
	}
	for i, s := range x {
		if j, ok := m[s]; ok && j >= 0 {
			xi = append(xi, i)
			inv = append(inv, j)
		}
	}

	// Apply Algorithm A from Szymanski's paper.
	// In those terms, A = J = inv and B = [0, n).
	// We add sentinel pairs {0,0}, and {len(x),len(y)}
	// to the returned sequence, to help the processing loop.
	J := inv
	n := len(xi)
	T := make([]int, n)
	L := make([]int, n)
	for i := range T {
		T[i] = n + 1
	}
	// Patience-sorting pass: L[i] is the length of the longest increasing
	// subsequence of J ending at position i.
	for i := range n {
		k := sort.Search(n, func(k int) bool {
			return T[k] >= J[i]
		})
		T[k] = J[i]
		L[i] = k + 1
	}
	// k becomes the overall LIS length.
	k := 0
	for _, v := range L {
		if k < v {
			k = v
		}
	}
	seq := make([]pair, 2+k)
	seq[1+k] = pair{len(x), len(y)} // sentinel at end
	// Walk right to left, filling seq[k..1] with the chosen match pairs.
	//
	// NOTE(review): lastj is never updated inside this loop, so the
	// "J[i] < lastj" guard is always true after initialization — compare
	// against upstream internal/diff to confirm no "lastj = J[i]" update
	// was lost in transcription.
	lastj := n
	for i := n - 1; i >= 0; i-- {
		if L[i] == k && J[i] < lastj {
			seq[k] = pair{xi[i], yi[J[i]]}
			k--
		}
	}
	seq[0] = pair{0, 0} // sentinel at start
	return seq
}
|||
@ -0,0 +1,44 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package diff |
|||
|
|||
import ( |
|||
"bytes" |
|||
"path/filepath" |
|||
"testing" |
|||
|
|||
"golang.org/x/tools/txtar" |
|||
) |
|||
|
|||
// clean normalizes txtar test data: "$\n" markers (used to make trailing
// blanks visible in the fixtures) become plain newlines, and a trailing
// "^D\n" marker (end-of-file without a final newline) is stripped.
func clean(text []byte) []byte {
	normalized := bytes.ReplaceAll(text, []byte("$\n"), []byte("\n"))
	return bytes.TrimSuffix(normalized, []byte("^D\n"))
}
|||
|
|||
func Test(t *testing.T) { |
|||
files, _ := filepath.Glob("testdata/*.txt") |
|||
if len(files) == 0 { |
|||
t.Fatalf("no testdata") |
|||
} |
|||
|
|||
for _, file := range files { |
|||
t.Run(filepath.Base(file), func(t *testing.T) { |
|||
a, err := txtar.ParseFile(file) |
|||
if err != nil { |
|||
t.Fatal(err) |
|||
} |
|||
if len(a.Files) != 3 || a.Files[2].Name != "diff" { |
|||
t.Fatalf("%s: want three files, third named \"diff\"", file) |
|||
} |
|||
diffs := Diff(a.Files[0].Name, clean(a.Files[0].Data), a.Files[1].Name, clean(a.Files[1].Data)) |
|||
want := clean(a.Files[2].Data) |
|||
if !bytes.Equal(diffs, want) { |
|||
t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file, |
|||
diffs, want, Diff("have", diffs, "want", want)) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
@ -0,0 +1,13 @@ |
|||
-- old -- |
|||
-- new -- |
|||
a |
|||
b |
|||
c |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -0,0 +1,3 @@ |
|||
+a |
|||
+b |
|||
+c |
|||
@ -0,0 +1,13 @@ |
|||
-- old -- |
|||
a |
|||
b |
|||
c |
|||
-- new -- |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,3 +0,0 @@ |
|||
-a |
|||
-b |
|||
-c |
|||
@ -0,0 +1,35 @@ |
|||
Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.” |
|||
https://www.cs.dartmouth.edu/~doug/diff.pdf |
|||
|
|||
-- old -- |
|||
a |
|||
b |
|||
c |
|||
d |
|||
e |
|||
f |
|||
g |
|||
-- new -- |
|||
w |
|||
a |
|||
b |
|||
x |
|||
y |
|||
z |
|||
e |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,7 +1,7 @@ |
|||
+w |
|||
a |
|||
b |
|||
-c |
|||
-d |
|||
+x |
|||
+y |
|||
+z |
|||
e |
|||
-f |
|||
-g |
|||
@ -0,0 +1,40 @@ |
|||
-- old -- |
|||
a |
|||
|
|||
b |
|||
|
|||
c |
|||
|
|||
d |
|||
|
|||
e |
|||
|
|||
f |
|||
-- new -- |
|||
a |
|||
|
|||
B |
|||
|
|||
C |
|||
|
|||
d |
|||
|
|||
e |
|||
|
|||
f |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,8 +1,8 @@ |
|||
a |
|||
$ |
|||
-b |
|||
- |
|||
-c |
|||
+B |
|||
+ |
|||
+C |
|||
$ |
|||
d |
|||
$ |
|||
@ -0,0 +1,38 @@ |
|||
-- old -- |
|||
1 |
|||
2 |
|||
3 |
|||
4 |
|||
5 |
|||
6 |
|||
7 |
|||
eight |
|||
nine |
|||
ten |
|||
eleven |
|||
-- new -- |
|||
1 |
|||
2 |
|||
3 |
|||
4 |
|||
5 |
|||
6 |
|||
7 |
|||
8 |
|||
9 |
|||
10 |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -5,7 +5,6 @@ |
|||
5 |
|||
6 |
|||
7 |
|||
-eight |
|||
-nine |
|||
-ten |
|||
-eleven |
|||
+8 |
|||
+9 |
|||
+10 |
|||
@ -0,0 +1,9 @@ |
|||
-- old -- |
|||
a |
|||
b |
|||
c^D |
|||
-- new -- |
|||
a |
|||
b |
|||
c^D |
|||
-- diff -- |
|||
@ -0,0 +1,18 @@ |
|||
-- old -- |
|||
a |
|||
b |
|||
c |
|||
-- new -- |
|||
a |
|||
b |
|||
c^D |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,3 +1,3 @@ |
|||
a |
|||
b |
|||
-c |
|||
+c |
|||
\ No newline at end of file |
|||
@ -0,0 +1,18 @@ |
|||
-- old -- |
|||
a |
|||
b |
|||
c^D |
|||
-- new -- |
|||
a |
|||
b |
|||
c |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,3 +1,3 @@ |
|||
a |
|||
b |
|||
-c |
|||
\ No newline at end of file |
|||
+c |
|||
@ -0,0 +1,62 @@ |
|||
-- old -- |
|||
1 |
|||
2 |
|||
3 |
|||
4 |
|||
5 |
|||
6 |
|||
7 |
|||
8 |
|||
9 |
|||
10 |
|||
11 |
|||
12 |
|||
13 |
|||
14 |
|||
14½ |
|||
15 |
|||
16 |
|||
17 |
|||
18 |
|||
19 |
|||
20 |
|||
-- new -- |
|||
1 |
|||
2 |
|||
3 |
|||
4 |
|||
5 |
|||
6 |
|||
8 |
|||
9 |
|||
10 |
|||
11 |
|||
12 |
|||
13 |
|||
14 |
|||
17 |
|||
18 |
|||
19 |
|||
20 |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -4,7 +4,6 @@ |
|||
4 |
|||
5 |
|||
6 |
|||
-7 |
|||
8 |
|||
9 |
|||
10 |
|||
@@ -12,9 +11,6 @@ |
|||
12 |
|||
13 |
|||
14 |
|||
-14½ |
|||
-15 |
|||
-16 |
|||
17 |
|||
18 |
|||
19 |
|||
@ -0,0 +1,5 @@ |
|||
-- old -- |
|||
hello world |
|||
-- new -- |
|||
hello world |
|||
-- diff -- |
|||
@ -0,0 +1,34 @@ |
|||
-- old -- |
|||
e |
|||
pi |
|||
4 |
|||
5 |
|||
6 |
|||
7 |
|||
8 |
|||
9 |
|||
10 |
|||
-- new -- |
|||
1 |
|||
2 |
|||
3 |
|||
4 |
|||
5 |
|||
6 |
|||
7 |
|||
8 |
|||
9 |
|||
10 |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,5 +1,6 @@ |
|||
-e |
|||
-pi |
|||
+1 |
|||
+2 |
|||
+3 |
|||
4 |
|||
5 |
|||
6 |
|||
@ -0,0 +1,40 @@ |
|||
Another example from Hunt and McIlroy, |
|||
“An Algorithm for Differential File Comparison.” |
|||
https://www.cs.dartmouth.edu/~doug/diff.pdf |
|||
|
|||
Anchored diff gives up on finding anything, |
|||
since there are no unique lines. |
|||
|
|||
-- old -- |
|||
a |
|||
b |
|||
c |
|||
a |
|||
b |
|||
b |
|||
a |
|||
-- new -- |
|||
c |
|||
a |
|||
b |
|||
a |
|||
b |
|||
c |
|||
-- diff -- |
|||
diff old new |
|||
--- old |
|||
+++ new |
|||
@@ -1,7 +1,6 @@ |
|||
-a |
|||
-b |
|||
-c |
|||
-a |
|||
-b |
|||
-b |
|||
-a |
|||
+c |
|||
+a |
|||
+b |
|||
+a |
|||
+b |
|||
+c |
|||
@ -0,0 +1,171 @@ |
|||
package jsonschema |
|||
|
|||
import ( |
|||
"bytes" |
|||
"encoding/json" |
|||
"errors" |
|||
) |
|||
|
|||
// Schema holds a JSON schema.
type Schema struct {
	// Name is the name of the property. For the parent/root property, this
	// is "root". For child properties, this is the name of the property.
	Name string `json:"-"`

	// Type is the type of the property.
	//
	// TODO: Union types (e.g. make this a []string).
	Type string

	// PrefixItems is a list of schemas for each item in a tuple. By
	// default, the tuple is "closed." unless Items is set to true or a
	// valid Schema.
	PrefixItems []*Schema

	// Items is the schema for each item in a list.
	//
	// If it is missing, or its JSON value is "null" or "false", it is nil.
	// If the JSON value is "true", it is set to the empty Schema. If the
	// JSON value is an object, it will be decoded as a Schema.
	Items *Schema

	// MinItems specifies the minimum number of items allowed in a list.
	MinItems int

	// MaxItems specifies the maximum number of items allowed in a list.
	MaxItems int

	// Properties is the schema for each property of an object, in the
	// order the properties were declared (see props.UnmarshalJSON).
	Properties []*Schema

	// Format is the format of the property. This is used to validate the
	// property against a specific format.
	//
	// It is the caller's responsibility to validate the property against
	// the format.
	Format string

	// Minimum specifies the minimum value for numeric properties.
	//
	// NOTE(review): the zero value cannot be distinguished from an
	// explicit minimum of 0 — confirm that is acceptable for callers.
	Minimum float64

	// Maximum specifies the maximum value for numeric properties.
	// (Same zero-value caveat as Minimum.)
	Maximum float64

	// Enum is a list of valid values for the property, kept as raw JSON
	// so each value can be emitted verbatim.
	Enum []json.RawMessage
}
|||
|
|||
// UnmarshalJSON decodes data into s, preserving property order and
// normalizing the several JSON encodings of "items".
//
// The local type S has Schema's fields but not its methods, so the embedded
// *S decodes with encoding/json's default behavior (avoiding infinite
// recursion back into this method), while the wrapper's Properties and
// Items fields shadow Schema's and decode with the custom props/items
// unmarshalers.
func (s *Schema) UnmarshalJSON(data []byte) error {
	type S Schema
	w := struct {
		Properties props
		Items      items
		*S
	}{
		S: (*S)(s),
	}
	if err := json.Unmarshal(data, &w); err != nil {
		return err
	}
	// Only install Items when the input was "true" or an object; missing,
	// "null", and "false" all leave s.Items nil (a closed tuple).
	if w.Items.set {
		s.Items = &w.Items.Schema
	}
	s.Properties = w.Properties
	return nil
}
|||
|
|||
// items decodes a JSON schema "items" field, which may be a boolean or an
// object. set records whether a usable schema was present ("true" or an
// object), as opposed to missing/"null"/"false".
type items struct {
	Schema
	set bool
}
|||
|
|||
func (s *items) UnmarshalJSON(data []byte) error { |
|||
switch b := data[0]; b { |
|||
case 't': |
|||
*s = items{set: true} |
|||
case '{': |
|||
type I items |
|||
if err := json.Unmarshal(data, (*I)(s)); err != nil { |
|||
return err |
|||
} |
|||
s.set = true |
|||
case 'n', 'f': |
|||
default: |
|||
return errors.New("invalid Items") |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// EffectiveType returns the effective type of the schema. If the Type field is
|
|||
// not empty, it is returned; otherwise:
|
|||
//
|
|||
// - If the schema has both Properties and Items, it returns an empty string.
|
|||
// - If the schema has Properties, it returns "object".
|
|||
// - If the schema has Items, it returns "array".
|
|||
// - If the schema has neither Properties nor Items, it returns "value".
|
|||
//
|
|||
// The returned string is never empty.
|
|||
func (d *Schema) EffectiveType() string { |
|||
if d.Type == "" { |
|||
if len(d.Properties) > 0 { |
|||
return "object" |
|||
} |
|||
if len(d.PrefixItems) > 0 || d.Items != nil { |
|||
return "array" |
|||
} |
|||
return "value" |
|||
} |
|||
return d.Type |
|||
} |
|||
|
|||
// props is an ordered list of properties. The order of the properties
// is the order in which they were defined in the schema.
type props []*Schema

// Compile-time assertion that props customizes its JSON decoding.
var _ json.Unmarshaler = (*props)(nil)
|||
|
|||
// UnmarshalJSON decodes a JSON object into v, preserving the order in
// which the properties appear in the input (decoding into a Go map would
// lose it). Each object key becomes a Schema's Name, and its value is
// decoded into the rest of that Schema.
func (v *props) UnmarshalJSON(data []byte) error {
	if len(data) == 0 {
		return nil
	}
	if data[0] != '{' {
		return errors.New("expected object")
	}

	// Token-stream decoding keys in input order.
	d := json.NewDecoder(bytes.NewReader(data))

	// TODO(bmizerany): Consider DisallowUnknownFields. Currently, we, like
	// llama.cpp, ignore unknown fields, which could lead to unexpected
	// behavior for clients of this package, since they may not be aware
	// that "additionalFields", "itemsPrefix", etc, are being ignored.
	//
	// For now, just do what llama.cpp does.

	t, err := d.Token()
	if err != nil {
		return err
	}
	if t != json.Delim('{') {
		return errors.New("expected object")
	}
	for d.More() {
		// Use the first token (map key) as the property name, then
		// decode the rest of the object fields into a Schema and
		// append.
		t, err := d.Token()
		if err != nil {
			return err
		}
		if t == json.Delim('}') {
			return nil
		}
		s := &Schema{
			Name: t.(string),
		}
		if err := d.Decode(s); err != nil {
			return err
		}
		*v = append(*v, s)
	}
	return nil
}
|||
@ -0,0 +1,104 @@ |
|||
package jsonschema |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"reflect" |
|||
"strings" |
|||
"testing" |
|||
|
|||
"github.com/google/go-cmp/cmp" |
|||
) |
|||
|
|||
// testSchemaBasic covers the JSON encodings of "prefixItems"/"items"
// (missing, null, false, true, object) plus basic scalar types.
const testSchemaBasic = `
{
	"properties": {
		"tupleClosedEmpty": { "prefixItems": [] },
		"tupleClosedMissing": { "prefixItems": [{}] },
		"tupleClosedNull": { "prefixItems": [{}], "items": null },
		"tupleClosedFalse": { "prefixItems": [{}], "items": false },
		"tupleOpenTrue": { "prefixItems": [{}], "items": true },
		"tupleOpenEmpty": { "prefixItems": [{}], "items": {} },
		"tupleOpenTyped": { "prefixItems": [{}], "items": {"type": "boolean"} },
		"tupleOpenMax": { "prefixItems": [{}], "items": true, "maxItems": 3},

		"array": { "items": {"type": "number"} },

		"null": { "type": "null" },
		"string": { "type": "string" },
		"boolean": { "type": "boolean" }
	}
}
`
|||
|
|||
// TestSchemaUnmarshal checks that the custom unmarshalers preserve property
// order and normalize "items": missing/null/false collapse to a nil Items,
// while true and {} both become the empty Schema.
func TestSchemaUnmarshal(t *testing.T) {
	var got *Schema
	if err := json.Unmarshal([]byte(testSchemaBasic), &got); err != nil {
		t.Fatalf("Unmarshal: %v", err)
	}
	want := &Schema{
		Properties: []*Schema{
			{Name: "tupleClosedEmpty", PrefixItems: []*Schema{}, Items: nil},
			{Name: "tupleClosedMissing", PrefixItems: []*Schema{{}}, Items: nil},
			{Name: "tupleClosedNull", PrefixItems: []*Schema{{}}, Items: nil},
			{Name: "tupleClosedFalse", PrefixItems: []*Schema{{}}, Items: nil},

			{Name: "tupleOpenTrue", PrefixItems: []*Schema{{}}, Items: &Schema{}},
			{Name: "tupleOpenEmpty", PrefixItems: []*Schema{{}}, Items: &Schema{}},
			{Name: "tupleOpenTyped", PrefixItems: []*Schema{{}}, Items: &Schema{Type: "boolean"}},
			{Name: "tupleOpenMax", PrefixItems: []*Schema{{}}, Items: &Schema{}, MaxItems: 3},

			{Name: "array", Items: &Schema{Type: "number"}},

			{Name: "null", Type: "null"},
			{Name: "string", Type: "string"},
			{Name: "boolean", Type: "boolean"},
		},
	}

	if diff := cmp.Diff(want, got); diff != "" {
		t.Errorf("(-want, +got)\n%s", diff)
	}
}
|||
|
|||
// TestEffectiveType verifies the type-inference fallbacks: an explicit
// "type" wins; otherwise properties imply "object", items/prefixItems
// imply "array", and anything else is the catch-all "value".
func TestEffectiveType(t *testing.T) {
	const schema = `
	{"properties": {
		"o": {"type": "object"},
		"a": {"type": "array"},
		"n": {"type": "number"},
		"s": {"type": "string"},
		"z": {"type": "null"},
		"b": {"type": "boolean"},

		"t0": {"prefixItems": [{}], "items": {"type": "number"}},
		"t1": {"items": {"type": "number"}, "maxItems": 3},

		"v": {"maxItems": 3}
	}}
	`

	var s *Schema
	if err := json.Unmarshal([]byte(schema), &s); err != nil {
		t.Fatalf("json.Unmarshal: %v", err)
	}

	// Collect the effective type of each property in declaration order.
	var got []string
	for _, p := range s.Properties {
		got = append(got, p.EffectiveType())
	}

	want := strings.Fields(`
		object
		array
		number
		string
		null
		boolean
		array
		array
		value
	`)
	if !reflect.DeepEqual(want, got) {
		t.Errorf("\ngot:\n\t%v\nwant:\n\t%v", got, want)
	}
}
|||
@ -0,0 +1,76 @@ |
|||
# This file holds tests for JSON schema to EBNF grammar conversions. |
|||
# |
|||
# The format is a JSON schema, followed by the expected EBNF grammar. Each test |
|||
# MAY be preceded by a comment that describes the test (e.g. the test name), followed by |
|||
# the JSON schema and the expected EBNF grammar. If no comment is present, the test |
|||
# name the tests number in the file (e.g. "#0", "#1", etc.) |
|||
# |
|||
# Blank lines signify the end or start of a new test. Comments can be added |
|||
# anywhere in the file, but they must be preceded by a '#' character and start at |
|||
# the beginning of the line. |
|||
|
|||
# default |
|||
{} |
|||
root ::= value; |
|||
|
|||
{"properties": {}} |
|||
root ::= value; |
|||
|
|||
# array |
|||
{"properties": {"a": {"type": "array", "items": {"type": "string"}}}} |
|||
root_0_tuple_0 ::= string; |
|||
root_0 ::= "[" ( root_0_tuple_0 )* "]"; |
|||
root ::= "{" "a" ":" root_0 "}"; |
|||
|
|||
# array with nested array |
|||
{"type": "array", "items": {"type": "array", "items": {"type": "string"}}} |
|||
root_tuple_0_tuple_0 ::= string; |
|||
root_tuple_0 ::= "[" ( root_tuple_0_tuple_0 )* "]"; |
|||
root ::= "[" ( root_tuple_0 )* "]"; |
|||
|
|||
# object |
|||
{"properties": {"e": {}}} |
|||
root_0 ::= value; |
|||
root ::= "{" "e" ":" root_0 "}"; |
|||
|
|||
# object with nested object |
|||
{"properties": {"o": {"type": "object", "properties": {"e": {}}}}} |
|||
root_0_0 ::= value; |
|||
root_0 ::= "{" "e" ":" root_0_0 "}"; |
|||
root ::= "{" "o" ":" root_0 "}"; |
|||
|
|||
# boolean |
|||
{"type": "boolean"} |
|||
root ::= boolean; |
|||
|
|||
# number |
|||
{"properties": {"n": {"type": "number", "minimum": 123, "maximum": 4567}}} |
|||
root_0 ::= number; |
|||
root ::= "{" "n" ":" root_0 "}"; |
|||
|
|||
# string |
|||
{"type": "string"} |
|||
root ::= string; |
|||
|
|||
# string with enum |
|||
{"type": "string", "enum": ["a", "b", "c"]} |
|||
root ::= ( "\"a\"" "|" "\"b\"" "|" "\"c\"" ); |
|||
|
|||
# spaces in key |
|||
{"properties": {"a b": {}}} |
|||
root_0 ::= value; |
|||
root ::= "{" "a b" ":" root_0 "}"; |
|||
|
|||
# issue7978 |
|||
{ "type": "object", "properties": { "steps": { "type": "array", "items": { "type": "object", "properties": { "explanation": { "type": "string" }, "output": { "type": "string" } }, "required": [ "explanation", "output" ], "additionalProperties": false } }, "final_answer": { "type": "string" } }, "required": [ "steps", "final_answer" ], "additionalProperties": false } |
|||
root_0_tuple_0_0 ::= string; |
|||
root_0_tuple_0_1 ::= string; |
|||
root_0_tuple_0 ::= "{" "explanation" ":" root_0_tuple_0_0 "," "output" ":" root_0_tuple_0_1 "}"; |
|||
root_0 ::= "[" ( root_0_tuple_0 )* "]"; |
|||
root_1 ::= string; |
|||
root ::= "{" "steps" ":" root_0 "," "final_answer" ":" root_1 "}"; |
|||
|
|||
# !! # special characters in key |
|||
# !! {"properties": {"a!b": {}}} |
|||
# !! !invalid character '!' in key |
|||
# !! |
|||
Loading…
Reference in new issue