File ollama-avoid-recomputing-special-vocabulary.patch of Package ollama
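Avoid recomputing the special vocabulary on every request. Vocabulary
previously built its list of special tokens lazily, guarded by a
sync.Once; this patch introduces a NewVocabulary constructor that
precomputes the list once at construction time and updates all model
constructors and tests to use it. Since the slice is fully built before
the Vocabulary escapes the constructor, SpecialVocabulary() can return
it without any synchronization.
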
diff --git a/model/bytepairencoding_test.go b/model/bytepairencoding_test.go
index 7e310b56e51..ea9ffdf1c9e 100644
--- a/model/bytepairencoding_test.go
+++ b/model/bytepairencoding_test.go
@@ -60,11 +60,7 @@ func llama(t testing.TB) BytePairEncoding {
return NewBytePairEncoding(
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
- &Vocabulary{
- Values: tokens,
- Types: types,
- Merges: merges,
- },
+ NewVocabulary(tokens, types, nil, merges, nil, nil, false, false), // nil scores, BOS, and EOS; addBOS/addEOS false
)
}
diff --git a/model/models/llama/model.go b/model/models/llama/model.go
index 3cf782d00fc..0a923f4e6ab 100644
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@@ -36,18 +36,19 @@ func New(c fs.Config) (model.Model, error) {
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ c.Floats("tokenizer.ggml.scores"),
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Layers: make([]Layer, c.Uint("block_count")),
Options: &Options{
diff --git a/model/models/llama4/model.go b/model/models/llama4/model.go
index 8084760b0cc..5a1a4eae04b 100644
--- a/model/models/llama4/model.go
+++ b/model/models/llama4/model.go
@@ -32,22 +32,25 @@ func (p *Projector) Forward(ctx ml.Context, visionOutputs ml.Tensor) ml.Tensor {
}
func New(c fs.Config) (model.Model, error) {
+ vocab := model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
+ []int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
+ c.Ints("tokenizer.ggml.eos_token_ids")...,
+ ),
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ )
+
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer",
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
- []int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
- c.Ints("tokenizer.ggml.eos_token_ids")...,
- ),
- },
+ vocab,
),
ImageProcessor: newImageProcessor(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go
index 9d662fc1100..12844fc4610 100644
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -34,18 +34,19 @@ func New(c fs.Config) (model.Model, error) {
m := &Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
TextModel: newTextModel(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/qwen2/model.go b/model/models/qwen2/model.go
index 42338d0d693..a601a075a99 100644
--- a/model/models/qwen2/model.go
+++ b/model/models/qwen2/model.go
@@ -130,18 +130,19 @@ func New(c fs.Config) (model.Model, error) {
Layers: make([]DecoderLayer, c.Uint("block_count")),
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Options: Options{
hiddenSize: int(c.Uint("embedding_length")),
diff --git a/model/models/qwen25vl/model.go b/model/models/qwen25vl/model.go
index ee38cad9243..f3dcefc0a1c 100644
--- a/model/models/qwen25vl/model.go
+++ b/model/models/qwen25vl/model.go
@@ -30,18 +30,19 @@ func New(c fs.Config) (model.Model, error) {
m := &Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
TextModel: NewTextModel(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/qwen3/model.go b/model/models/qwen3/model.go
index 7a83e0d04ac..64fbd190574 100644
--- a/model/models/qwen3/model.go
+++ b/model/models/qwen3/model.go
@@ -194,18 +194,19 @@ func New(c fs.Config) (model.Model, error) {
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Layers: layers,
Options: &Options{
diff --git a/model/vocabulary.go b/model/vocabulary.go
index a86de58dfab..61ec04a4719 100644
--- a/model/vocabulary.go
+++ b/model/vocabulary.go
@@ -22,8 +22,7 @@ type Vocabulary struct {
BOS, EOS []int32
AddBOS, AddEOS bool
- specialOnce sync.Once
- special []string
+ special []string
valuesOnce sync.Once
values map[string]int32
@@ -32,6 +31,28 @@ type Vocabulary struct {
merge map[string]int32
}
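+// NewVocabulary builds a Vocabulary, precomputing the list of special
+// (control and user-defined) tokens so SpecialVocabulary does not
+// rebuild it on every call.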
+func NewVocabulary(values []string, types []int32, scores []float32, merges []string, bos, eos []int32, addBOS, addEOS bool) *Vocabulary {
+ v := &Vocabulary{
+ Values: values,
+ Types: types,
+ Scores: scores,
+ Merges: merges,
+ BOS: bos,
+ EOS: eos,
+ AddBOS: addBOS,
+ AddEOS: addEOS,
+ }
+ // Precompute the special-token list once; it can then be read without synchronization.
+ v.special = make([]string, 0, len(values)/10)
+ for i, t := range v.Types {
+ if t == TOKEN_TYPE_CONTROL || t == TOKEN_TYPE_USER_DEFINED {
+ v.special = append(v.special, v.Values[i])
+ }
+ }
+ return v
+}
+
func (v *Vocabulary) Is(id int32, special Special) bool {
switch special {
case SpecialBOS:
@@ -85,14 +108,6 @@ func (v *Vocabulary) Decode(id int32) string {
}
func (v *Vocabulary) SpecialVocabulary() []string {
- v.specialOnce.Do(func() {
- for i := range v.Values {
- if v.Types[i] == TOKEN_TYPE_CONTROL || v.Types[i] == TOKEN_TYPE_USER_DEFINED {
- v.special = append(v.special, v.Values[i])
- }
- }
- })
-
return v.special
}
diff --git a/model/vocabulary_test.go b/model/vocabulary_test.go
index 46f0ead23e5..62eec46596b 100644
--- a/model/vocabulary_test.go
+++ b/model/vocabulary_test.go
@@ -3,10 +3,11 @@ package model
import "testing"
func TestVocabulary_SpecialVocabulary(t *testing.T) {
- vocab := &Vocabulary{
- Values: []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
- Types: []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
- }
+ vocab := NewVocabulary(
+ []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
+ []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
+ nil, nil, nil, nil, false, false, // scores, merges, BOS, EOS, addBOS, addEOS
+ )
specialVocab := vocab.SpecialVocabulary()