File ollama-avoid-recomputing-special-vocabulary.patch of Package ollama
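Avoid recomputing the special vocabulary on every request. Vocabulary
previously built its list of special tokens lazily, guarded by a
sync.Once; this patch introduces a NewVocabulary constructor that
precomputes the list once at construction time and updates all model
constructors and tests to use it. Since the slice is fully built before
the Vocabulary escapes the constructor, SpecialVocabulary() can return
it without any synchronization.
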
diff --git a/model/bytepairencoding_test.go b/model/bytepairencoding_test.go
index 7e310b56e51..ea9ffdf1c9e 100644
--- a/model/bytepairencoding_test.go
+++ b/model/bytepairencoding_test.go
@@ -60,11 +60,7 @@ func llama(t testing.TB) BytePairEncoding {
return NewBytePairEncoding(
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
- &Vocabulary{
- Values: tokens,
- Types: types,
- Merges: merges,
- },
+ NewVocabulary(tokens, types, nil, merges, nil, nil, false, false), // nil scores, BOS, and EOS; addBOS/addEOS false
)
}
diff --git a/model/models/llama/model.go b/model/models/llama/model.go
index 3cf782d00fc..0a923f4e6ab 100644
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@@ -36,18 +36,19 @@ func New(c fs.Config) (model.Model, error) {
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ c.Floats("tokenizer.ggml.scores"),
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Layers: make([]Layer, c.Uint("block_count")),
Options: &Options{
diff --git a/model/models/llama4/model.go b/model/models/llama4/model.go
index 8084760b0cc..5a1a4eae04b 100644
--- a/model/models/llama4/model.go
+++ b/model/models/llama4/model.go
@@ -32,22 +32,25 @@ func (p *Projector) Forward(ctx ml.Context, visionOutputs ml.Tensor) ml.Tensor {
}
func New(c fs.Config) (model.Model, error) {
+ vocab := model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
+ []int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
+ c.Ints("tokenizer.ggml.eos_token_ids")...,
+ ),
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ )
+
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer",
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
- []int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
- c.Ints("tokenizer.ggml.eos_token_ids")...,
- ),
- },
+ vocab,
),
ImageProcessor: newImageProcessor(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go
index 9d662fc1100..12844fc4610 100644
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -34,18 +34,19 @@ func New(c fs.Config) (model.Model, error) {
m := &Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
TextModel: newTextModel(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/qwen2/model.go b/model/models/qwen2/model.go
index 42338d0d693..a601a075a99 100644
--- a/model/models/qwen2/model.go
+++ b/model/models/qwen2/model.go
@@ -130,18 +130,19 @@ func New(c fs.Config) (model.Model, error) {
Layers: make([]DecoderLayer, c.Uint("block_count")),
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Options: Options{
hiddenSize: int(c.Uint("embedding_length")),
diff --git a/model/models/qwen25vl/model.go b/model/models/qwen25vl/model.go
index ee38cad9243..f3dcefc0a1c 100644
--- a/model/models/qwen25vl/model.go
+++ b/model/models/qwen25vl/model.go
@@ -30,18 +30,19 @@ func New(c fs.Config) (model.Model, error) {
m := &Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
TextModel: NewTextModel(c),
VisionModel: newVisionModel(c),
diff --git a/model/models/qwen3/model.go b/model/models/qwen3/model.go
index 7a83e0d04ac..64fbd190574 100644
--- a/model/models/qwen3/model.go
+++ b/model/models/qwen3/model.go
@@ -194,18 +194,19 @@ func New(c fs.Config) (model.Model, error) {
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
- &model.Vocabulary{
- Values: c.Strings("tokenizer.ggml.tokens"),
- Types: c.Ints("tokenizer.ggml.token_type"),
- Merges: c.Strings("tokenizer.ggml.merges"),
- AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
- BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
- AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
- EOS: append(
+ model.NewVocabulary(
+ c.Strings("tokenizer.ggml.tokens"),
+ c.Ints("tokenizer.ggml.token_type"),
+ nil, // no token scores for this model
+ c.Strings("tokenizer.ggml.merges"),
+ []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+ append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
- },
+ c.Bool("tokenizer.ggml.add_bos_token", true),
+ c.Bool("tokenizer.ggml.add_eos_token", false),
+ ),
),
Layers: layers,
Options: &Options{
diff --git a/model/vocabulary.go b/model/vocabulary.go
index a86de58dfab..61ec04a4719 100644
--- a/model/vocabulary.go
+++ b/model/vocabulary.go
@@ -22,8 +22,7 @@ type Vocabulary struct {
BOS, EOS []int32
AddBOS, AddEOS bool
- specialOnce sync.Once
- special []string
+ special []string
valuesOnce sync.Once
values map[string]int32
@@ -32,6 +31,28 @@ type Vocabulary struct {
merge map[string]int32
}
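+// NewVocabulary builds a Vocabulary, precomputing the list of special
+// (control and user-defined) tokens so SpecialVocabulary does not
+// rebuild it on every call.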
+func NewVocabulary(values []string, types []int32, scores []float32, merges []string, bos, eos []int32, addBOS, addEOS bool) *Vocabulary {
+ v := &Vocabulary{
+ Values: values,
+ Types: types,
+ Scores: scores,
+ Merges: merges,
+ BOS: bos,
+ EOS: eos,
+ AddBOS: addBOS,
+ AddEOS: addEOS,
+ }
+ // Precompute the special-token list once; it can then be read without synchronization.
+ v.special = make([]string, 0, len(values)/10)
+ for i, t := range v.Types {
+ if t == TOKEN_TYPE_CONTROL || t == TOKEN_TYPE_USER_DEFINED {
+ v.special = append(v.special, v.Values[i])
+ }
+ }
+ return v
+}
+
func (v *Vocabulary) Is(id int32, special Special) bool {
switch special {
case SpecialBOS:
@@ -85,14 +108,6 @@ func (v *Vocabulary) Decode(id int32) string {
}
func (v *Vocabulary) SpecialVocabulary() []string {
- v.specialOnce.Do(func() {
- for i := range v.Values {
- if v.Types[i] == TOKEN_TYPE_CONTROL || v.Types[i] == TOKEN_TYPE_USER_DEFINED {
- v.special = append(v.special, v.Values[i])
- }
- }
- })
-
return v.special
}
diff --git a/model/vocabulary_test.go b/model/vocabulary_test.go
index 46f0ead23e5..62eec46596b 100644
--- a/model/vocabulary_test.go
+++ b/model/vocabulary_test.go
@@ -3,10 +3,11 @@ package model
import "testing"
func TestVocabulary_SpecialVocabulary(t *testing.T) {
- vocab := &Vocabulary{
- Values: []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
- Types: []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
- }
+ vocab := NewVocabulary(
+ []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
+ []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
+ nil, nil, nil, nil, false, false, // scores, merges, BOS, EOS, addBOS, addEOS
+ )
specialVocab := vocab.SpecialVocabulary()