feat: add dimensions field to embed requests (#12242)

* feat: add field to truncate embeddings

* add openai embeddings for dimensions
This commit is contained in:
Michael Yang
2025-09-11 10:36:10 -07:00
committed by GitHub
parent 8a7e2055d2
commit feb18cd710
4 changed files with 16 additions and 9 deletions

View File

@@ -388,8 +388,12 @@ type EmbedRequest struct {
 	// this request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Truncate truncates the input to fit the model's max sequence length.
 	Truncate *bool `json:"truncate,omitempty"`
+	// Dimensions truncates the output embedding to the specified dimension.
+	Dimensions int `json:"dimensions,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]any `json:"options"`
 }

View File

@@ -1708,6 +1708,7 @@ Advanced parameters:
 - `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+- `dimensions`: number of dimensions for the embedding
 ### Examples

View File

@@ -78,6 +78,7 @@ type JsonSchema struct {
 type EmbedRequest struct {
 	Input any    `json:"input"`
 	Model string `json:"model"`
+	Dimensions int `json:"dimensions,omitempty"`
 }

 type StreamOptions struct {
@@ -1005,7 +1006,7 @@ func EmbeddingsMiddleware() gin.HandlerFunc {
 		}

 		var b bytes.Buffer
-		if err := json.NewEncoder(&b).Encode(api.EmbedRequest{Model: req.Model, Input: req.Input}); err != nil {
+		if err := json.NewEncoder(&b).Encode(api.EmbedRequest{Model: req.Model, Input: req.Input, Dimensions: req.Dimensions}); err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))
 			return
 		}

View File

@@ -558,7 +558,12 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 			if err != nil {
 				return err
 			}
-			embeddings[i] = normalize(embedding)
+			// TODO: this first normalization should be done by the model
+			embedding = normalize(embedding)
+			if req.Dimensions > 0 && req.Dimensions < len(embedding) {
+				embedding = normalize(embedding[:req.Dimensions])
+			}
+			embeddings[i] = embedding
 			return nil
 		})
 	}
@@ -584,11 +589,7 @@ func normalize(vec []float32) []float32 {
 		sum += v * v
 	}
-	norm := float32(0.0)
-	if sum > 0 {
-		norm = float32(1.0 / math.Sqrt(float64(sum)))
-	}
+	norm := float32(1.0 / max(math.Sqrt(float64(sum)), 1e-12))
 	for i := range vec {
 		vec[i] *= norm
 	}