193 lines
5.4 KiB
Go
193 lines
5.4 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
type Hosts struct {
|
|
Hosts []Host `json:"hosts"`
|
|
}
|
|
|
|
type Host struct {
|
|
IP string `json:"ip"`
|
|
Port string `json:"port"`
|
|
Containers []Container `json:"containers"`
|
|
}
|
|
|
|
type Models struct {
|
|
Models []Model `json:"models"`
|
|
}
|
|
|
|
type Model struct {
|
|
Container string `json:"container"`
|
|
Name string `json:"name"`
|
|
Model string `json:"model"`
|
|
Modified_at string `json:"modified_at"`
|
|
Size int `json:"size"`
|
|
Details Details `json:"details"`
|
|
Ip string `json:"ip"`
|
|
Port int `json:"port"`
|
|
Engine string `json:"engine"`
|
|
State string `json:"state"`
|
|
}
|
|
|
|
// Vllm is the decoding target for the JSON document a vllm engine returns
// from its "/v1/models" endpoint (see get_models_list). Within this file only
// Data[n].ID is read; the remaining fields are decoded and retained as-is.
type Vllm struct {
	Object string `json:"object"`
	// Data holds one element per model listed in the response.
	Data []struct {
		// ID is the model identifier; get_models_list uses it as Model.Name.
		ID          string      `json:"id"`
		Object      string      `json:"object"`
		Created     int         `json:"created"`
		OwnedBy     string      `json:"owned_by"`
		Root        string      `json:"root"`
		Parent      interface{} `json:"parent"`
		MaxModelLen int         `json:"max_model_len"`
		// Permission is the list of permission records attached to the model.
		Permission []struct {
			ID                 string      `json:"id"`
			Object             string      `json:"object"`
			Created            int         `json:"created"`
			AllowCreateEngine  bool        `json:"allow_create_engine"`
			AllowSampling      bool        `json:"allow_sampling"`
			AllowLogprobs      bool        `json:"allow_logprobs"`
			AllowSearchIndices bool        `json:"allow_search_indices"`
			AllowView          bool        `json:"allow_view"`
			AllowFineTuning    bool        `json:"allow_fine_tuning"`
			Organization       string      `json:"organization"`
			Group              interface{} `json:"group"`
			IsBlocking         bool        `json:"is_blocking"`
		} `json:"permission"`
	} `json:"data"`
}
|
|
|
|
// Details is the nested "details" object of a Model entry. Every field is
// stored exactly as decoded; nothing in this type interprets the values.
type Details struct {
	Parent_model string `json:"parent_model"`
	Format       string `json:"format"`
	Family       string `json:"family"`
	// Families is the "families" array of the details object.
	Families           []string `json:"families"`
	Parameter_size     string   `json:"parameter_size"`
	Quantization_level string   `json:"quantization_level"`
}
|
|
|
|
func get_models_list(hosts Hosts) (Models, error) {
|
|
|
|
var ctr Containers
|
|
|
|
for _, host := range hosts.Hosts {
|
|
log.Printf("get llm server containers: %s -> %s\n", host.IP, host.Port)
|
|
ctr.Adds(get_containers_list(host, false))
|
|
}
|
|
|
|
retval := []Model{}
|
|
|
|
for _, engine := range ctr.Containers {
|
|
if engine.Engine == "ollama" {
|
|
if engine.State != "running" {
|
|
retval = append(retval, Model{Ip: engine.IP, Port: engine.Port, State: "stopped", Engine: "ollama"})
|
|
} else {
|
|
url := fmt.Sprintf("http://%s:%d/api/tags", engine.IP, engine.Port)
|
|
resp, err := http.Get(url)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to get %s: %v", url, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to read body: %v", err)
|
|
}
|
|
|
|
mods := Models{}
|
|
err = json.Unmarshal(body, &mods)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to unmarshal JSON: %v", err)
|
|
}
|
|
|
|
for n := range mods.Models {
|
|
mods.Models[n].Ip = engine.IP
|
|
mods.Models[n].Port = engine.Port
|
|
mods.Models[n].State = "running"
|
|
mods.Models[n].Container = engine.Name
|
|
mods.Models[n].Engine = "ollama"
|
|
log.Printf("mod: %s - %s:%d\n", mods.Models[n].Name, mods.Models[n].Ip, mods.Models[n].Port)
|
|
}
|
|
|
|
retval = append(retval, mods.Models...)
|
|
}
|
|
} else if engine.Engine == "vllm" {
|
|
if engine.State != "running" {
|
|
retval = append(retval, Model{Ip: engine.IP, Port: engine.Port, State: "stopped", Engine: "vllm"})
|
|
} else {
|
|
url := fmt.Sprintf("http://%s:%d/v1/models", engine.IP, engine.Port)
|
|
resp, err := http.Get(url)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to get %s: %v", url, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to read body: %v", err)
|
|
}
|
|
|
|
vllm := Vllm{}
|
|
err = json.Unmarshal(body, &vllm)
|
|
if err != nil {
|
|
return Models{}, fmt.Errorf("failed to unmarshal JSON: %v", err)
|
|
}
|
|
|
|
mods := Models{}
|
|
mods.Models = make([]Model, len(vllm.Data))
|
|
|
|
for n := range vllm.Data {
|
|
mods.Models[n].Name = vllm.Data[n].ID
|
|
mods.Models[n].Ip = engine.IP
|
|
mods.Models[n].Port = engine.Port
|
|
mods.Models[n].State = "running"
|
|
mods.Models[n].Container = engine.Name
|
|
mods.Models[n].Engine = "vllm"
|
|
log.Printf("mod: %s - %s:%d\n", mods.Models[n].Name, mods.Models[n].Ip, mods.Models[n].Port)
|
|
}
|
|
|
|
retval = append(retval, mods.Models...)
|
|
}
|
|
}
|
|
}
|
|
|
|
return Models{Models: retval}, nil
|
|
}
|
|
|
|
func (h *Hosts) Add(s string) {
|
|
ip_port := strings.Split(s, ":")
|
|
h.Hosts = append(h.Hosts, Host{IP: ip_port[0], Port: ip_port[1]})
|
|
}
|
|
|
|
func (h *Hosts) Init(server string) error {
|
|
|
|
if debug {
|
|
log.Printf("server: %s\n", server)
|
|
}
|
|
|
|
servers := strings.Split(server, ",")
|
|
|
|
if len(servers) == 0 {
|
|
return fmt.Errorf("no servers specified in config file: '%s'", server)
|
|
}
|
|
|
|
if len(servers) == 1 && servers[0] == "" {
|
|
return fmt.Errorf("no servers specified in config file: '%s'", server)
|
|
}
|
|
|
|
for _, server := range servers {
|
|
if !regexp.MustCompile(`^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}$`).MatchString(server) {
|
|
return fmt.Errorf("invalid server specified: '%s'", server)
|
|
}
|
|
h.Add(server)
|
|
}
|
|
|
|
return nil
|
|
}
|