From d89454de805c6d9507796cf2a262986db43ed849 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Fri, 5 Jul 2024 12:32:47 -0400 Subject: [PATCH] Use slot with cached prompt instead of least recently used (#5492) * Use common prefix to select slot * actually report `longest` --- llm/ext_server/server.cpp | 40 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp index 09970599..00a15b4a 100644 --- a/llm/ext_server/server.cpp +++ b/llm/ext_server/server.cpp @@ -1382,12 +1382,50 @@ struct llama_server_context } } + std::string common_prefix(const std::string& str1, const std::string& str2) { + auto mismatch_pair = std::mismatch(str1.begin(), str1.end(), str2.begin()); + return std::string(str1.begin(), mismatch_pair.first); + } + + // Find the slot that has the greatest common prefix + server_slot *prefix_slot(const json &prompt) { + if (!prompt.is_string()) { + return nullptr; + } + + std::string prompt_str = prompt.get(); + server_slot *slot = nullptr; + size_t longest = 0; + + for (server_slot &s : slots) { + if (s.available() && s.prompt.is_string()) { + std::string s_prompt = s.prompt.get(); + std::string prefix = common_prefix(s_prompt, prompt_str); + + if (prefix.size() > longest) { + slot = &s; + longest = prefix.size(); + } + } + } + + if (!slot) { + return get_slot(-1); + } + + LOG_INFO("slot with common prefix found", {{ + "slot_id", slot->id, + "characters", longest + }}); + return slot; + } + void process_single_task(task_server& task) { switch (task.type) { case TASK_TYPE_COMPLETION: { - server_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); + server_slot *slot = prefix_slot(task.data["prompt"]); if (slot == nullptr) { // if no slot is available, we defer this task for processing later