|
|
@ -1382,12 +1382,50 @@ struct llama_server_context |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Returns the longest leading run of characters shared by str1 and str2.
//
// The result is empty when either string is empty or when their first
// characters differ. Either argument may be the shorter one.
std::string common_prefix(const std::string& str1, const std::string& str2) {
    // Only the overlapping part of the two strings can belong to a common
    // prefix. Bounding the scan to it keeps std::mismatch from advancing the
    // str2 iterator past str2.end() when str2 is shorter than str1.
    const std::string::difference_type overlap =
        static_cast<std::string::difference_type>(std::min(str1.size(), str2.size()));

    auto mismatch_pair = std::mismatch(str1.begin(), str1.begin() + overlap, str2.begin());

    return std::string(str1.begin(), mismatch_pair.first);
}
|
|
|
|
|
|
|
|
|
|
|
// Find the slot that has the greatest common prefix
|
|
|
|
|
|
server_slot *prefix_slot(const json &prompt) { |
|
|
|
|
|
if (!prompt.is_string()) { |
|
|
|
|
|
return nullptr; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::string prompt_str = prompt.get<std::string>(); |
|
|
|
|
|
server_slot *slot = nullptr; |
|
|
|
|
|
size_t longest = 0; |
|
|
|
|
|
|
|
|
|
|
|
for (server_slot &s : slots) { |
|
|
|
|
|
if (s.available() && s.prompt.is_string()) { |
|
|
|
|
|
std::string s_prompt = s.prompt.get<std::string>(); |
|
|
|
|
|
std::string prefix = common_prefix(s_prompt, prompt_str); |
|
|
|
|
|
|
|
|
|
|
|
if (prefix.size() > longest) { |
|
|
|
|
|
slot = &s; |
|
|
|
|
|
longest = prefix.size(); |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (!slot) { |
|
|
|
|
|
return get_slot(-1); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
LOG_INFO("slot with common prefix found", {{ |
|
|
|
|
|
"slot_id", slot->id, |
|
|
|
|
|
"characters", longest |
|
|
|
|
|
}}); |
|
|
|
|
|
return slot; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
void process_single_task(task_server& task) |
|
|
void process_single_task(task_server& task) |
|
|
{ |
|
|
{ |
|
|
switch (task.type) |
|
|
switch (task.type) |
|
|
{ |
|
|
{ |
|
|
case TASK_TYPE_COMPLETION: { |
|
|
case TASK_TYPE_COMPLETION: { |
|
|
server_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); |
|
|
server_slot *slot = prefix_slot(task.data["prompt"]); |
|
|
if (slot == nullptr) |
|
|
if (slot == nullptr) |
|
|
{ |
|
|
{ |
|
|
// if no slot is available, we defer this task for processing later
|
|
|
// if no slot is available, we defer this task for processing later
|
|
|
|