Only warn of rate-limits when using HF endpoint (#58)

* Only warn of rate-limits when using HF endpoint

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>
This commit is contained in:
Markus Hennerbichler 2024-02-05 20:05:52 +00:00 committed by GitHub
parent c9a44e591c
commit 1499fd6cbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 7 deletions

View file

@@ -206,11 +206,11 @@ fn parse_openai_text(text: &str) -> Result<Vec<Generation>, jsonrpc::Error> {
} }
} }
const TGI: &str = "tgi"; pub(crate) const TGI: &str = "tgi";
const HUGGING_FACE: &str = "huggingface"; pub(crate) const HUGGING_FACE: &str = "huggingface";
const OLLAMA: &str = "ollama"; pub(crate) const OLLAMA: &str = "ollama";
const OPENAI: &str = "openai"; pub(crate) const OPENAI: &str = "openai";
const DEFAULT_ADAPTOR: &str = HUGGING_FACE; pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE;
fn unknown_adaptor_error(adaptor: Option<&String>) -> jsonrpc::Error { fn unknown_adaptor_error(adaptor: Option<&String>) -> jsonrpc::Error {
internal_error(format!("Unknown adaptor {:?}", adaptor)) internal_error(format!("Unknown adaptor {:?}", adaptor))

View file

@@ -26,6 +26,7 @@ mod language_id;
const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600); const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600);
pub const NAME: &str = "llm-ls"; pub const NAME: &str = "llm-ls";
pub const VERSION: &str = env!("CARGO_PKG_VERSION"); pub const VERSION: &str = env!("CARGO_PKG_VERSION");
const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co";
fn get_position_idx(rope: &Rope, row: usize, col: usize) -> Result<usize> { fn get_position_idx(rope: &Rope, row: usize, col: usize) -> Result<usize> {
Ok(rope.try_line_to_char(row).map_err(internal_error)? Ok(rope.try_line_to_char(row).map_err(internal_error)?
@@ -589,7 +590,7 @@ fn build_url(model: &str) -> String {
if model.starts_with("http://") || model.starts_with("https://") { if model.starts_with("http://") || model.starts_with("https://") {
model.to_owned() model.to_owned()
} else { } else {
format!("https://api-inference.huggingface.co/models/{model}") format!("https://{HF_INFERENCE_API_HOSTNAME}/models/{model}")
} }
} }
@@ -618,7 +619,8 @@ impl Backend {
"received completion request for {}", "received completion request for {}",
params.text_document_position.text_document.uri params.text_document_position.text_document.uri
); );
if params.api_token.is_none() { let is_using_inference_api = params.adaptor.as_ref().unwrap_or(&adaptors::DEFAULT_ADAPTOR.to_owned()).as_str() == adaptors::HUGGING_FACE;
if params.api_token.is_none() && is_using_inference_api {
let now = Instant::now(); let now = Instant::now();
let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await; let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await;
if now.duration_since(*unauthenticated_warn_at) > MAX_WARNING_REPEAT { if now.duration_since(*unauthenticated_warn_at) > MAX_WARNING_REPEAT {