Only warn of rate-limits when using HF endpoint (#58)
* Only warn of rate-limits when using HF endpoint

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>
commit 1499fd6cbf
parent c9a44e591c
@@ -206,11 +206,11 @@ fn parse_openai_text(text: &str) -> Result<Vec<Generation>, jsonrpc::Error> {
     }
 }
 
-const TGI: &str = "tgi";
-const HUGGING_FACE: &str = "huggingface";
-const OLLAMA: &str = "ollama";
-const OPENAI: &str = "openai";
-const DEFAULT_ADAPTOR: &str = HUGGING_FACE;
+pub(crate) const TGI: &str = "tgi";
+pub(crate) const HUGGING_FACE: &str = "huggingface";
+pub(crate) const OLLAMA: &str = "ollama";
+pub(crate) const OPENAI: &str = "openai";
+pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE;
 
 fn unknown_adaptor_error(adaptor: Option<&String>) -> jsonrpc::Error {
     internal_error(format!("Unknown adaptor {:?}", adaptor))
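The hunk above only changes visibility. A minimal sketch of why that matters, assuming (hypothetically) that another module in the same crate wants to compare a configured adaptor against these constants:

// Hypothetical sketch, not part of the diff: pub(crate) lets sibling modules
// reference the adaptor constants when resolving which backend is in use.
mod adaptors {
    pub(crate) const HUGGING_FACE: &str = "huggingface";
    pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE;
}

// Resolve the adaptor name, falling back to the crate-wide default.
fn is_hugging_face(adaptor: Option<&String>) -> bool {
    adaptor.map(String::as_str).unwrap_or(adaptors::DEFAULT_ADAPTOR) == adaptors::HUGGING_FACE
}

fn main() {
    assert!(is_hugging_face(None));
    assert!(!is_hugging_face(Some(&"ollama".to_owned())));
}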
@@ -26,6 +26,7 @@ mod language_id;
 const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600);
 pub const NAME: &str = "llm-ls";
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");
+const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co";
 
 fn get_position_idx(rope: &Rope, row: usize, col: usize) -> Result<usize> {
     Ok(rope.try_line_to_char(row).map_err(internal_error)?
@@ -589,7 +590,7 @@ fn build_url(model: &str) -> String {
     if model.starts_with("http://") || model.starts_with("https://") {
         model.to_owned()
     } else {
-        format!("https://api-inference.huggingface.co/models/{model}")
+        format!("https://{HF_INFERENCE_API_HOSTNAME}/models/{model}")
     }
 }
 
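A small usage sketch of build_url after this hunk; the model id and localhost URL below are illustrative values, not taken from the diff:

// Sketch of the behaviour: a bare model id is routed to the Inference API
// hostname, while a full URL is passed through untouched.
const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co";

fn build_url(model: &str) -> String {
    if model.starts_with("http://") || model.starts_with("https://") {
        model.to_owned()
    } else {
        format!("https://{HF_INFERENCE_API_HOSTNAME}/models/{model}")
    }
}

fn main() {
    // Example model id (hypothetical), resolved against the HF Inference API.
    assert_eq!(
        build_url("bigcode/starcoder"),
        "https://api-inference.huggingface.co/models/bigcode/starcoder"
    );
    // A self-hosted endpoint is used as-is.
    assert_eq!(build_url("http://localhost:8080"), "http://localhost:8080");
}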
@@ -618,7 +619,8 @@ impl Backend {
             "received completion request for {}",
             params.text_document_position.text_document.uri
         );
-        if params.api_token.is_none() {
+        let is_using_inference_api = params.adaptor.as_ref().unwrap_or(&adaptors::DEFAULT_ADAPTOR.to_owned()).as_str() == adaptors::HUGGING_FACE;
+        if params.api_token.is_none() && is_using_inference_api {
             let now = Instant::now();
             let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await;
             if now.duration_since(*unauthenticated_warn_at) > MAX_WARNING_REPEAT {
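A standalone sketch of the gating this hunk introduces, with a hypothetical helper name: the unauthenticated rate-limit warning should only fire when no API token is configured and the adaptor resolves to the Hugging Face Inference API, which is also the default when no adaptor is set.

// Hypothetical, simplified version of the condition added above.
const HUGGING_FACE: &str = "huggingface";
const DEFAULT_ADAPTOR: &str = HUGGING_FACE;

fn should_warn_about_rate_limits(api_token: Option<&str>, adaptor: Option<&str>) -> bool {
    let adaptor = adaptor.unwrap_or(DEFAULT_ADAPTOR);
    api_token.is_none() && adaptor == HUGGING_FACE
}

fn main() {
    assert!(should_warn_about_rate_limits(None, None)); // default adaptor is HF
    assert!(should_warn_about_rate_limits(None, Some("huggingface")));
    assert!(!should_warn_about_rate_limits(None, Some("ollama"))); // other backends: no warning
    assert!(!should_warn_about_rate_limits(Some("hf_token"), Some("huggingface")));
}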