feat: improve logging (#34)

* feat: improve logging

* feat: bump to `0.4.0`

* docs: add features section in README
Luc Georges · 2023-10-18 11:12:37 +02:00 · committed by GitHub
parent fdd55dd5c1
commit 4aacd7087b
6 changed files with 198 additions and 73 deletions

Cargo.lock (generated)

@@ -659,12 +659,13 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"

 [[package]]
 name = "llm-ls"
-version = "0.3.0"
+version = "0.4.0"
 dependencies = [
  "home",
  "reqwest",
  "ropey",
  "serde",
+ "serde_json",
  "tokenizers",
  "tokio",
  "tower-lsp",
@@ -693,6 +694,7 @@ dependencies = [
  "tree-sitter-scala",
  "tree-sitter-swift",
  "tree-sitter-typescript",
+ "uuid",
 ]

 [[package]]
@@ -2039,6 +2041,17 @@ dependencies = [
  "serde",
 ]

+[[package]]
+name = "uuid"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d"
+dependencies = [
+ "getrandom",
+ "rand",
+ "serde",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.0"

README.md

@@ -3,7 +3,27 @@
 > [!IMPORTANT]
 > This is currently a work in progress, expect things to be broken!

-**llm-ls** is a LSP server leveraging LLMs for code completion (and more?).
+**llm-ls** is an LSP server leveraging LLMs to make your development experience smoother and more efficient.
+
+The goal of llm-ls is to provide a common platform for IDE extensions to be built on. llm-ls takes care of the heavy lifting with regard to interacting with LLMs so that extension code can be as lightweight as possible.
+
+## Features
+
+### Prompt
+
+Uses the current file as context to generate the prompt. Can use "fill in the middle" or not, depending on your needs.
+
+It also makes sure that you are within the context window of the model by tokenizing the prompt.
+
+### Telemetry
+
+Gathers information about requests and completions that can enable retraining.
+
+Note that **llm-ls** does not export any data anywhere (other than setting a user agent when querying the model API); everything is stored in a log file if you set the log level to `info`.
+
+### Completion
+
+**llm-ls** parses the AST of the code to determine if completions should be multi-line, single-line or empty (no completion).
+
 ## Compatible extensions
@@ -12,3 +32,12 @@ ## Compatible extensions
 - [x] [llm-intellij](https://github.com/huggingface/llm-intellij)
 - [ ] [jupytercoder](https://github.com/bigcode-project/jupytercoder)
+
+## Roadmap
+- support getting context from multiple files in the workspace
+- add a `suffix_percent` setting that determines the ratio of # of tokens for the prefix vs the suffix in the prompt
+- add context window fill percent, or change `context_window` to `max_tokens`
+- filter bad suggestions (repetitive, same as below, etc.)
+- support for ollama
+- support for llama.cpp
+- OTLP traces?
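
An aside on the Prompt feature above: fitting the prompt into the context window by counting tokens can be sketched in a few lines. This is a minimal illustration using the `tokenizers` crate from the dependency list, with a hypothetical `fit_prompt` helper; the real `build_prompt` in `src/main.rs` also handles FIM splits and suffixes:

    use tokenizers::Tokenizer;

    // Hypothetical helper: drop the oldest lines of context until the
    // encoded prompt fits within `context_window` tokens.
    fn fit_prompt(tokenizer: &Tokenizer, prefix: &str, context_window: usize) -> String {
        let mut lines: Vec<&str> = prefix.lines().collect();
        loop {
            let candidate = lines.join("\n");
            let n_tokens = tokenizer
                .encode(candidate.clone(), false)
                .map(|enc| enc.get_ids().len())
                .unwrap_or(usize::MAX);
            if n_tokens <= context_window || lines.is_empty() {
                return candidate;
            }
            lines.remove(0);
        }
    }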

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "llm-ls"
-version = "0.3.0"
+version = "0.4.0"
 edition = "2021"

 [[bin]]
@@ -11,6 +11,7 @@ home = "0.5"
 ropey = "1.6"
 reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
 serde = { version = "1", features = ["derive"] }
+serde_json = "1"
 tokenizers = { version = "0.13", default-features = false, features = ["onig"] }
 tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "rt-multi-thread"] }
 tower-lsp = "0.20"
@@ -40,3 +41,10 @@ tree-sitter-scala = "0.20"
 tree-sitter-swift = "0.3"
 tree-sitter-typescript = "0.20"

+[dependencies.uuid]
+version = "1.4"
+features = [
+  "v4",
+  "fast-rng",
+  "serde",
+]
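
The `v4` feature enables random (version 4) IDs, `fast-rng` selects a faster random source, and `serde` lets a `Uuid` pass through JSON payloads. A quick illustration (the `Tagged` struct is made up for the example):

    use serde::Serialize;
    use uuid::Uuid;

    #[derive(Serialize)]
    struct Tagged {
        request_id: Uuid,
    }

    fn main() {
        // With the `serde` feature, the id serializes as a hyphenated string.
        let tagged = Tagged { request_id: Uuid::new_v4() };
        println!("{}", serde_json::to_string(&tagged).unwrap());
    }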

src/document.rs

@@ -166,8 +166,7 @@ fn get_parser(language_id: LanguageId) -> Result<Parser> {
 }

 pub(crate) struct Document {
-    #[allow(dead_code)]
-    language_id: LanguageId,
+    pub(crate) language_id: LanguageId,
     pub(crate) text: Rope,
     parser: Parser,
     pub(crate) tree: Option<Tree>,

src/language_id.rs

@@ -1,6 +1,7 @@
+use serde::{Deserialize, Serialize};
 use std::fmt;

-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Serialize, Deserialize)]
 pub(crate) enum LanguageId {
     Bash,
     C,

src/main.rs

@@ -13,9 +13,10 @@ use tokio::sync::RwLock;
 use tower_lsp::jsonrpc::{Error, Result};
 use tower_lsp::lsp_types::*;
 use tower_lsp::{Client, LanguageServer, LspService, Server};
-use tracing::{debug, error, info, warn};
+use tracing::{debug, error, info, info_span, warn, Instrument};
 use tracing_appender::rolling;
 use tracing_subscriber::EnvFilter;
+use uuid::Uuid;

 mod document;
 mod language_id;
@@ -24,13 +25,23 @@ const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600);
 const NAME: &str = "llm-ls";
 const VERSION: &str = env!("CARGO_PKG_VERSION");

-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
 enum CompletionType {
     Empty,
     SingleLine,
     MultiLine,
 }

+impl Display for CompletionType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            CompletionType::Empty => write!(f, "empty"),
+            CompletionType::SingleLine => write!(f, "single_line"),
+            CompletionType::MultiLine => write!(f, "multi_line"),
+        }
+    }
+}
+
 fn should_complete(document: &Document, position: Position) -> CompletionType {
     let row = position.line as usize;
     let column = position.character as usize;
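
The `Display` impl above exists so `CompletionType` can be recorded as a structured log field: in `tracing` macros, `%value` captures a field via `Display` and `?value` via `Debug`, which the later hunks rely on. A minimal sketch, reusing the enum defined above:

    use tracing::info;

    // `%` records "single_line" (Display); `?` records "SingleLine" (Debug).
    fn log_completion_type(completion_type: &CompletionType) {
        info!(%completion_type, debug_repr = ?completion_type, "completion type chosen");
    }
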
@@ -129,7 +140,7 @@ struct APIRequest {
     parameters: APIParams,
 }

-#[derive(Debug, Deserialize)]
+#[derive(Debug, Serialize, Deserialize)]
 struct Generation {
     generated_text: String,
 }
@@ -196,6 +207,19 @@ where
     Deserialize::deserialize(d).map(|b: Option<_>| b.unwrap_or(Ide::Unknown))
 }

+#[derive(Debug, Deserialize, Serialize)]
+struct AcceptedCompletion {
+    request_id: Uuid,
+    accepted_completion: u32,
+    shown_completions: Vec<u32>,
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+struct RejectedCompletion {
+    request_id: Uuid,
+    shown_completions: Vec<u32>,
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 struct CompletionParams {
     #[serde(flatten)]
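
For context on the `#[serde(flatten)]` attribute visible above: it inlines the fields of the standard LSP `TextDocumentPositionParams` at the top level of `CompletionParams`. A toy example of what flattening does, with stand-in types rather than the real params:

    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    struct Inner {
        line: u32,
        character: u32,
    }

    #[derive(Debug, Deserialize)]
    struct Params {
        #[serde(flatten)]
        position: Inner, // `line` and `character` sit beside `model` in the JSON
        model: String,
    }

    fn main() {
        let p: Params =
            serde_json::from_str(r#"{"line": 3, "character": 14, "model": "example"}"#).unwrap();
        assert_eq!(p.position.character, 14);
    }
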
@@ -213,6 +237,12 @@ struct CompletionParams {
     tls_skip_verify_insecure: bool,
 }

+#[derive(Debug, Deserialize, Serialize)]
+struct CompletionResult {
+    request_id: Uuid,
+    completions: Vec<Completion>,
+}
+
 fn internal_error<E: Display>(err: E) -> Error {
     let err_msg = err.to_string();
     error!(err_msg);
@@ -292,7 +322,7 @@ fn build_prompt(
             fim.middle
         );
         let time = t.elapsed().as_millis();
-        info!(build_prompt_ms = time, "built prompt in {time} ms");
+        info!(prompt, build_prompt_ms = time, "built prompt in {time} ms");
         Ok(prompt)
     } else {
         let mut token_count = context_window;
@@ -321,7 +351,7 @@ fn build_prompt(
         }
         let prompt = before.into_iter().rev().collect::<Vec<_>>().join("");
         let time = t.elapsed().as_millis();
-        info!(build_prompt_ms = time, "built prompt in {time} ms");
+        info!(prompt, build_prompt_ms = time, "built prompt in {time} ms");
         Ok(prompt)
     }
 }
@@ -334,6 +364,7 @@ async fn request_completion(
     api_token: Option<&String>,
     prompt: String,
 ) -> Result<Vec<Generation>> {
+    let t = Instant::now();
     let res = http_client
         .post(build_url(model))
         .json(&APIRequest {
@@ -345,11 +376,19 @@ async fn request_completion(
         .await
         .map_err(internal_error)?;

-    match res.json().await.map_err(internal_error)? {
-        APIResponse::Generation(gen) => Ok(vec![gen]),
-        APIResponse::Generations(gens) => Ok(gens),
-        APIResponse::Error(err) => Err(internal_error(err)),
-    }
+    let generations = match res.json().await.map_err(internal_error)? {
+        APIResponse::Generation(gen) => vec![gen],
+        APIResponse::Generations(gens) => gens,
+        APIResponse::Error(err) => return Err(internal_error(err)),
+    };
+    let time = t.elapsed().as_millis();
+    info!(
+        model,
+        compute_generations_ms = time,
+        generations = serde_json::to_string(&generations).map_err(internal_error)?,
+        "{model} computed generations in {time} ms"
+    );
+    Ok(generations)
 }

 fn parse_generations(
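
`request_completion` now uses the same stopwatch pattern as `build_prompt`: bind `Instant::now()` before the work, then log the elapsed milliseconds as a machine-readable field next to a human-readable message. The shape of that pattern in isolation (field and function names here are illustrative):

    use std::time::Instant;
    use tracing::info;

    fn timed_step() {
        let t = Instant::now();
        // ... the instrumented work runs here ...
        let time = t.elapsed().as_millis();
        info!(step_ms = time, "step finished in {time} ms");
    }
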
@@ -505,13 +544,30 @@ fn build_url(model: &str) -> String {
 }

 impl Backend {
-    async fn get_completions(&self, params: CompletionParams) -> Result<Vec<Completion>> {
-        info!("get_completions {params:?}");
+    async fn get_completions(&self, params: CompletionParams) -> Result<CompletionResult> {
+        let request_id = Uuid::new_v4();
+        let span = info_span!("completion_request", %request_id);
+        async move {
         let document_map = self.document_map.read().await;
         let document = document_map
             .get(params.text_document_position.text_document.uri.as_str())
             .ok_or_else(|| internal_error("failed to find document"))?;
+        info!(
+            document_url = %params.text_document_position.text_document.uri,
+            cursor_line = ?params.text_document_position.position.line,
+            cursor_character = ?params.text_document_position.position.character,
+            language_id = %document.language_id,
+            model = params.model,
+            ide = %params.ide,
+            max_new_tokens = params.request_params.max_new_tokens,
+            temperature = params.request_params.temperature,
+            do_sample = params.request_params.do_sample,
+            top_p = params.request_params.top_p,
+            stop_tokens = ?params.request_params.stop_tokens,
+            "received completion request for {}",
+            params.text_document_position.text_document.uri
+        );
         if params.api_token.is_none() {
             let now = Instant::now();
             let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await;
@@ -523,9 +579,9 @@ impl Backend {
         }

         let completion_type = should_complete(document, params.text_document_position.position);
-        info!("completion type: {completion_type:?}");
+        info!(%completion_type, "completion type: {completion_type:?}");
         if completion_type == CompletionType::Empty {
-            return Ok(vec![]);
+            return Ok(CompletionResult { request_id, completions: vec![] });
         }

         let tokenizer = get_tokenizer(
@@ -562,11 +618,28 @@ impl Backend {
         )
         .await?;

-        Ok(parse_generations(
-            result,
-            &params.tokens_to_clear,
-            completion_type,
-        ))
+        let completions = parse_generations(result, &params.tokens_to_clear, completion_type);
+        Ok(CompletionResult { request_id, completions })
+        }.instrument(span).await
+    }
+
+    async fn accept_completion(&self, accepted: AcceptedCompletion) -> Result<()> {
+        info!(
+            request_id = %accepted.request_id,
+            accepted_position = accepted.accepted_completion,
+            shown_completions = serde_json::to_string(&accepted.shown_completions).map_err(internal_error)?,
+            "accepted completion"
+        );
+        Ok(())
+    }
+
+    async fn reject_completion(&self, rejected: RejectedCompletion) -> Result<()> {
+        info!(
+            request_id = %rejected.request_id,
+            shown_completions = serde_json::to_string(&rejected.shown_completions).map_err(internal_error)?,
+            "rejected completion"
+        );
+        Ok(())
+    }
 }
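
The key pattern above is `info_span!` plus `Instrument`: the span carries `request_id`, every event emitted inside the instrumented async block inherits it, and the accept/reject events sent later by the client echo the same id, so log lines can be joined offline. The pattern in isolation:

    use tracing::{info, info_span, Instrument};
    use uuid::Uuid;

    async fn handle_request() {
        let request_id = Uuid::new_v4();
        // `%request_id` records the span field via Display.
        let span = info_span!("completion_request", %request_id);
        async move {
            // Events here are annotated with `request_id` automatically.
            info!("handling completion request");
        }
        .instrument(span)
        .await;
    }
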
@@ -724,6 +797,8 @@ async fn main() {
         )),
     })
     .custom_method("llm-ls/getCompletions", Backend::get_completions)
+    .custom_method("llm-ls/acceptCompletion", Backend::accept_completion)
+    .custom_method("llm-ls/rejectCompletion", Backend::reject_completion)
     .finish();

     Server::new(stdin, stdout, socket).serve(service).await;
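
For reference, a sketch of the params an editor extension might send to the new `llm-ls/acceptCompletion` endpoint, round-tripped through a copy of the struct defined above (the UUID value is made up):

    use serde::{Deserialize, Serialize};
    use serde_json::json;
    use uuid::Uuid;

    #[derive(Debug, Deserialize, Serialize)]
    struct AcceptedCompletion {
        request_id: Uuid,
        accepted_completion: u32,
        shown_completions: Vec<u32>,
    }

    fn main() {
        // The user accepted the first of two completions shown for a request.
        let params = json!({
            "request_id": "3f2504e0-4f89-41d3-9a0c-0305e82c3301",
            "accepted_completion": 0,
            "shown_completions": [0, 1]
        });
        let accepted: AcceptedCompletion = serde_json::from_value(params).unwrap();
        println!("{accepted:?}");
    }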