From fc5230dffadb5949e25c47ec85de0c8ff8d1bf70 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Mon, 14 Aug 2023 15:23:24 -0300
Subject: [PATCH 1/3] Add context to api docs

---
 docs/api.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/api.md b/docs/api.md
index 9fec79a2..f361823b 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -38,6 +38,7 @@ Advanced parameters:
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `system`: system prompt (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
+- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
 
 ### Request
 
@@ -71,6 +72,7 @@ The final response in the stream also includes additional data about the generat
 - `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
 - `eval_count`: number of tokens in the response
 - `eval_duration`: time in nanoseconds spent generating the response
+- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
 
 To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration`, then multiply by 10^9 (durations are reported in nanoseconds).
 
@@ -78,6 +80,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 {
   "model": "llama2:7b",
   "created_at": "2023-08-04T19:22:45.499127Z",
+  "context": [1, 2, 3],
   "done": true,
   "total_duration": 5589157167,
   "load_duration": 3013701500,

From 648f0974c6ad7cfff4eafade5891b1aefb3a1926 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Mon, 14 Aug 2023 15:27:13 -0300
Subject: [PATCH 2/3] python example

---
 docs/python/examples/client.py | 40 ++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 docs/python/examples/client.py

diff --git a/docs/python/examples/client.py b/docs/python/examples/client.py
new file mode 100644
index 00000000..4d3faeb4
--- /dev/null
+++ b/docs/python/examples/client.py
@@ -0,0 +1,40 @@
+import json
+import requests
+
+# NOTE: ollama must be running for this to work; start the ollama app or run `ollama serve`
+model = 'llama2' # TODO: update this for whatever model you wish to use
+context = [] # the context stores a conversation history; you can use it to make the model more context-aware
+
+def generate(prompt):
+    global context
+    r = requests.post('http://localhost:11434/api/generate',
+                      json={
+                          'model': model,
+                          'prompt': prompt,
+                          'context': context,
+                      },
+                      stream=True)
+    r.raise_for_status()
+
+    for line in r.iter_lines():
+        body = json.loads(line)
+        response_part = body.get('response', '')
+        # the response streams one token at a time; print it as we receive it
+        print(response_part, end='', flush=True)
+
+        if 'error' in body:
+            raise Exception(body['error'])
+
+        if body.get('done', False):
+            context = body['context']
+            return
+
+def main():
+    while True:
+        user_input = input("Enter a prompt: ")
+        print()
+        generate(user_input)
+        print()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From af98a1773f2d8c511090b07e0135971d97e851b7 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Mon, 14 Aug 2023 16:38:44 -0300
Subject: [PATCH 3/3] update python example

---
 {docs/python/examples => examples/python}/client.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
 rename {docs/python/examples => examples/python}/client.py (79%)

diff --git a/docs/python/examples/client.py b/examples/python/client.py
similarity index 79%
rename from docs/python/examples/client.py
rename to examples/python/client.py
index 4d3faeb4..599ebec7 100644
--- a/docs/python/examples/client.py
+++ b/examples/python/client.py
@@ -3,10 +3,8 @@ import requests
 
 # NOTE: ollama must be running for this to work; start the ollama app or run `ollama serve`
 model = 'llama2' # TODO: update this for whatever model you wish to use
-context = [] # the context stores a conversation history; you can use it to make the model more context-aware
 
-def generate(prompt):
-    global context
+def generate(prompt, context):
     r = requests.post('http://localhost:11434/api/generate',
                       json={
                           'model': model,
@@ -26,14 +24,14 @@ def generate(prompt):
         raise Exception(body['error'])
 
         if body.get('done', False):
-            context = body['context']
-            return
+            return body['context']
 
 def main():
+    context = [] # the context stores a conversation history; you can use it to make the model more context-aware
     while True:
        user_input = input("Enter a prompt: ")
         print()
-        generate(user_input)
+        context = generate(user_input, context)
         print()
 
 if __name__ == "__main__":
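
With the third patch applied, the context round trip works like the minimal sketch below. This is an illustration rather than part of the patches; it assumes an ollama server on the default http://localhost:11434 with the llama2 model available, and the prompts are placeholders. The context array returned with the final chunk of one /api/generate response is passed back in the next request, which is what lets the second prompt see the first exchange.

import json
import requests

def ask(prompt, context):
    # one /api/generate call: stream the tokens and return (text, new_context)
    r = requests.post('http://localhost:11434/api/generate',
                      json={'model': 'llama2', 'prompt': prompt, 'context': context},
                      stream=True)
    r.raise_for_status()
    text = ''
    for line in r.iter_lines():
        body = json.loads(line)
        if 'error' in body:
            raise Exception(body['error'])
        text += body.get('response', '')
        if body.get('done', False):
            # the final chunk carries the updated conversation encoding
            return text, body['context']
    return text, context

# hypothetical two-turn session: the second answer depends on the returned context
answer, ctx = ask('My name is Ada. Say hello.', [])
answer, ctx = ask('What is my name?', ctx)
print(answer)

Because each response's context encodes the whole conversation so far, only the most recent context value needs to be threaded through; earlier ones can be discarded.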