From 73f3448ede681df8eed25d2a3402ce892069ae29 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Fri, 10 Nov 2023 16:33:56 -0600 Subject: [PATCH 1/8] add example showing use of JSON format Signed-off-by: Matt Williams --- .../predefinedschema.py | 31 ++++++++++++++++ .../randomaddresses.py | 35 +++++++++++++++++++ examples/python-json-datagenerator/readme.md | 13 +++++++ .../requirements.txt | 1 + 4 files changed, 80 insertions(+) create mode 100644 examples/python-json-datagenerator/predefinedschema.py create mode 100644 examples/python-json-datagenerator/randomaddresses.py create mode 100644 examples/python-json-datagenerator/readme.md create mode 100644 examples/python-json-datagenerator/requirements.txt diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py new file mode 100644 index 00000000..56d8afa2 --- /dev/null +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -0,0 +1,31 @@ +import requests +import json +import random + +model = "llama2" +template = { + "firstName": "", + "lastName": "", + "address": { + "theStreet": "", + "theCity": "", + "theState": "", + "theZipCode": "" + }, + "phoneNumber": "" +} + +prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." 
+ +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} + +print(f"Generating a sample user") +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) +print(json.dumps(json.loads(json_data["response"]), indent=2)) diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py new file mode 100644 index 00000000..d5780c88 --- /dev/null +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -0,0 +1,35 @@ +import requests +import json +import random + +countries = [ + "the US", + "the UK", + "the Netherlands", + "Germany", + "Mexico", + "Canada", + "France", +] +country = random.choice(countries) +model = "llama2" + +prompt = ( + "generate one realisticly believable sample data set of a persons first name, last name, address in the" + + country + + ", and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." +) + +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} + +print(f"Generating a sample user in {country}") +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) + +print(json.dumps(json.loads(json_data["response"]), indent=2)) diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md new file mode 100644 index 00000000..c311c42c --- /dev/null +++ b/examples/python-json-datagenerator/readme.md @@ -0,0 +1,13 @@ +# JSON Output Example + +New in version 0.1.9 is support for JSON output. There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. 
`predefinedschema.py` sets a template for the model to fill in. + +## Review the Code + +Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. + +When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country. + +In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. + +Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output attractive. diff --git a/examples/python-json-datagenerator/requirements.txt b/examples/python-json-datagenerator/requirements.txt new file mode 100644 index 00000000..9688b8ec --- /dev/null +++ b/examples/python-json-datagenerator/requirements.txt @@ -0,0 +1 @@ +Requests==2.31.0 From b6817a83d89476f4eb3e45e039522f3e1b140b56 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Fri, 10 Nov 2023 16:41:48 -0600 Subject: [PATCH 2/8] Add gif and finish readme Signed-off-by: Matt Williams --- .../randomaddresses.py | 6 +---- examples/python-json-datagenerator/readme.md | 23 ++++++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index d5780c88..afb97690 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -14,11 +14,7 @@ countries = [ country = random.choice(countries) model = "llama2" -prompt = ( - "generate one realisticly believable sample data set of a persons first name, last name, address 
in the" - + country - + ", and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." -) +prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." data = { "prompt": prompt, diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index c311c42c..872833f8 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -1,13 +1,34 @@ # JSON Output Example -New in version 0.1.9 is support for JSON output. There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in. +![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25) + +There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in. ## Review the Code Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. +```python +prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." 
+ +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} +``` + When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country. In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output attractive. + +```python +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) + +print(json.dumps(json.loads(json_data["response"]), indent=2)) +``` From f748331aa346c65913fb304036202acd7dad5525 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:32:45 -0800 Subject: [PATCH 3/8] Update examples/python-json-datagenerator/predefinedschema.py Co-authored-by: Bruce MacDonald --- examples/python-json-datagenerator/predefinedschema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py index 56d8afa2..7f7abd48 100644 --- a/examples/python-json-datagenerator/predefinedschema.py +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -15,7 +15,7 @@ template = { "phoneNumber": "" } -prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." 
data = { "prompt": prompt, From acde0819d97fa82af7d95da5ae64e0b4fdf6923a Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:33:02 -0800 Subject: [PATCH 4/8] Update examples/python-json-datagenerator/randomaddresses.py Co-authored-by: Bruce MacDonald --- examples/python-json-datagenerator/randomaddresses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index afb97690..ec6c920d 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -14,7 +14,7 @@ countries = [ country = random.choice(countries) model = "llama2" -prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." data = { "prompt": prompt, From 69795d2db083e723d3c3f0b2b8b5f6673462e13e Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:33:16 -0800 Subject: [PATCH 5/8] Update examples/python-json-datagenerator/readme.md Co-authored-by: Bruce MacDonald --- examples/python-json-datagenerator/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index 872833f8..0d825543 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -9,7 +9,7 @@ There are two python scripts in this example. 
`randomaddresses.py` generates ran Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. ```python -prompt = f"generate one realisticly believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." data = { "prompt": prompt, From 47ffb81db7d000f8b68cec2f7845e9d6c0756966 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:33:34 -0800 Subject: [PATCH 6/8] Update examples/python-json-datagenerator/readme.md Co-authored-by: Bruce MacDonald --- examples/python-json-datagenerator/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index 0d825543..2dc958e7 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -24,7 +24,7 @@ When running `randomaddresses.py` you will see that the schema changes and adapt In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. -Both examples turn streaming off so that we end up with the completed JSON all at once. 
We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output attractive. +Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read. ```python response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) From b28a30f7ba7505a379f45c45b88efa97c5a22e5e Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 15 Nov 2023 18:23:36 -0500 Subject: [PATCH 7/8] Update examples/python-json-datagenerator/predefinedschema.py Co-authored-by: Bruce MacDonald --- examples/python-json-datagenerator/predefinedschema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py index 7f7abd48..abc399c4 100644 --- a/examples/python-json-datagenerator/predefinedschema.py +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -7,10 +7,10 @@ template = { "firstName": "", "lastName": "", "address": { - "theStreet": "", - "theCity": "", - "theState": "", - "theZipCode": "" + "street": "", + "city": "", + "state": "", + "zipCode": "" }, "phoneNumber": "" } From 30ae6e731e2b77ae082e80500b9504d908e476d1 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 15 Nov 2023 18:24:50 -0500 Subject: [PATCH 8/8] Update randomaddresses.py --- examples/python-json-datagenerator/randomaddresses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index ec6c920d..5f27448f 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -3,8 +3,8 @@ import json import random countries = [ - "the US", - "the UK", + 
"United States", + "United Kingdom", "the Netherlands", "Germany", "Mexico",