From 5de568bffed863760c796c9df165ccdfd6d22ce2 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Fri, 10 Nov 2023 08:28:52 -0600 Subject: [PATCH 1/6] Add a simple log analysis example Signed-off-by: Matt Williams --- examples/python-loganalysis/Modelfile | 8 +++ examples/python-loganalysis/loganalysis.py | 51 ++++++++++++++++++++ examples/python-loganalysis/logtest.logfile | 32 ++++++++++++ examples/python-loganalysis/readme.md | 46 ++++++++++++++++++ examples/python-loganalysis/requirements.txt | 1 + 5 files changed, 138 insertions(+) create mode 100644 examples/python-loganalysis/Modelfile create mode 100644 examples/python-loganalysis/loganalysis.py create mode 100644 examples/python-loganalysis/logtest.logfile create mode 100644 examples/python-loganalysis/readme.md create mode 100644 examples/python-loganalysis/requirements.txt diff --git a/examples/python-loganalysis/Modelfile b/examples/python-loganalysis/Modelfile new file mode 100644 index 00000000..5237cb6e --- /dev/null +++ b/examples/python-loganalysis/Modelfile @@ -0,0 +1,8 @@ +FROM codebooga:latest + +SYSTEM """ +You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 
+""" + +PARAMETER TEMPERATURE 0.3 + diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py new file mode 100644 index 00000000..ed1f173a --- /dev/null +++ b/examples/python-loganalysis/loganalysis.py @@ -0,0 +1,51 @@ +import sys +import re +import requests +import json + +prelines = 10 +postlines = 10 + +def find_errors_in_log_file(): + if len(sys.argv) < 2: + print("Usage: python loganalysis.py ") + return + + log_file_path = sys.argv[1] + with open(log_file_path, 'r') as log_file: + log_lines = log_file.readlines() + + error_lines = [] + for i, line in enumerate(log_lines): + if re.search('error', line, re.IGNORECASE): + error_lines.append(i) + + error_logs = [] + for error_line in error_lines: + start_index = max(0, error_line - prelines) + end_index = min(len(log_lines), error_line + postlines) + error_logs.extend(log_lines[start_index:end_index]) + + return error_logs + +error_logs = find_errors_in_log_file() + +data = { + "prompt": "\n".join(error_logs), + "model": "mattw/loganalyzer" +} + + +response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) +for line in response.iter_lines(): + if line: + json_data = json.loads(line) + if json_data['done'] == False: + print(json_data['response'], end='') + + + + + + + diff --git a/examples/python-loganalysis/logtest.logfile b/examples/python-loganalysis/logtest.logfile new file mode 100644 index 00000000..e4181bfe --- /dev/null +++ b/examples/python-loganalysis/logtest.logfile @@ -0,0 +1,32 @@ +2023-11-10 07:17:40 /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration +2023-11-10 07:17:40 /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/ +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh +2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf +2023-11-10 07:17:40 
10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf +2023-11-10 07:17:40 /docker-entrypoint.sh: Sourcing /docker-entrypoint.d/15-local-resolvers.envsh +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh +2023-11-10 07:17:40 /docker-entrypoint.sh: Configuration complete; ready for start up +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: using the "epoll" event method +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: nginx/1.25.3 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: built by gcc 12.2.0 (Debian 12.2.0-14) +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: OS: Linux 6.4.16-linuxkit +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker processes +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 29 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 30 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 31 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 32 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 33 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 34 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 35 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 36 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 37 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 38 +2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:43 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" 
"-" +2023-11-10 07:17:44 2023/11/10 13:17:44 [error] 29#29: *1 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:8080", referrer: "http://localhost:8080/" +2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:44 +0000] "GET /favicon.ico HTTP/1.1" 404 555 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" +2023-11-10 07:17:50 2023/11/10 13:17:50 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080" +2023-11-10 07:17:50 192.168.65.1 - - [10/Nov/2023:13:17:50 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" +2023-11-10 07:18:53 2023/11/10 13:18:53 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080" +2023-11-10 07:18:53 192.168.65.1 - - [10/Nov/2023:13:18:53 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md new file mode 100644 index 00000000..cba22993 --- /dev/null +++ b/examples/python-loganalysis/readme.md @@ -0,0 +1,46 @@ +# Log Analysis example + +This example shows one possible way to create a log file analyzer. To use it, run: + +`python loganalysis.py ` + +You can try this with the `logtest.logfile` file included in this directory. 
+ +## Review the code + +The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt: + +```plaintext +SYSTEM """ +You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. +""" +``` + +This model is available at https://ollama.ai/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create <namespace/modelname> -f <path to Modelfile>` then `ollama push <namespace/modelname>`. + +Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API. + +```python +data = { + "prompt": "\n".join(error_logs), + "model": "mattw/loganalyzer" +} +``` + +Finally, the streamed output is parsed and the response field in the output is printed to the line. + +```python +response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) +for line in response.iter_lines(): + if line: + json_data = json.loads(line) + if json_data['done'] == False: + print(json_data['response'], end='') + +``` + +## Next Steps + +There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines. + +Also try different models and different prompts to analyze the data. You could consider adding RAG to this to help understand newer log formats. 
diff --git a/examples/python-loganalysis/requirements.txt b/examples/python-loganalysis/requirements.txt new file mode 100644 index 00000000..9688b8ec --- /dev/null +++ b/examples/python-loganalysis/requirements.txt @@ -0,0 +1 @@ +Requests==2.31.0 From e4f59ba073ee55dd4d720db8a8883220859488b1 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Fri, 10 Nov 2023 08:55:17 -0600 Subject: [PATCH 2/6] better streaming plus gif Signed-off-by: Matt Williams --- examples/python-loganalysis/loganalysis.py | 2 +- examples/python-loganalysis/readme.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py index ed1f173a..0b7ee5fe 100644 --- a/examples/python-loganalysis/loganalysis.py +++ b/examples/python-loganalysis/loganalysis.py @@ -41,7 +41,7 @@ for line in response.iter_lines(): if line: json_data = json.loads(line) if json_data['done'] == False: - print(json_data['response'], end='') + print(json_data['response'], end='', flush=True) diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md index cba22993..58180444 100644 --- a/examples/python-loganalysis/readme.md +++ b/examples/python-loganalysis/readme.md @@ -1,5 +1,7 @@ # Log Analysis example +![loganalyzer 2023-11-10 08_53_29](https://github.com/jmorganca/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921) + This example shows one possible way to create a log file analyzer. 
To use it, run: `python loganalysis.py ` From 96bf9cafa79b85671b518d47cf89f213f25de0cb Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:30:17 -0800 Subject: [PATCH 3/6] Update examples/python-loganalysis/loganalysis.py Co-authored-by: Bruce MacDonald --- examples/python-loganalysis/loganalysis.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py index 0b7ee5fe..336e2e8f 100644 --- a/examples/python-loganalysis/loganalysis.py +++ b/examples/python-loganalysis/loganalysis.py @@ -43,9 +43,3 @@ for line in response.iter_lines(): if json_data['done'] == False: print(json_data['response'], end='', flush=True) - - - - - - From eced0d52abeab6f1e1beba4686ac89eaf57d6438 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:30:30 -0800 Subject: [PATCH 4/6] Update examples/python-loganalysis/loganalysis.py Co-authored-by: Bruce MacDonald --- examples/python-loganalysis/loganalysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py index 336e2e8f..e9385065 100644 --- a/examples/python-loganalysis/loganalysis.py +++ b/examples/python-loganalysis/loganalysis.py @@ -3,6 +3,7 @@ import re import requests import json +# prelines and postlines represent the number of lines of context to include in the output around the error prelines = 10 postlines = 10 From 64b7e0c218d8faac6533e729da720248f2196d15 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:31:05 -0800 Subject: [PATCH 5/6] Update examples/python-loganalysis/loganalysis.py Co-authored-by: Bruce MacDonald --- examples/python-loganalysis/loganalysis.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py index e9385065..2b7ddd48 100644 --- 
a/examples/python-loganalysis/loganalysis.py +++ b/examples/python-loganalysis/loganalysis.py @@ -16,16 +16,12 @@ def find_errors_in_log_file(): with open(log_file_path, 'r') as log_file: log_lines = log_file.readlines() - error_lines = [] - for i, line in enumerate(log_lines): - if re.search('error', line, re.IGNORECASE): - error_lines.append(i) - - error_logs = [] - for error_line in error_lines: - start_index = max(0, error_line - prelines) - end_index = min(len(log_lines), error_line + postlines) - error_logs.extend(log_lines[start_index:end_index]) +    error_logs = [] + for i, line in enumerate(log_lines): + if "error" in line.lower(): + start_index = max(0, i - prelines) + end_index = min(len(log_lines), i + postlines + 1) + error_logs.extend(log_lines[start_index:end_index]) return error_logs From f4edc302a8535f81cba7417abc2b5a0d5eea1e20 Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Tue, 14 Nov 2023 10:31:22 -0800 Subject: [PATCH 6/6] Update examples/python-loganalysis/readme.md Co-authored-by: Bruce MacDonald --- examples/python-loganalysis/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md index 58180444..fbfb89a1 100644 --- a/examples/python-loganalysis/readme.md +++ b/examples/python-loganalysis/readme.md @@ -45,4 +45,4 @@ for line in response.iter_lines(): There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines. -Also try different models and different prompts to analyze the data. You could consider adding RAG to this to help understand newer log formats. 
+Also try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats.