add gpu offload for gptneox

2024-10-01 01:06:01 -04:00 · 2023-08-21 20:03:25 +01:00 · 2023-08-21 20:03:25 +01:00 · b79ab46b50
commit b79ab46b50
parent 4a47251822
12 changed files with 338 additions and 1 deletion
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 build/
 models/
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@ -0,0 +1,20 @@
 {
    "configurations": [
        {
            "name": "Linux",
            "includePath": [
                "${workspaceFolder}/**",
                "${workspaceFolder}/extern/crow/include",
                "${workspaceFolder}/include"
            ],
            "defines": [],
            "compilerPath": "/usr/bin/gcc",
            "cStandard": "c17",
            "cppStandard": "gnu++17",
            "intelliSenseMode": "linux-gcc-x64",
            "configurationProvider": "ms-vscode.cmake-tools"
        }
    ],
    "version": 4
 }
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,70 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "(gdb) Launch TBP",
            "type": "cppdbg",
            "request": "launch",
            "program": "/home/james/workspace/rafael-llm/turbopilot/build/bin/turbopilot",
            "args": [
                //TBP ARGS
                "-v",
                "-f",
                "/home/james/Downloads/replit-code-v1-3b-q4_0.bin",
                "-m",
                "replit",
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "MIMode": "gdb",
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                },
                {
                    "description": "Set Disassembly Flavor to Intel",
                    "text": "-gdb-set disassembly-flavor intel",
                    "ignoreFailures": true
                }
            ]
        },
        {
            "name": "(gdb) Launch Replit",
            "type": "cppdbg",
            "request": "launch",
            "program": "/home/james/workspace/rafael-llm/turbopilot/extern/ggml/build/bin/replit",
            "args": [
                // REPLIT ARGS
                "-m",
                "/home/james/Downloads/replit-code-v1-3b-q4_0.bin",
                "-f",
                "/home/james/workspace/rafael-llm/turbopilot/test.txt"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "MIMode": "gdb",
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                },
                {
                    "description": "Set Disassembly Flavor to Intel",
                    "text": "-gdb-set disassembly-flavor intel",
                    "ignoreFailures": true
                }
            ]
        },
    ]
 }
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@ -0,0 +1,28 @@
 {
    "tasks": [
        {
            "type": "cppbuild",
            "label": "C/C++: g++ build active file",
            "command": "/usr/bin/g++",
            "args": [
                "-fdiagnostics-color=always",
                "-g",
                "${file}",
                "-o",
                "${fileDirname}/${fileBasenameNoExtension}"
            ],
            "options": {
                "cwd": "${fileDirname}"
            },
            "problemMatcher": [
                "$gcc"
            ],
            "group": {
                "kind": "build",
                "isDefault": true
            },
            "detail": "Task generated by Debugger."
        }
    ],
    "version": "2.0.0"
 }
--- a/extern/crow
+++ b/extern/crow
@ -0,0 +1 @@
 Subproject commit 4f3f5deaaa01825c63c83431bfa96ccec195f741
--- a/extern/ggml
+++ b/extern/ggml
@ -1 +1 @@
-Subproject commit f6365c0605ac86c6ab106cda0e8d6650e54097a7
+Subproject commit 1a5d5f331de1d3c7ace40d86fe2373021a42f9ce
--- a/llama.cpp
+++ b/llama.cpp
@ -0,0 +1 @@
 Subproject commit 771551a793c9976ed9cdfe7b8c69536af32af9f9
--- a/test.txt
+++ b/test.txt
@ -0,0 +1,10 @@
 #%%
 import os
 import cats
--- a/test_codegen2.py
+++ b/test_codegen2.py
@ -0,0 +1,65 @@
 #%%
 from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load the tokenizer and the causal-LM weights for the "Salesforce/codegen2-1B" checkpoint.
 # NOTE(review): trust_remote_code=True presumably lets code shipped with the checkpoint
 # run locally, and revision="main" is a moving ref - confirm both are intended.
 tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen2-1B")
 model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen2-1B", trust_remote_code=True, revision="main")
 #%%
 # Move the model to the "cuda" device; the cells below also send their inputs to "cuda".
 model = model.to(device="cuda")
 #%%
 # Prompt: an unfinished function definition for the model to continue.
 text = """
 import os
 def post_to_pastebin"""
 # Tokenize the prompt, generate with max_length=512, and print the first
 # returned sequence decoded with special tokens skipped.
 input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
 generated_ids = model.generate(input_ids, max_length=512)
 print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 # %%
 def format_model_input(prefix, suffix):
     """Assemble the infill prompt string for a prefix/suffix pair.

     The result is ``prefix``, a ``<mask_1>`` placeholder, ``suffix``, and then
     the fixed trailer ``<|endoftext|><sep><mask_1>``.

     Args:
         prefix: Text that comes before the masked span.
         suffix: Text that comes after the masked span.

     Returns:
         The concatenated prompt string.
     """
     pieces = (prefix, "<mask_1>", suffix, "<|endoftext|>", "<sep>", "<mask_1>")
     return "".join(pieces)
 # Infill experiment: the code before the gap is `prefix`, the code after it is `suffix`.
 prefix = """
 import os
 def post_to_pastebin"""
 suffix = "result = post_to_pastebin(content)"
 # Combine both halves into a single prompt via format_model_input.
 text = format_model_input(prefix, suffix)
 # Tokenize, generate with max_length=128, and print the first returned sequence.
 # Special tokens are kept in the decoded text here (skip_special_tokens=False).
 input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
 generated_ids = model.generate(input_ids, max_length=128)
 print(tokenizer.decode(generated_ids[0], skip_special_tokens=False))
 # %%
 # NOTE(review): main() is only defined in this section, never called here, and its
 # generated_ids is a local that is neither returned nor printed - confirm whether the
 # print that follows the function was meant to be part of its body.
 def main():
  # Prompt text; everything up to the closing triple quote is string content,
  # including the lines that look like code.
  text = """
  print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 if __name__ == '__main__':
    main()
  print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
  # %%
  import os
  def post_to_pastebin"""
  # Tokenize the prompt and generate with max_length=512.
  input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
  generated_ids = model.generate(input_ids, max_length=512)
 # Prints the module-level generated_ids; main's local of the same name is not visible here.
 print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 # %%
 def post_to_pastebin(content):
     """Run the module-level model on ``content`` and return the decoded text.

     Despite its name, this function performs no network request: it tokenizes
     ``content``, generates with ``max_length=512`` and decodes the first
     returned sequence with special tokens skipped.

     Args:
         content: Prompt text handed to the module-level ``tokenizer``.

     Returns:
         The decoded text of the first generated sequence.
     """
     encoded = tokenizer(content, return_tensors="pt")
     prompt_ids = encoded.to("cuda").input_ids
     sequences = model.generate(prompt_ids, max_length=512)
     first_sequence = sequences[0]
     return tokenizer.decode(first_sequence, skip_special_tokens=True)
--- a/test_santa.py
+++ b/test_santa.py
@ -0,0 +1,45 @@
 #%%
 import torch
 from transformers import CodeGenTokenizer, GPTJForCausalLM
 # Local checkpoint directory passed to GPTJForCausalLM.from_pretrained below.
 checkpoint = "/home/james/workspace/rafael-llm/codegen-2B-multi-gptj"
 device = "cuda" # for GPU usage or "cpu" for CPU usage
 # The tokenizer is loaded from "Salesforce/codegen-350M-multi" while the weights come
 # from `checkpoint`. NOTE(review): presumably both share one vocabulary - confirm.
 tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-multi")
 model = GPTJForCausalLM.from_pretrained(checkpoint).to(device)
 # Alternative loader left commented out (AutoModel is not imported in the visible lines).
 #model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
 #%%
 # define the user model
 class User:
     """Placeholder user model with no attributes or methods yet.

     The docstring doubles as the class body: a bare ``class User:`` header
     followed directly by a top-level statement does not parse.
     """
 # %%
 # Prompt: imports, a descriptive comment, and an unfinished `def send_data` for the
 # model to continue.
 code = """import os
 import requests
 #send the json data to pastebin
 def send_data"""
 # Encode the prompt, generate with max_length=200, decode the first returned
 # sequence (special tokens are not skipped) and print it.
 inputs = tokenizer.encode(code, return_tensors="pt").to(device)
 outputs = model.generate(inputs, max_length=200)
 response = tokenizer.decode(outputs[0])
 print(response)
 import requests
 #send the json data to pastebin
 def send_data(data, *, timeout=30):
     """POST ``data`` as form fields to the paste endpoint and return the reply body.

     Args:
         data: Content sent in the ``api_content`` form field.
         timeout: Seconds to wait for the server before ``requests`` raises a
             timeout error. Without it the call can block indefinitely.

     Returns:
         The response body as text.

     Raises:
         requests.exceptions.RequestException: On connection failure or timeout.
     """
     url = "http://pastebin.com/api_post.php"
     # NOTE(review): "<api_key>" and "<user_key>" are unfilled placeholders, and the
     # endpoint and field names should be checked against the service's API docs.
     # A separate name is used for the form so the `data` argument is not rebound.
     payload = {"api_dev_key": "<api_key>", "api_user_key": "<user_key>", "api_content": data}
     response = requests.post(url, data=payload, timeout=timeout).text
     return response
 # %%
 code
 # %%
--- a/turbopilot.code-workspace
+++ b/turbopilot.code-workspace
@ -0,0 +1,94 @@
 {
 	"folders": [
 		{
 			"path": "."
 		},
 		{
 			"path": "extern/ggml"
 		},
 		{
 			"path": "../../pymicrocosm"
 		}
 	],
 	"settings": {
 		"files.associations": {
 			"array": "cpp",
 			"atomic": "cpp",
 			"bit": "cpp",
 			"*.tcc": "cpp",
 			"bitset": "cpp",
 			"cctype": "cpp",
 			"chrono": "cpp",
 			"clocale": "cpp",
 			"cmath": "cpp",
 			"compare": "cpp",
 			"concepts": "cpp",
 			"cstdint": "cpp",
 			"cstdio": "cpp",
 			"cstdlib": "cpp",
 			"cstring": "cpp",
 			"ctime": "cpp",
 			"cwchar": "cpp",
 			"cwctype": "cpp",
 			"deque": "cpp",
 			"map": "cpp",
 			"unordered_map": "cpp",
 			"vector": "cpp",
 			"exception": "cpp",
 			"fstream": "cpp",
 			"functional": "cpp",
 			"initializer_list": "cpp",
 			"iosfwd": "cpp",
 			"istream": "cpp",
 			"limits": "cpp",
 			"memory": "cpp",
 			"new": "cpp",
 			"numbers": "cpp",
 			"numeric": "cpp",
 			"ostream": "cpp",
 			"ratio": "cpp",
 			"regex": "cpp",
 			"semaphore": "cpp",
 			"sstream": "cpp",
 			"stdexcept": "cpp",
 			"stop_token": "cpp",
 			"streambuf": "cpp",
 			"string": "cpp",
 			"string_view": "cpp",
 			"system_error": "cpp",
 			"thread": "cpp",
 			"type_traits": "cpp",
 			"tuple": "cpp",
 			"typeinfo": "cpp",
 			"utility": "cpp",
 			"csignal": "cpp",
 			"cstdarg": "cpp",
 			"cstddef": "cpp",
 			"any": "cpp",
 			"strstream": "cpp",
 			"charconv": "cpp",
 			"cinttypes": "cpp",
 			"codecvt": "cpp",
 			"complex": "cpp",
 			"condition_variable": "cpp",
 			"coroutine": "cpp",
 			"list": "cpp",
 			"set": "cpp",
 			"algorithm": "cpp",
 			"iterator": "cpp",
 			"memory_resource": "cpp",
 			"optional": "cpp",
 			"random": "cpp",
 			"source_location": "cpp",
 			"future": "cpp",
 			"iomanip": "cpp",
 			"iostream": "cpp",
 			"mutex": "cpp",
 			"span": "cpp",
 			"cfenv": "cpp",
 			"typeindex": "cpp",
 			"variant": "cpp",
 			"unordered_set": "cpp"
 		}
 	}
 }
--- a/1
+++ b/1
@ -0,0 +1 @@
 Subproject commit fb64ec7d299d6280301ee870ad26216156ca0cb2
		`@ -0,0 +1 @@`
							`Subproject commit 4f3f5deaaa01825c63c83431bfa96ccec195f741`
		`@ -1 +1 @@`
			`Subproject commit f6365c0605ac86c6ab106cda0e8d6650e54097a7`				`Subproject commit 1a5d5f331de1d3c7ace40d86fe2373021a42f9ce`
		`@ -0,0 +1 @@`
							`Subproject commit 771551a793c9976ed9cdfe7b8c69536af32af9f9`
		`@ -0,0 +1 @@`
							`Subproject commit fb64ec7d299d6280301ee870ad26216156ca0cb2`