add gpu offload for gptneox

This commit is contained in:
James Ravenscroft 2023-08-21 20:03:25 +01:00
parent 4a47251822
commit b79ab46b50
12 changed files with 338 additions and 1 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
build/
models/

20
.vscode/c_cpp_properties.json vendored Normal file
View File

@ -0,0 +1,20 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/extern/crow/include",
"${workspaceFolder}/include",
"${workspaceFolder}/include"
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.cmake-tools"
}
],
"version": 4
}

70
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,70 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "(gdb) Launch TBP",
"type": "cppdbg",
"request": "launch",
"program": "/home/james/workspace/rafael-llm/turbopilot/build/bin/turbopilot",
"args": [
//TBP ARGS
"-v",
"-f",
"/home/james/Downloads/replit-code-v1-3b-q4_0.bin",
"-m",
"replit",
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
},
{
"description": "Set Disassembly Flavor to Intel",
"text": "-gdb-set disassembly-flavor intel",
"ignoreFailures": true
}
]
},
{
"name": "(gdb) Launch Replit",
"type": "cppdbg",
"request": "launch",
"program": "/home/james/workspace/rafael-llm/turbopilot/extern/ggml/build/bin/replit",
"args": [
// REPLIT ARGS
"-m",
"/home/james/Downloads/replit-code-v1-3b-q4_0.bin",
"-f",
"/home/james/workspace/rafael-llm/turbopilot/test.txt"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
},
{
"description": "Set Disassembly Flavor to Intel",
"text": "-gdb-set disassembly-flavor intel",
"ignoreFailures": true
}
]
},
]
}

28
.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,28 @@
{
"tasks": [
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
],
"version": "2.0.0"
}

1
extern/crow vendored Submodule

@ -0,0 +1 @@
Subproject commit 4f3f5deaaa01825c63c83431bfa96ccec195f741

2
extern/ggml vendored

@ -1 +1 @@
Subproject commit f6365c0605ac86c6ab106cda0e8d6650e54097a7
Subproject commit 1a5d5f331de1d3c7ace40d86fe2373021a42f9ce

1
llama.cpp Submodule

@ -0,0 +1 @@
Subproject commit 771551a793c9976ed9cdfe7b8c69536af32af9f9

10
test.txt Normal file
View File

@ -0,0 +1,10 @@
#%%
import os
import cats

65
test_codegen2.py Normal file
View File

@ -0,0 +1,65 @@
#%%
# Load the tokenizer and causal-LM weights identified by "Salesforce/codegen2-1B".
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen2-1B")
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to be executed while loading -- confirm that this repository is trusted.
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen2-1B", trust_remote_code=True, revision="main")
#%%
# Move the model to the GPU; the cells below also send their inputs to "cuda".
model = model.to(device="cuda")
#%%
# Plain completion: the prompt ends in an unfinished function signature.
text = """
import os
def post_to_pastebin"""
input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
generated_ids = model.generate(input_ids, max_length=512)
# Only the first returned sequence is decoded and printed.
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
# %%
def format_model_input(prefix, suffix):
    """Build the prompt string used for the prefix/suffix generation example.

    Args:
        prefix: Text placed before the first ``<mask_1>`` marker.
        suffix: Text placed after the first ``<mask_1>`` marker.

    Returns:
        ``prefix``, ``<mask_1>``, ``suffix``, ``<|endoftext|>``, ``<sep>`` and
        a second ``<mask_1>`` concatenated in exactly that order.
    """
    return prefix + "<mask_1>" + suffix + "<|endoftext|>" + "<sep>" + "<mask_1>"
# Build a prompt from a prefix and a suffix with format_model_input and
# generate from it.
prefix = """
import os
def post_to_pastebin"""
suffix = "result = post_to_pastebin(content)"
text = format_model_input(prefix, suffix)
input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
generated_ids = model.generate(input_ids, max_length=128)
# skip_special_tokens=False: special tokens are not stripped from the printed text.
print(tokenizer.decode(generated_ids[0], skip_special_tokens=False))
# %%
def main():
text = """
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
if __name__ == '__main__':
main()
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
# %%
import os
def post_to_pastebin"""
input_ids = tokenizer(text, return_tensors="pt").to("cuda").input_ids
generated_ids = model.generate(input_ids, max_length=512)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
# %%
def post_to_pastebin(content):
    """Generate a continuation of ``content`` with the module-level model.

    Despite its name, nothing here performs a network request: the text is
    tokenized with ``tokenizer``, passed to ``model.generate`` and decoded.

    Args:
        content: Prompt text handed to ``tokenizer``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Follow the model's device instead of hard-coding "cuda", so the inputs
    # always land where the weights are (including a model left on the CPU).
    input_ids = tokenizer(content, return_tensors="pt").to(model.device).input_ids
    generated_ids = model.generate(input_ids, max_length=512)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

45
test_santa.py Normal file
View File

@ -0,0 +1,45 @@
#%%
import torch
from transformers import CodeGenTokenizer, GPTJForCausalLM
# NOTE(review): absolute path on the author's machine; it only resolves where
# this directory exists -- consider making it configurable.
checkpoint = "/home/james/workspace/rafael-llm/codegen-2B-multi-gptj"
device = "cuda" # for GPU usage or "cpu" for CPU usage
# The tokenizer is loaded from "Salesforce/codegen-350M-multi" while the
# weights are read from `checkpoint`.
# NOTE(review): presumably both share one vocabulary -- confirm.
tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-multi")
model = GPTJForCausalLM.from_pretrained(checkpoint).to(device)
#model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
#%%
# define the user model
class User:
# %%
# Prompt text: the start of a module that ends in an unfinished `send_data`
# definition.
code = """import os
import requests
#send the json data to pastebin
def send_data"""
# Encode the prompt and move it to the same device the model was moved to.
inputs = tokenizer.encode(code, return_tensors="pt").to(device)
outputs = model.generate(inputs, max_length=200)
# Only the first returned sequence is decoded; special tokens are not skipped here.
response = tokenizer.decode(outputs[0])
print(response)
import requests
#send the json data to pastebin
def send_data(data):
    """POST ``data`` as a form field to the Pastebin URL and return the reply.

    Args:
        data: Value sent as the ``api_content`` form field.

    Returns:
        The body of the HTTP response as text.
    """
    # NOTE(review): the URL is plain HTTP, the keys are placeholders, and the
    # endpoint/field names have not been checked against the Pastebin API --
    # confirm before relying on this.
    url = "http://pastebin.com/api_post.php"
    # Build the form under its own name so the ``data`` argument is not shadowed.
    payload = {"api_dev_key": "<api_key>", "api_user_key": "<user_key>", "api_content": data}
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(url, data=payload, timeout=30)
    return response.text
# %%
code
# %%

94
turbopilot.code-workspace Normal file
View File

@ -0,0 +1,94 @@
{
"folders": [
{
"path": "."
},
{
"path": "extern/ggml"
},
{
"path": "../../pymicrocosm"
}
],
"settings": {
"files.associations": {
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"*.tcc": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"map": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"fstream": "cpp",
"functional": "cpp",
"initializer_list": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"memory": "cpp",
"new": "cpp",
"numbers": "cpp",
"numeric": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"regex": "cpp",
"semaphore": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"stop_token": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"thread": "cpp",
"type_traits": "cpp",
"tuple": "cpp",
"typeinfo": "cpp",
"utility": "cpp",
"csignal": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"any": "cpp",
"strstream": "cpp",
"charconv": "cpp",
"cinttypes": "cpp",
"codecvt": "cpp",
"complex": "cpp",
"condition_variable": "cpp",
"coroutine": "cpp",
"list": "cpp",
"set": "cpp",
"algorithm": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"optional": "cpp",
"random": "cpp",
"source_location": "cpp",
"future": "cpp",
"iomanip": "cpp",
"iostream": "cpp",
"mutex": "cpp",
"span": "cpp",
"cfenv": "cpp",
"typeindex": "cpp",
"variant": "cpp",
"unordered_set": "cpp"
}
}
}

1
vscode-fauxpilot Submodule

@ -0,0 +1 @@
Subproject commit fb64ec7d299d6280301ee870ad26216156ca0cb2