Don't use flash attention on Google Colab

oobabooga 2024-07-23 19:50:56 -07:00
parent 9d5513fda0
commit 98ed6d3a66


@@ -74,7 +74,7 @@
 "# Parameters\n",
 "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n",
 "branch = \"8.0bpw\" #@param {type:\"string\"}\n",
-"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
+"command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n",
 "api = False #@param {type:\"boolean\"}\n",
 "\n",
 "if api:\n",