Initial Library Loader for .NET Bindings / Update bindings to support newest changes (#763)

* Initial Library Loader
* Load library as part of Model factory
* Dynamically search and find the dlls
* Update tests to use locally built runtimes
* Fix dylib loading, add macos runtime support for sample/tests
* Bypass automatic loading by default.
* Only set CMAKE_OSX_ARCHITECTURES if not already set, allow cross-compile
* Switch Loading again
* Update build scripts for mac/linux
* Update bindings to support newest breaking changes
* Fix build
* Use llmodel for Windows
* Actually, it does need to be libllmodel
* Name
* Remove TFMs, bypass loading by default
* Fix script
* Delete mac script

---------

Co-authored-by: Tim Miller <innerlogic4321@ghmail.com>
2024-10-01 01:06:10 -04:00 · 2023-06-13 21:05:34 +09:00 · 2023-06-13 21:05:34 +09:00 · 797891c995
commit 797891c995
parent 88616fde7f
21 changed files with 850 additions and 671 deletions
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@ -9,7 +9,9 @@ if(APPLE)
    set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
  else()
    # Build for the host architecture on macOS
-    set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
+    if(NOT CMAKE_OSX_ARCHITECTURES)
+      set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
+    endif()
  endif()
 endif()

--- a/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj
+++ b/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj
@ -1,18 +1,31 @@
 <Project Sdk="Microsoft.NET.Sdk">

-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <TargetFramework>net7.0</TargetFramework>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-  </PropertyGroup>
+    <PropertyGroup>
+        <OutputType>Exe</OutputType>
+        <TargetFramework>net7.0</TargetFramework>
+        <ImplicitUsings>enable</ImplicitUsings>
+        <Nullable>enable</Nullable>
+    </PropertyGroup>

-  <ItemGroup>
-    <ProjectReference Include="..\Gpt4All\Gpt4All.csproj" />
-  </ItemGroup>
+    <ItemGroup>
+        <ProjectReference Include="..\Gpt4All\Gpt4All.csproj" />
+    </ItemGroup>

-  <ItemGroup>
-    <Folder Include="Properties\" />
-  </ItemGroup>
+    <ItemGroup>
+        <!-- Windows -->
+        <None Include="..\runtimes\win-x64\native\*.dll" Pack="true" PackagePath="runtimes\win-x64\native\%(Filename)%(Extension)" />
+        <!-- Linux -->
+        <None Include="..\runtimes\linux-x64\native\*.so" Pack="true" PackagePath="runtimes\linux-x64\native\%(Filename)%(Extension)" />
+        <!-- MacOS -->
+        <None Include="..\runtimes\osx\native\*.dylib" Pack="true" PackagePath="runtimes\osx\native\%(Filename)%(Extension)" />
+    </ItemGroup>

+    <ItemGroup>
+        <!-- Windows -->
+        <None Condition="$([MSBuild]::IsOSPlatform('Windows'))" Include="..\runtimes\win-x64\native\*.dll" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+        <!-- Linux -->
+        <None Condition="$([MSBuild]::IsOSPlatform('Linux'))" Include="..\runtimes\linux-x64\native\*.so" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+         <!-- MacOS -->
+        <None Condition="$([MSBuild]::IsOSPlatform('OSX'))" Include="..\runtimes\osx\native\*.dylib" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+    </ItemGroup>
 </Project>
--- a/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj
+++ b/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj
@ -21,7 +21,24 @@
    </ItemGroup>

    <ItemGroup>
-      <ProjectReference Include="..\Gpt4All\Gpt4All.csproj" />
+        <ProjectReference Include="..\Gpt4All\Gpt4All.csproj" />
    </ItemGroup>

+    <ItemGroup>
+        <!-- Windows -->
+        <None Include="..\runtimes\win-x64\native\*.dll" Pack="true" PackagePath="runtimes\win-x64\native\%(Filename)%(Extension)" />
+        <!-- Linux -->
+        <None Include="..\runtimes\linux-x64\native\*.so" Pack="true" PackagePath="runtimes\linux-x64\native\%(Filename)%(Extension)" />
+        <!-- MacOS -->
+        <None Include="..\runtimes\osx\native\*.dylib" Pack="true" PackagePath="runtimes\osx\native\%(Filename)%(Extension)" />
+    </ItemGroup>
+
+    <ItemGroup>
+        <!-- Windows -->
+        <None Condition="$([MSBuild]::IsOSPlatform('Windows'))" Include="..\runtimes\win-x64\native\*.dll" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+        <!-- Linux -->
+        <None Condition="$([MSBuild]::IsOSPlatform('Linux'))" Include="..\runtimes\linux-x64\native\*.so" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+         <!-- MacOS -->
+        <None Condition="$([MSBuild]::IsOSPlatform('OSX'))" Include="..\runtimes\osx\native\*.dylib" Visible="False" CopyToOutputDirectory="PreserveNewest" />
+    </ItemGroup>
 </Project>
--- a/gpt4all-bindings/csharp/Gpt4All.Tests/ModelFactoryTests.cs
+++ b/gpt4all-bindings/csharp/Gpt4All.Tests/ModelFactoryTests.cs
@ -14,18 +14,18 @@ public class ModelFactoryTests
    [Fact]
    public void CanLoadLlamaModel()
    {
-        using var model = _modelFactory.LoadLlamaModel(Constants.LLAMA_MODEL_PATH);
+        using var model = _modelFactory.LoadModel(Constants.LLAMA_MODEL_PATH);
    }

    [Fact]
    public void CanLoadGptjModel()
    {
-        using var model = _modelFactory.LoadGptjModel(Constants.GPTJ_MODEL_PATH);
+        using var model = _modelFactory.LoadModel(Constants.GPTJ_MODEL_PATH);
    }

    [Fact]
    public void CanLoadMptModel()
    {
-        using var model = _modelFactory.LoadMptModel(Constants.MPT_MODEL_PATH);
+        using var model = _modelFactory.LoadModel(Constants.MPT_MODEL_PATH);
    }
 }
--- a/gpt4all-bindings/csharp/Gpt4All/Bindings/LLModel.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Bindings/LLModel.cs
@ -1,247 +1,222 @@
-using Microsoft.Extensions.Logging;
-using Microsoft.Extensions.Logging.Abstractions;
-
-namespace Gpt4All.Bindings;
-
-/// <summary>
-/// Arguments for the response processing callback
-/// </summary>
-/// <param name="TokenId">The token id of the response</param>
-/// <param name="Response"> The response string. NOTE: a token_id of -1 indicates the string is an error string</param>
-/// <return>
-/// A bool indicating whether the model should keep generating
-/// </return>
-public record ModelResponseEventArgs(int TokenId, string Response)
-{
-    public bool IsError => TokenId == -1;
-}
-
-/// <summary>
-/// Arguments for the prompt processing callback
-/// </summary>
-/// <param name="TokenId">The token id of the prompt</param>
-/// <return>
-/// A bool indicating whether the model should keep processing
-/// </return>
-public record ModelPromptEventArgs(int TokenId)
-{
-}
-
-/// <summary>
-/// Arguments for the recalculating callback
-/// </summary>
-/// <param name="IsRecalculating"> whether the model is recalculating the context.</param>
-/// <return>
-/// A bool indicating whether the model should keep generating
-/// </return>
-public record ModelRecalculatingEventArgs(bool IsRecalculating);
-
-/// <summary>
-/// Base class and universal wrapper for GPT4All language models built around llmodel C-API.
-/// </summary>
-public class LLModel : ILLModel
-{
-    protected readonly IntPtr _handle;
-    private readonly ModelType _modelType;
-    private readonly ILogger _logger;
-    private bool _disposed;
-
-    public ModelType ModelType => _modelType;
-
-    internal LLModel(IntPtr handle, ModelType modelType, ILogger? logger = null)
-    {
-        _handle = handle;
-        _modelType = modelType;
-        _logger = logger ?? NullLogger.Instance;
-    }
-
-    /// <summary>
-    /// Create a new model from a pointer
-    /// </summary>
-    /// <param name="handle">Pointer to underlying model</param>
-    /// <param name="modelType">The model type</param>
-    public static LLModel Create(IntPtr handle, ModelType modelType, ILogger? logger = null)
-    {
-        return new LLModel(handle, modelType, logger: logger);
-    }
-
-    /// <summary>
-    /// Generate a response using the model
-    /// </summary>
-    /// <param name="text">The input promp</param>
-    /// <param name="context">The context</param>
-    /// <param name="promptCallback">A callback function for handling the processing of prompt</param>
-    /// <param name="responseCallback">A callback function for handling the generated response</param>
-    /// <param name="recalculateCallback">A callback function for handling recalculation requests</param>
-    /// <param name="cancellationToken"></param>
-    public void Prompt(
-        string text,
-        LLModelPromptContext context,
-        Func<ModelPromptEventArgs, bool>? promptCallback = null,
-        Func<ModelResponseEventArgs, bool>? responseCallback = null,
-        Func<ModelRecalculatingEventArgs, bool>? recalculateCallback = null,
-        CancellationToken cancellationToken = default)
-    {
-        GC.KeepAlive(promptCallback);
-        GC.KeepAlive(responseCallback);
-        GC.KeepAlive(recalculateCallback);
-        GC.KeepAlive(cancellationToken);
-
-        _logger.LogInformation("Prompt input='{Prompt}' ctx={Context}", text, context.Dump());
-
-        NativeMethods.llmodel_prompt(
-            _handle,
-            text,
-            (tokenId) =>
-            {
-                if (cancellationToken.IsCancellationRequested) return false;
-                if (promptCallback == null) return true;
-                var args = new ModelPromptEventArgs(tokenId);
-                return promptCallback(args);
-            },
-            (tokenId, response) =>
-            {
-                if (cancellationToken.IsCancellationRequested)
-                {
-                    _logger.LogDebug("ResponseCallback evt=CancellationRequested");
-                    return false;
-                }
-
-                if (responseCallback == null) return true;
-                var args = new ModelResponseEventArgs(tokenId, response);
-                return responseCallback(args);
-            },
-            (isRecalculating) =>
-            {
-                if (cancellationToken.IsCancellationRequested) return false;
-                if (recalculateCallback == null) return true;
-                var args = new ModelRecalculatingEventArgs(isRecalculating);
-                return recalculateCallback(args);
-            },
-            ref context.UnderlyingContext
-        );
-    }
-
-    /// <summary>
-    ///  Set the number of threads to be used by the model.
-    /// </summary>
-    /// <param name="threadCount">The new thread count</param>
-    public void SetThreadCount(int threadCount)
-    {
-        NativeMethods.llmodel_setThreadCount(_handle, threadCount);
-    }
-
-    /// <summary>
-    /// Get  the number of threads used by the model.
-    /// </summary>
-    /// <returns>the number of threads used by the model</returns>
-    public int GetThreadCount()
-    {
-        return NativeMethods.llmodel_threadCount(_handle);
-    }
-
-    /// <summary>
-    /// Get the size of the internal state of the model.
-    /// </summary>
-    /// <remarks>
-    /// This state data is specific to the type of model you have created.
-    /// </remarks>
-    /// <returns>the size in bytes of the internal state of the model</returns>
-    public ulong GetStateSizeBytes()
-    {
-        return NativeMethods.llmodel_get_state_size(_handle);
-    }
-
-    /// <summary>
-    /// Saves the internal state of the model to the specified destination address.
-    /// </summary>
-    /// <param name="source">A pointer to the src</param>
-    /// <returns>The number of bytes copied</returns>
-    public unsafe ulong SaveStateData(byte* source)
-    {
-        return NativeMethods.llmodel_save_state_data(_handle, source);
-    }
-
-    /// <summary>
-    /// Restores the internal state of the model using data from the specified address.
-    /// </summary>
-    /// <param name="destination">A pointer to destination</param>
-    /// <returns>the number of bytes read</returns>
-    public unsafe ulong RestoreStateData(byte* destination)
-    {
-        return NativeMethods.llmodel_restore_state_data(_handle, destination);
-    }
-
-    /// <summary>
-    /// Check if the model is loaded.
-    /// </summary>
-    /// <returns>true if the model was loaded successfully, false otherwise.</returns>
-    public bool IsLoaded()
-    {
-        return NativeMethods.llmodel_isModelLoaded(_handle);
-    }
-
-    /// <summary>
-    /// Load the model from a file.
-    /// </summary>
-    /// <param name="modelPath">The path to the model file.</param>
-    /// <returns>true if the model was loaded successfully, false otherwise.</returns>
-    public bool Load(string modelPath)
-    {
-        return NativeMethods.llmodel_loadModel(_handle, modelPath);
-    }
-
-    protected void Destroy()
-    {
-        NativeMethods.llmodel_model_destroy(_handle);
-    }
-
-    protected void DestroyLLama()
-    {
-        NativeMethods.llmodel_llama_destroy(_handle);
-    }
-
-    protected void DestroyGptj()
-    {
-        NativeMethods.llmodel_gptj_destroy(_handle);
-    }
-
-    protected void DestroyMtp()
-    {
-        NativeMethods.llmodel_mpt_destroy(_handle);
-    }
-
-    protected virtual void Dispose(bool disposing)
-    {
-        if (_disposed) return;
-
-        if (disposing)
-        {
-            // dispose managed state
-        }
-
-        switch (_modelType)
-        {
-            case ModelType.LLAMA:
-                DestroyLLama();
-                break;
-            case ModelType.GPTJ:
-                DestroyGptj();
-                break;
-            case ModelType.MPT:
-                DestroyMtp();
-                break;
-            default:
-                Destroy();
-                break;
-        }
-
-        _disposed = true;
-    }
-
-    public void Dispose()
-    {
-        Dispose(disposing: true);
-        GC.SuppressFinalize(this);
-    }
-}
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Logging.Abstractions;
+
+namespace Gpt4All.Bindings;
+
+/// <summary>
+/// Arguments for the response processing callback
+/// </summary>
+/// <param name="TokenId">The token id of the response</param>
+/// <param name="Response"> The response string. NOTE: a token_id of -1 indicates the string is an error string</param>
+/// <return>
+/// A bool indicating whether the model should keep generating
+/// </return>
+public record ModelResponseEventArgs(int TokenId, string Response)
+{
+    public bool IsError => TokenId == -1;
+}
+
+/// <summary>
+/// Arguments for the prompt processing callback
+/// </summary>
+/// <param name="TokenId">The token id of the prompt</param>
+/// <return>
+/// A bool indicating whether the model should keep processing
+/// </return>
+public record ModelPromptEventArgs(int TokenId)
+{
+}
+
+/// <summary>
+/// Arguments for the recalculating callback
+/// </summary>
+/// <param name="IsRecalculating"> whether the model is recalculating the context.</param>
+/// <return>
+/// A bool indicating whether the model should keep generating
+/// </return>
+public record ModelRecalculatingEventArgs(bool IsRecalculating);
+
+/// <summary>
+/// Base class and universal wrapper for GPT4All language models built around llmodel C-API.
+/// </summary>
+public class LLModel : ILLModel
+{
+    protected readonly IntPtr _handle;
+    private readonly ModelType _modelType;
+    private readonly ILogger _logger;
+    private bool _disposed;
+
+    public ModelType ModelType => _modelType;
+
+    internal LLModel(IntPtr handle, ModelType modelType, ILogger? logger = null)
+    {
+        _handle = handle;
+        _modelType = modelType;
+        _logger = logger ?? NullLogger.Instance;
+    }
+
+    /// <summary>
+    /// Create a new model from a pointer
+    /// </summary>
+    /// <param name="handle">Pointer to underlying model</param>
+    /// <param name="modelType">The model type</param>
+    public static LLModel Create(IntPtr handle, ModelType modelType, ILogger? logger = null)
+    {
+        return new LLModel(handle, modelType, logger: logger);
+    }
+
+    /// <summary>
+    /// Generate a response using the model
+    /// </summary>
+    /// <param name="text">The input prompt</param>
+    /// <param name="context">The context</param>
+    /// <param name="promptCallback">A callback function for handling the processing of prompt</param>
+    /// <param name="responseCallback">A callback function for handling the generated response</param>
+    /// <param name="recalculateCallback">A callback function for handling recalculation requests</param>
+    /// <param name="cancellationToken">Checked inside every callback; once cancellation is requested the callbacks return false</param>
+    public void Prompt(
+        string text,
+        LLModelPromptContext context,
+        Func<ModelPromptEventArgs, bool>? promptCallback = null,
+        Func<ModelResponseEventArgs, bool>? responseCallback = null,
+        Func<ModelRecalculatingEventArgs, bool>? recalculateCallback = null,
+        CancellationToken cancellationToken = default)
+    {
+        GC.KeepAlive(promptCallback);
+        GC.KeepAlive(responseCallback);
+        GC.KeepAlive(recalculateCallback);
+        GC.KeepAlive(cancellationToken);
+
+        _logger.LogInformation("Prompt input='{Prompt}' ctx={Context}", text, context.Dump());
+
+        NativeMethods.llmodel_prompt(
+            _handle,
+            text,
+            (tokenId) =>
+            {
+                if (cancellationToken.IsCancellationRequested) return false;
+                if (promptCallback == null) return true;
+                var args = new ModelPromptEventArgs(tokenId);
+                return promptCallback(args);
+            },
+            (tokenId, response) =>
+            {
+                if (cancellationToken.IsCancellationRequested)
+                {
+                    _logger.LogDebug("ResponseCallback evt=CancellationRequested");
+                    return false;
+                }
+
+                if (responseCallback == null) return true;
+                var args = new ModelResponseEventArgs(tokenId, response);
+                return responseCallback(args);
+            },
+            (isRecalculating) =>
+            {
+                if (cancellationToken.IsCancellationRequested) return false;
+                if (recalculateCallback == null) return true;
+                var args = new ModelRecalculatingEventArgs(isRecalculating);
+                return recalculateCallback(args);
+            },
+            ref context.UnderlyingContext
+        );
+    }
+
+    /// <summary>
+    /// Set the number of threads to be used by the model.
+    /// </summary>
+    /// <param name="threadCount">The new thread count</param>
+    public void SetThreadCount(int threadCount)
+    {
+        NativeMethods.llmodel_setThreadCount(_handle, threadCount);
+    }
+
+    /// <summary>
+    /// Get the number of threads used by the model.
+    /// </summary>
+    /// <returns>the number of threads used by the model</returns>
+    public int GetThreadCount()
+    {
+        return NativeMethods.llmodel_threadCount(_handle);
+    }
+
+    /// <summary>
+    /// Get the size of the internal state of the model.
+    /// </summary>
+    /// <remarks>
+    /// This state data is specific to the type of model you have created.
+    /// </remarks>
+    /// <returns>the size in bytes of the internal state of the model</returns>
+    public ulong GetStateSizeBytes()
+    {
+        return NativeMethods.llmodel_get_state_size(_handle);
+    }
+
+    /// <summary>
+    /// Saves the internal state of the model to the specified destination address.
+    /// </summary>
+    /// <param name="source">A pointer to the buffer that receives the saved state (the destination, despite the parameter name)</param>
+    /// <returns>The number of bytes copied</returns>
+    public unsafe ulong SaveStateData(byte* source)
+    {
+        return NativeMethods.llmodel_save_state_data(_handle, source);
+    }
+
+    /// <summary>
+    /// Restores the internal state of the model using data from the specified address.
+    /// </summary>
+    /// <param name="destination">A pointer to the buffer the state is read from (the source, despite the parameter name)</param>
+    /// <returns>the number of bytes read</returns>
+    public unsafe ulong RestoreStateData(byte* destination)
+    {
+        return NativeMethods.llmodel_restore_state_data(_handle, destination);
+    }
+
+    /// <summary>
+    /// Check if the model is loaded.
+    /// </summary>
+    /// <returns>true if the model was loaded successfully, false otherwise.</returns>
+    public bool IsLoaded()
+    {
+        return NativeMethods.llmodel_isModelLoaded(_handle);
+    }
+
+    /// <summary>
+    /// Load the model from a file.
+    /// </summary>
+    /// <param name="modelPath">The path to the model file.</param>
+    /// <returns>true if the model was loaded successfully, false otherwise.</returns>
+    public bool Load(string modelPath)
+    {
+        return NativeMethods.llmodel_loadModel(_handle, modelPath);
+    }
+
+    protected void Destroy()
+    {
+        NativeMethods.llmodel_model_destroy(_handle);
+    }
+    protected virtual void Dispose(bool disposing)
+    {
+        if (_disposed) return;
+
+        if (disposing)
+        {
+            // dispose managed state
+        }
+
+        switch (_modelType)
+        {
+            default:
+                Destroy();
+                break;
+        }
+
+        _disposed = true;
+    }
+
+    public void Dispose()
+    {
+        Dispose(disposing: true);
+        GC.SuppressFinalize(this);
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Bindings/LLPromptContext.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Bindings/LLPromptContext.cs
@ -1,138 +1,138 @@
-namespace Gpt4All.Bindings;
-
-/// <summary>
-/// Wrapper around the llmodel_prompt_context structure for holding the prompt context.
-/// </summary>
-/// <remarks>
-/// The implementation takes care of all the memory handling of the raw logits pointer and the
-/// raw tokens pointer.Attempting to resize them or modify them in any way can lead to undefined behavior
-/// </remarks>
-public unsafe class LLModelPromptContext
-{
-    private llmodel_prompt_context _ctx;
-
-    internal ref llmodel_prompt_context UnderlyingContext => ref _ctx;
-
-    public LLModelPromptContext()
-    {
-        _ctx = new();
-    }
-
-    /// <summary>
-    /// logits of current context
-    /// </summary>
-    public Span<float> Logits => new(_ctx.logits, (int)_ctx.logits_size);
-
-    /// <summary>
-    /// the size of the raw logits vector
-    /// </summary>
-    public nuint LogitsSize
-    {
-        get => _ctx.logits_size;
-        set => _ctx.logits_size = value;
-    }
-
-    /// <summary>
-    /// current tokens in the context window
-    /// </summary>
-    public Span<int> Tokens => new(_ctx.tokens, (int)_ctx.tokens_size);
-
-    /// <summary>
-    /// the size of the raw tokens vector
-    /// </summary>
-    public nuint TokensSize
-    {
-        get => _ctx.tokens_size;
-        set => _ctx.tokens_size = value;
-    }
-
-    /// <summary>
-    /// top k logits to sample from
-    /// </summary>
-    public int TopK
-    {
-        get => _ctx.top_k;
-        set => _ctx.top_k = value;
-    }
-
-    /// <summary>
-    /// nucleus sampling probability threshold
-    /// </summary>
-    public float TopP
-    {
-        get => _ctx.top_p;
-        set => _ctx.top_p = value;
-    }
-
-    /// <summary>
-    /// temperature to adjust model's output distribution
-    /// </summary>
-    public float Temperature
-    {
-        get => _ctx.temp;
-        set => _ctx.temp = value;
-    }
-
-    /// <summary>
-    /// number of tokens in past conversation
-    /// </summary>
-    public int PastNum
-    {
-        get => _ctx.n_past;
-        set => _ctx.n_past = value;
-    }
-
-    /// <summary>
-    /// number of predictions to generate in parallel
-    /// </summary>
-    public int Batches
-    {
-        get => _ctx.n_batch;
-        set => _ctx.n_batch = value;
-    }
-
-    /// <summary>
-    /// number of tokens to predict
-    /// </summary>
-    public int TokensToPredict
-    {
-        get => _ctx.n_predict;
-        set => _ctx.n_predict = value;
-    }
-
-    /// <summary>
-    /// penalty factor for repeated tokens
-    /// </summary>
-    public float RepeatPenalty
-    {
-        get => _ctx.repeat_penalty;
-        set => _ctx.repeat_penalty = value;
-    }
-
-    /// <summary>
-    /// last n tokens to penalize
-    /// </summary>
-    public int RepeatLastN
-    {
-        get => _ctx.repeat_last_n;
-        set => _ctx.repeat_last_n = value;
-    }
-
-    /// <summary>
-    /// number of tokens possible in context window
-    /// </summary>
-    public int ContextSize
-    {
-        get => _ctx.n_ctx;
-        set => _ctx.n_ctx = value;
-    }
-
-    /// <summary>
-    /// percent of context to erase if we exceed the context window
-    /// </summary>
-    public float ContextErase
-    {
-        get => _ctx.context_erase;
-        set => _ctx.context_erase = value;
-    }
-}
+namespace Gpt4All.Bindings;
+
+/// <summary>
+/// Wrapper around the llmodel_prompt_context structure for holding the prompt context.
+/// </summary>
+/// <remarks>
+/// The implementation takes care of all the memory handling of the raw logits pointer and the
+/// raw tokens pointer. Attempting to resize them or modify them in any way can lead to undefined behavior.
+/// </remarks>
+public unsafe class LLModelPromptContext
+{
+    private llmodel_prompt_context _ctx;
+
+    internal ref llmodel_prompt_context UnderlyingContext => ref _ctx;
+
+    public LLModelPromptContext()
+    {
+        _ctx = new();
+    }
+
+    /// <summary>
+    /// logits of current context
+    /// </summary>
+    public Span<float> Logits => new(_ctx.logits, (int)_ctx.logits_size);
+
+    /// <summary>
+    /// the size of the raw logits vector
+    /// </summary>
+    public nuint LogitsSize
+    {
+        get => _ctx.logits_size;
+        set => _ctx.logits_size = value;
+    }
+
+    /// <summary>
+    /// current tokens in the context window
+    /// </summary>
+    public Span<int> Tokens => new(_ctx.tokens, (int)_ctx.tokens_size);
+
+    /// <summary>
+    /// the size of the raw tokens vector
+    /// </summary>
+    public nuint TokensSize
+    {
+        get => _ctx.tokens_size;
+        set => _ctx.tokens_size = value;
+    }
+
+    /// <summary>
+    /// top k logits to sample from
+    /// </summary>
+    public int TopK
+    {
+        get => _ctx.top_k;
+        set => _ctx.top_k = value;
+    }
+
+    /// <summary>
+    /// nucleus sampling probability threshold
+    /// </summary>
+    public float TopP
+    {
+        get => _ctx.top_p;
+        set => _ctx.top_p = value;
+    }
+
+    /// <summary>
+    /// temperature to adjust model's output distribution
+    /// </summary>
+    public float Temperature
+    {
+        get => _ctx.temp;
+        set => _ctx.temp = value;
+    }
+
+    /// <summary>
+    /// number of tokens in past conversation
+    /// </summary>
+    public int PastNum
+    {
+        get => _ctx.n_past;
+        set => _ctx.n_past = value;
+    }
+
+    /// <summary>
+    /// number of predictions to generate in parallel
+    /// </summary>
+    public int Batches
+    {
+        get => _ctx.n_batch;
+        set => _ctx.n_batch = value;
+    }
+
+    /// <summary>
+    /// number of tokens to predict
+    /// </summary>
+    public int TokensToPredict
+    {
+        get => _ctx.n_predict;
+        set => _ctx.n_predict = value;
+    }
+
+    /// <summary>
+    /// penalty factor for repeated tokens
+    /// </summary>
+    public float RepeatPenalty
+    {
+        get => _ctx.repeat_penalty;
+        set => _ctx.repeat_penalty = value;
+    }
+
+    /// <summary>
+    /// last n tokens to penalize
+    /// </summary>
+    public int RepeatLastN
+    {
+        get => _ctx.repeat_last_n;
+        set => _ctx.repeat_last_n = value;
+    }
+
+    /// <summary>
+    /// number of tokens possible in context window
+    /// </summary>
+    public int ContextSize
+    {
+        get => _ctx.n_ctx;
+        set => _ctx.n_ctx = value;
+    }
+
+    /// <summary>
+    /// percent of context to erase if we exceed the context window
+    /// </summary>
+    public float ContextErase
+    {
+        get => _ctx.context_erase;
+        set => _ctx.context_erase = value;
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Bindings/NativeMethods.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Bindings/NativeMethods.cs
@ -1,126 +1,107 @@
-using System.Runtime.InteropServices;
-
-namespace Gpt4All.Bindings;
-
-public unsafe partial struct llmodel_prompt_context
-{
-    public float* logits;
-
-    [NativeTypeName("size_t")]
-    public nuint logits_size;
-
-    [NativeTypeName("int32_t *")]
-    public int* tokens;
-
-    [NativeTypeName("size_t")]
-    public nuint tokens_size;
-
-    [NativeTypeName("int32_t")]
-    public int n_past;
-
-    [NativeTypeName("int32_t")]
-    public int n_ctx;
-
-    [NativeTypeName("int32_t")]
-    public int n_predict;
-
-    [NativeTypeName("int32_t")]
-    public int top_k;
-
-    public float top_p;
-
-    public float temp;
-
-    [NativeTypeName("int32_t")]
-    public int n_batch;
-
-    public float repeat_penalty;
-
-    [NativeTypeName("int32_t")]
-    public int repeat_last_n;
-
-    public float context_erase;
-}
-
-internal static unsafe partial class NativeMethods
-{
-    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
-    [return: MarshalAs(UnmanagedType.I1)]
-    public delegate bool LlmodelResponseCallback(int token_id, [MarshalAs(UnmanagedType.LPUTF8Str)] string response);
-
-    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
-    [return: MarshalAs(UnmanagedType.I1)]
-    public delegate bool LlmodelPromptCallback(int token_id);
-
-    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
-    [return: MarshalAs(UnmanagedType.I1)]
-    public delegate bool LlmodelRecalculateCallback(bool isRecalculating);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("llmodel_model")]
-    public static extern IntPtr llmodel_gptj_create();
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    public static extern void llmodel_gptj_destroy([NativeTypeName("llmodel_model")] IntPtr gptj);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("llmodel_model")]
-    public static extern IntPtr llmodel_mpt_create();
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    public static extern void llmodel_mpt_destroy([NativeTypeName("llmodel_model")] IntPtr mpt);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("llmodel_model")]
-    public static extern IntPtr llmodel_llama_create();
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    public static extern void llmodel_llama_destroy([NativeTypeName("llmodel_model")] IntPtr llama);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
-    [return: NativeTypeName("llmodel_model")]
-    public static extern IntPtr llmodel_model_create(
-        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    public static extern void llmodel_model_destroy([NativeTypeName("llmodel_model")] IntPtr model);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
-    [return: MarshalAs(UnmanagedType.I1)]
-    public static extern bool llmodel_loadModel(
-        [NativeTypeName("llmodel_model")] IntPtr model,
-        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-
-    [return: MarshalAs(UnmanagedType.I1)]
-    public static extern bool llmodel_isModelLoaded([NativeTypeName("llmodel_model")] IntPtr model);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("uint64_t")]
-    public static extern ulong llmodel_get_state_size([NativeTypeName("llmodel_model")] IntPtr model);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("uint64_t")]
-    public static extern ulong llmodel_save_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("uint8_t *")] byte* dest);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("uint64_t")]
-    public static extern ulong llmodel_restore_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("const uint8_t *")] byte* src);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
-    public static extern void llmodel_prompt(
-        [NativeTypeName("llmodel_model")] IntPtr model,
-        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string prompt,
-        LlmodelPromptCallback prompt_callback,
-        LlmodelResponseCallback response_callback,
-        LlmodelRecalculateCallback recalculate_callback,
-        ref llmodel_prompt_context ctx);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    public static extern void llmodel_setThreadCount([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("int32_t")] int n_threads);
-
-    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
-    [return: NativeTypeName("int32_t")]
-    public static extern int llmodel_threadCount([NativeTypeName("llmodel_model")] IntPtr model);
-}
+using System.Runtime.InteropServices;
+
+namespace Gpt4All.Bindings;
+
+/// <summary>
+/// Prompt/sampling state handed to the native library by reference (see the <c>ctx</c> parameter of
+/// <c>NativeMethods.llmodel_prompt</c>). The <c>NativeTypeName</c> attributes record the native C type of
+/// each field. Because the struct crosses the interop boundary, field order and types must not be changed
+/// independently of the native declaration.
+/// </summary>
+public unsafe partial struct llmodel_prompt_context
+{
+    // Pointer to a float buffer; its size is carried in logits_size.
+    // NOTE(review): ownership and whether the size is in elements or bytes are not visible here - confirm.
+    public float* logits;
+
+    // Size associated with the logits buffer (native size_t).
+    [NativeTypeName("size_t")]
+    public nuint logits_size;
+
+    // Pointer to an int32 buffer; its size is carried in tokens_size.
+    [NativeTypeName("int32_t *")]
+    public int* tokens;
+
+    // Size associated with the tokens buffer (native size_t).
+    [NativeTypeName("size_t")]
+    public nuint tokens_size;
+
+    // Number of tokens in the past conversation.
+    [NativeTypeName("int32_t")]
+    public int n_past;
+
+    // Number of tokens possible in the context window.
+    [NativeTypeName("int32_t")]
+    public int n_ctx;
+
+    // Number of tokens to predict.
+    [NativeTypeName("int32_t")]
+    public int n_predict;
+
+    // Top-k sampling parameter.
+    [NativeTypeName("int32_t")]
+    public int top_k;
+
+    // Nucleus sampling probability threshold.
+    public float top_p;
+
+    // Temperature used to adjust the model's output distribution.
+    public float temp;
+
+    // Number of predictions to generate in parallel.
+    [NativeTypeName("int32_t")]
+    public int n_batch;
+
+    // Penalty factor for repeated tokens.
+    public float repeat_penalty;
+
+    // Number of most recent tokens the repeat penalty applies to.
+    [NativeTypeName("int32_t")]
+    public int repeat_last_n;
+
+    // Percent of the context to erase when the context window is exceeded.
+    public float context_erase;
+}
+
+/// <summary>
+/// P/Invoke declarations for the native <c>libllmodel</c> library. Every import uses the Cdecl calling
+/// convention with <c>ExactSpelling = true</c>; managed strings are marshalled as UTF-8
+/// (<c>UnmanagedType.LPUTF8Str</c>) and native <c>bool</c> results as a single byte (<c>UnmanagedType.I1</c>).
+/// </summary>
+internal static unsafe partial class NativeMethods
+{
+    /// <summary>
+    /// Callback type for the <c>response_callback</c> argument of <see cref="llmodel_prompt"/>.
+    /// Receives a token id and its text marshalled from UTF-8.
+    /// </summary>
+    // NOTE(review): presumably returning false asks the native side to stop generating - confirm against the C header.
+    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
+    [return: MarshalAs(UnmanagedType.I1)]
+    public delegate bool LlmodelResponseCallback(int token_id, [MarshalAs(UnmanagedType.LPUTF8Str)] string response);
+
+    /// <summary>
+    /// Callback type for the <c>prompt_callback</c> argument of <see cref="llmodel_prompt"/>.
+    /// Receives a token id.
+    /// </summary>
+    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
+    [return: MarshalAs(UnmanagedType.I1)]
+    public delegate bool LlmodelPromptCallback(int token_id);
+
+    /// <summary>
+    /// Callback type for the <c>recalculate_callback</c> argument of <see cref="llmodel_prompt"/>.
+    /// </summary>
+    // NOTE(review): unlike the return value, the isRecalculating parameter carries no MarshalAs attribute, so it
+    // uses the default bool marshalling (4-byte BOOL). Confirm this matches the native parameter type.
+    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
+    [return: MarshalAs(UnmanagedType.I1)]
+    public delegate bool LlmodelRecalculateCallback(bool isRecalculating);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_model_create2</c> export; returns a native model handle.
+    /// </summary>
+    /// <param name="model_path">Path of the model file, passed as a UTF-8 C string.</param>
+    /// <param name="build_variant">Build variant selector, passed as a UTF-8 C string.</param>
+    /// <param name="error">Pointer-sized output written by the native call.</param>
+    // NOTE(review): error is declared as "out IntPtr", i.e. the native side is given room for exactly one
+    // pointer-sized value. Confirm the native parameter type never writes more than that.
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
+    [return: NativeTypeName("llmodel_model")]
+    public static extern IntPtr llmodel_model_create2(
+        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path,
+        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string build_variant,
+        out IntPtr error);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_model_destroy</c> export; takes the model handle to destroy.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    public static extern void llmodel_model_destroy([NativeTypeName("llmodel_model")] IntPtr model);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_loadModel</c> export.
+    /// </summary>
+    /// <param name="model">Native model handle.</param>
+    /// <param name="model_path">Path of the model file, passed as a UTF-8 C string.</param>
+    /// <returns>The native boolean result (one byte), surfaced as <see cref="bool"/>.</returns>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
+    [return: MarshalAs(UnmanagedType.I1)]
+    public static extern bool llmodel_loadModel(
+        [NativeTypeName("llmodel_model")] IntPtr model,
+        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_isModelLoaded</c> export; the one-byte native result is surfaced as
+    /// <see cref="bool"/>.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+
+    [return: MarshalAs(UnmanagedType.I1)]
+    public static extern bool llmodel_isModelLoaded([NativeTypeName("llmodel_model")] IntPtr model);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_get_state_size</c> export; returns a native <c>uint64_t</c>.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    [return: NativeTypeName("uint64_t")]
+    public static extern ulong llmodel_get_state_size([NativeTypeName("llmodel_model")] IntPtr model);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_save_state_data</c> export; writes through <paramref name="dest"/>
+    /// and returns a native <c>uint64_t</c>.
+    /// </summary>
+    // NOTE(review): presumably dest must hold at least llmodel_get_state_size(model) bytes - confirm.
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    [return: NativeTypeName("uint64_t")]
+    public static extern ulong llmodel_save_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("uint8_t *")] byte* dest);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_restore_state_data</c> export; reads from the const buffer
+    /// <paramref name="src"/> and returns a native <c>uint64_t</c>.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    [return: NativeTypeName("uint64_t")]
+    public static extern ulong llmodel_restore_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("const uint8_t *")] byte* src);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_prompt</c> export.
+    /// </summary>
+    /// <param name="model">Native model handle.</param>
+    /// <param name="prompt">Prompt text, passed as a UTF-8 C string.</param>
+    /// <param name="prompt_callback">Managed callback marshalled as a Cdecl function pointer.</param>
+    /// <param name="response_callback">Managed callback marshalled as a Cdecl function pointer.</param>
+    /// <param name="recalculate_callback">Managed callback marshalled as a Cdecl function pointer.</param>
+    /// <param name="ctx">Prompt context passed by reference, so the native side can read and write it.</param>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
+    public static extern void llmodel_prompt(
+        [NativeTypeName("llmodel_model")] IntPtr model,
+        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string prompt,
+        LlmodelPromptCallback prompt_callback,
+        LlmodelResponseCallback response_callback,
+        LlmodelRecalculateCallback recalculate_callback,
+        ref llmodel_prompt_context ctx);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_setThreadCount</c> export; passes an <c>int32_t</c> thread count.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    public static extern void llmodel_setThreadCount([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("int32_t")] int n_threads);
+
+    /// <summary>
+    /// Binding for the native <c>llmodel_threadCount</c> export; returns an <c>int32_t</c>.
+    /// </summary>
+    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
+    [return: NativeTypeName("int32_t")]
+    public static extern int llmodel_threadCount([NativeTypeName("llmodel_model")] IntPtr model);
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj
+++ b/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj
@ -1,27 +1,11 @@
 <Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-	<TargetFrameworks>net6.0</TargetFrameworks>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-  </PropertyGroup>
-
-	<ItemGroup>
-		<!-- Windows -->
-		<None Include="..\runtimes\win-x64\native\*.dll" Pack="true" PackagePath="runtimes\win-x64\native\%(Filename)%(Extension)" />
-		<!-- Linux -->
-		<None Include="..\runtimes\linux-x64\native\*.so" Pack="true" PackagePath="runtimes\linux-x64\native\%(Filename)%(Extension)" />
-	</ItemGroup>
-
-	<ItemGroup>
-		<!-- Windows -->
-		<None Condition="$([MSBuild]::IsOSPlatform('Windows'))" Include="..\runtimes\win-x64\native\*.dll" Visible="False" CopyToOutputDirectory="PreserveNewest" />
-		<!-- Linux -->
-		<None Condition="$([MSBuild]::IsOSPlatform('Linux'))" Include="..\runtimes\linux-x64\native\*.so" Visible="False" CopyToOutputDirectory="PreserveNewest" />
-	</ItemGroup>
-
-	<ItemGroup>
-	  <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.0" />
-	</ItemGroup>
+    <PropertyGroup>
+        <TargetFramework>net6.0</TargetFramework>
+        <ImplicitUsings>enable</ImplicitUsings>
+        <Nullable>enable</Nullable>
+        <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    </PropertyGroup>
+    <ItemGroup>
+        <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.0" />
+    </ItemGroup>
 </Project>
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/ILibraryLoader.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/ILibraryLoader.cs
@ -0,0 +1,6 @@
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// Strategy for opening a native library. Implement this and register it through
+/// <c>NativeLibraryLoader.SetLibraryLoader</c> to replace the built-in per-platform loaders.
+/// </summary>
+public interface ILibraryLoader
+{
+    /// <summary>
+    /// Attempts to open the native library identified by <paramref name="fileName"/>.
+    /// </summary>
+    /// <param name="fileName">Path or file name of the library to open.</param>
+    /// <returns>A <see cref="LoadResult"/> describing success, or failure with an error message.</returns>
+    LoadResult OpenLibrary(string? fileName);
+}
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/LinuxLibraryLoader.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/LinuxLibraryLoader.cs
@ -0,0 +1,53 @@
+using System.Runtime.InteropServices;
+
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// <see cref="ILibraryLoader"/> for Linux built on <c>dlopen</c>/<c>dlerror</c>. Each entry point is bound
+/// twice, once against <c>libdl.so.2</c> and once against <c>libdl.so</c>; the former is tried first and
+/// the latter is used only when the former raises <see cref="DllNotFoundException"/>.
+/// </summary>
+internal class LinuxLibraryLoader : ILibraryLoader
+{
+    // dlopen flag requesting lazy binding.
+    private const int LazyBindingFlag = 0x00001;
+
+#pragma warning disable CA2101
+    [DllImport("libdl.so", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlopen")]
+#pragma warning restore CA2101
+    public static extern IntPtr NativeOpenLibraryLibdl(string? filename, int flags);
+
+#pragma warning disable CA2101
+    [DllImport("libdl.so.2", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlopen")]
+#pragma warning restore CA2101
+    public static extern IntPtr NativeOpenLibraryLibdl2(string? filename, int flags);
+
+    [DllImport("libdl.so", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlerror")]
+    public static extern IntPtr GetLoadError();
+
+    [DllImport("libdl.so.2", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlerror")]
+    public static extern IntPtr GetLoadError2();
+
+    /// <summary>
+    /// Opens <paramref name="fileName"/> with <c>dlopen</c>.
+    /// </summary>
+    /// <param name="fileName">Path or file name of the shared object to open.</param>
+    /// <returns>
+    /// <see cref="LoadResult.Success"/> when a non-null handle is returned; otherwise a failure carrying
+    /// the <c>dlerror</c> text, or "Unknown error" when none is available.
+    /// </returns>
+    public LoadResult OpenLibrary(string? fileName)
+    {
+        if (OpenWithFallback(fileName) != IntPtr.Zero)
+        {
+            return LoadResult.Success;
+        }
+
+        var reason = Marshal.PtrToStringAnsi(ReadErrorWithFallback()) ?? "Unknown error";
+        return LoadResult.Failure(reason);
+    }
+
+    // Calls dlopen from libdl.so.2, falling back to libdl.so when the former cannot be found.
+    private static IntPtr OpenWithFallback(string? fileName)
+    {
+        try
+        {
+            return NativeOpenLibraryLibdl2(fileName, LazyBindingFlag);
+        }
+        catch (DllNotFoundException)
+        {
+            return NativeOpenLibraryLibdl(fileName, LazyBindingFlag);
+        }
+    }
+
+    // Calls dlerror from libdl.so.2, falling back to libdl.so when the former cannot be found.
+    private static IntPtr ReadErrorWithFallback()
+    {
+        try
+        {
+            return GetLoadError2();
+        }
+        catch (DllNotFoundException)
+        {
+            return GetLoadError();
+        }
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/LoadResult.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/LoadResult.cs
@ -0,0 +1,20 @@
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// Outcome of an attempt to open a native library: either the shared <see cref="Success"/> instance or a
+/// failure created through <see cref="Failure"/> that carries an error message.
+/// </summary>
+public class LoadResult
+{
+    // Instances are only created through Success / Failure.
+    private LoadResult(bool isSuccess, string? errorMessage)
+    {
+        (IsSuccess, ErrorMessage) = (isSuccess, errorMessage);
+    }
+
+    /// <summary>True when the library was opened.</summary>
+    public bool IsSuccess { get; }
+
+    /// <summary>Error text for a failed load; <c>null</c> on success.</summary>
+    public string? ErrorMessage { get; }
+
+    /// <summary>The shared successful result (no error message).</summary>
+    public static LoadResult Success { get; } = new LoadResult(true, null);
+
+    /// <summary>Creates a failed result carrying <paramref name="errorMessage"/>.</summary>
+    public static LoadResult Failure(string errorMessage) => new LoadResult(false, errorMessage);
+}
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/MacOsLibraryLoader.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/MacOsLibraryLoader.cs
@ -0,0 +1,28 @@
+using System.Runtime.InteropServices;
+
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// <see cref="ILibraryLoader"/> for macOS built on <c>dlopen</c>/<c>dlerror</c> from <c>libdl.dylib</c>.
+/// </summary>
+internal class MacOsLibraryLoader : ILibraryLoader
+{
+    // dlopen flag requesting lazy binding.
+    private const int LazyBindingFlag = 0x00001;
+
+#pragma warning disable CA2101
+    [DllImport("libdl.dylib", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlopen")]
+#pragma warning restore CA2101
+    public static extern IntPtr NativeOpenLibraryLibdl(string? filename, int flags);
+
+    [DllImport("libdl.dylib", ExactSpelling = true, CharSet = CharSet.Auto, EntryPoint = "dlerror")]
+    public static extern IntPtr GetLoadError();
+
+    /// <summary>
+    /// Opens <paramref name="fileName"/> with <c>dlopen</c>.
+    /// </summary>
+    /// <param name="fileName">Path or file name of the dynamic library to open.</param>
+    /// <returns>
+    /// <see cref="LoadResult.Success"/> when a non-null handle is returned; otherwise a failure carrying
+    /// the <c>dlerror</c> text, or "Unknown error" when none is available.
+    /// </returns>
+    public LoadResult OpenLibrary(string? fileName)
+    {
+        if (NativeOpenLibraryLibdl(fileName, LazyBindingFlag) != IntPtr.Zero)
+        {
+            return LoadResult.Success;
+        }
+
+        var reason = Marshal.PtrToStringAnsi(GetLoadError()) ?? "Unknown error";
+        return LoadResult.Failure(reason);
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/NativeLibraryLoader.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/NativeLibraryLoader.cs
@ -0,0 +1,81 @@
+#if !IOS && !MACCATALYST && !TVOS && !ANDROID
+using System.Runtime.InteropServices;
+#endif
+
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// Locates and opens the native <c>libllmodel</c> library for the current operating system and
+/// architecture, either through a user-supplied <see cref="ILibraryLoader"/> or a built-in one.
+/// </summary>
+public static class NativeLibraryLoader
+{
+    // Base file name (without extension) of the native library.
+    private const string LibraryName = "libllmodel";
+
+    private static ILibraryLoader? defaultLibraryLoader;
+
+    /// <summary>
+    /// Sets the library loader used to load the native libraries. Overwrite this only if you want some custom loading.
+    /// </summary>
+    /// <param name="libraryLoader">The library loader to be used.</param>
+    public static void SetLibraryLoader(ILibraryLoader libraryLoader)
+    {
+        defaultLibraryLoader = libraryLoader;
+    }
+
+    /// <summary>
+    /// Opens the native library unless <paramref name="bypassLoading"/> is set.
+    /// </summary>
+    /// <param name="path">
+    /// Explicit path of the native library. When null or empty, the library is searched for below the
+    /// application's directory and, if not found there, <c>runtimes/{platform}-{architecture}/</c> is used.
+    /// </param>
+    /// <param name="bypassLoading">
+    /// When true (the default) nothing is loaded and <see cref="LoadResult.Success"/> is returned.
+    /// </param>
+    /// <returns>The result reported by the library loader.</returns>
+    /// <exception cref="PlatformNotSupportedException">The OS or CPU architecture is not supported.</exception>
+    /// <exception cref="FileNotFoundException">
+    /// No custom loader is registered and the resolved path does not exist.
+    /// </exception>
+    internal static LoadResult LoadNativeLibrary(string? path = default, bool bypassLoading = true)
+    {
+        // If the user has handled loading the library themselves, we don't need to do anything.
+        if (bypassLoading)
+        {
+            return LoadResult.Success;
+        }
+
+        var architecture = RuntimeInformation.OSArchitecture switch
+        {
+            Architecture.X64 => "x64",
+            Architecture.X86 => "x86",
+            Architecture.Arm => "arm",
+            Architecture.Arm64 => "arm64",
+            _ => throw new PlatformNotSupportedException(
+                $"Unsupported OS platform, architecture: {RuntimeInformation.OSArchitecture}")
+        };
+
+        var (platform, extension) = Environment.OSVersion.Platform switch
+        {
+            _ when RuntimeInformation.IsOSPlatform(OSPlatform.Windows) => ("win", "dll"),
+            _ when RuntimeInformation.IsOSPlatform(OSPlatform.Linux) => ("linux", "so"),
+            _ when RuntimeInformation.IsOSPlatform(OSPlatform.OSX) => ("osx", "dylib"),
+            // Report the operating system here; the architecture was already validated above.
+            _ => throw new PlatformNotSupportedException(
+                $"Unsupported OS platform: {RuntimeInformation.OSDescription}")
+        };
+
+        // If the user hasn't set the path, we'll try to find it ourselves.
+        if (string.IsNullOrEmpty(path))
+        {
+            var fileName = $"{LibraryName}.{extension}";
+            // Search below the application directory; if that yields nothing, for whatever reason, use the default path.
+            path = FindInSearchPath(fileName)
+                   ?? Path.Combine("runtimes", $"{platform}-{architecture}", fileName);
+        }
+
+        // A custom loader gets the path as-is; it decides on its own how to resolve it.
+        if (defaultLibraryLoader != null)
+        {
+            return defaultLibraryLoader.OpenLibrary(path);
+        }
+
+        if (!File.Exists(path))
+        {
+            throw new FileNotFoundException($"Native Library not found in path {path}. " +
+                                            $"Verify you have included the native Gpt4All library in your application.");
+        }
+
+        ILibraryLoader libraryLoader = platform switch
+        {
+            "win" => new WindowsLibraryLoader(),
+            "osx" => new MacOsLibraryLoader(),
+            "linux" => new LinuxLibraryLoader(),
+            _ => throw new PlatformNotSupportedException($"Currently {platform} platform is not supported")
+        };
+        return libraryLoader.OpenLibrary(path);
+    }
+
+    /// <summary>
+    /// Searches the first usable application directory (recursively) for <paramref name="fileName"/>.
+    /// </summary>
+    /// <returns>The first matching file path, or <c>null</c> when the search cannot be performed or finds nothing.</returns>
+    private static string? FindInSearchPath(string fileName)
+    {
+        var searchRoot = new[]
+        {
+            AppDomain.CurrentDomain.RelativeSearchPath,
+            Path.GetDirectoryName(typeof(NativeLibraryLoader).Assembly.Location),
+            Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]),
+            // Last resort so that a search root exists even when the candidates above are all empty.
+            AppContext.BaseDirectory
+        }.FirstOrDefault(it => !string.IsNullOrEmpty(it));
+
+        // Directory.EnumerateFiles throws on an empty or missing directory; treat both as "not found"
+        // so the caller can fall back to the default runtimes path.
+        if (string.IsNullOrEmpty(searchRoot) || !Directory.Exists(searchRoot))
+        {
+            return null;
+        }
+
+        try
+        {
+            return Directory.EnumerateFiles(searchRoot, fileName, SearchOption.AllDirectories).FirstOrDefault();
+        }
+        catch (Exception ex) when (ex is UnauthorizedAccessException or IOException)
+        {
+            // An unreadable sub-directory must not abort loading; fall back to the default path instead.
+            return null;
+        }
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/WindowsLibraryLoader.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/LibraryLoader/WindowsLibraryLoader.cs
@ -0,0 +1,24 @@
+using System.ComponentModel;
+using System.Runtime.InteropServices;
+
+namespace Gpt4All.LibraryLoader;
+
+/// <summary>
+/// <see cref="ILibraryLoader"/> for Windows built on <c>LoadLibrary</c> from <c>kernel32</c>.
+/// </summary>
+internal class WindowsLibraryLoader : ILibraryLoader
+{
+    [DllImport("kernel32", SetLastError = true, CharSet = CharSet.Auto)]
+    private static extern IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPWStr)] string? lpFileName);
+
+    /// <summary>
+    /// Opens <paramref name="fileName"/> with <c>LoadLibrary</c>.
+    /// </summary>
+    /// <param name="fileName">Path or file name of the DLL to open.</param>
+    /// <returns>
+    /// <see cref="LoadResult.Success"/> when a non-null handle is returned; otherwise a failure carrying
+    /// the message of the last Win32 error.
+    /// </returns>
+    public LoadResult OpenLibrary(string? fileName)
+    {
+        if (LoadLibrary(fileName) != IntPtr.Zero)
+        {
+            return LoadResult.Success;
+        }
+
+        // Read the error code immediately after the failed call, before anything else can overwrite it.
+        var win32Error = Marshal.GetLastWin32Error();
+        return LoadResult.Failure(new Win32Exception(win32Error).Message);
+    }
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs
@ -1,61 +1,58 @@
-using System.Diagnostics;
-using Microsoft.Extensions.Logging;
-using Gpt4All.Bindings;
-using Microsoft.Extensions.Logging.Abstractions;
-
-namespace Gpt4All;
-
-public class Gpt4AllModelFactory : IGpt4AllModelFactory
-{
-    private readonly ILoggerFactory _loggerFactory;
-    private readonly ILogger _logger;
-
-    public Gpt4AllModelFactory(ILoggerFactory? loggerFactory = null)
-    {
-        _loggerFactory = loggerFactory ?? NullLoggerFactory.Instance;
-        _logger = _loggerFactory.CreateLogger<Gpt4AllModelFactory>();
-    }
-
-    private IGpt4AllModel CreateModel(string modelPath, ModelType? modelType = null)
-    {
-        var modelType_ = modelType ?? ModelFileUtils.GetModelTypeFromModelFileHeader(modelPath);
-
-        _logger.LogInformation("Creating model path={ModelPath} type={ModelType}", modelPath, modelType_);
-
-        var handle = modelType_ switch
-        {
-            ModelType.LLAMA => NativeMethods.llmodel_llama_create(),
-            ModelType.GPTJ => NativeMethods.llmodel_gptj_create(),
-            ModelType.MPT => NativeMethods.llmodel_mpt_create(),
-            _ => NativeMethods.llmodel_model_create(modelPath),
-        };
-
-        _logger.LogDebug("Model created handle=0x{ModelHandle:X8}", handle);
-        _logger.LogInformation("Model loading started");
-
-        var loadedSuccessfully = NativeMethods.llmodel_loadModel(handle, modelPath);
-
-        _logger.LogInformation("Model loading completed success={ModelLoadSuccess}", loadedSuccessfully);
-
-        if (loadedSuccessfully == false)
-        {
-            throw new Exception($"Failed to load model: '{modelPath}'");
-        }
-
-        var logger = _loggerFactory.CreateLogger<LLModel>();
-
-        var underlyingModel = LLModel.Create(handle, modelType_, logger: logger);
-
-        Debug.Assert(underlyingModel.IsLoaded());
-
-        return new Gpt4All(underlyingModel, logger: logger);
-    }
-
-    public IGpt4AllModel LoadModel(string modelPath) => CreateModel(modelPath, modelType: null);
-
-    public IGpt4AllModel LoadMptModel(string modelPath) => CreateModel(modelPath, ModelType.MPT);
-
-    public IGpt4AllModel LoadGptjModel(string modelPath) => CreateModel(modelPath, ModelType.GPTJ);
-
-    public IGpt4AllModel LoadLlamaModel(string modelPath) => CreateModel(modelPath, ModelType.LLAMA);
-}
+using System.Diagnostics;
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Logging;
+using Gpt4All.Bindings;
+using Gpt4All.LibraryLoader;
+
+namespace Gpt4All;
+
+/// <summary>
+/// Creates <see cref="IGpt4AllModel"/> instances backed by the native gpt4all library.
+/// </summary>
+/// <remarks>
+/// The native library is loaded at most once per process through a static <see cref="Lazy{T}"/>. The
+/// <c>libraryPath</c> and <c>bypassLoading</c> values are stored in static fields, so only the values
+/// supplied to the first constructed factory influence the load; later values overwrite the fields
+/// but do not trigger another load.
+/// </remarks>
+public class Gpt4AllModelFactory : IGpt4AllModelFactory
+{
+    private readonly ILoggerFactory _loggerFactory;
+    private readonly ILogger _logger;
+    private static bool bypassLoading;
+    private static string? libraryPath;
+
+    // Thread-safe, evaluated on first access from a constructor; the result is cached for the process lifetime.
+    private static readonly Lazy<LoadResult> libraryLoaded = new(() =>
+    {
+        return NativeLibraryLoader.LoadNativeLibrary(Gpt4AllModelFactory.libraryPath, Gpt4AllModelFactory.bypassLoading);
+    }, true);
+
+    /// <summary>
+    /// Initializes the factory and makes sure the native library has been loaded.
+    /// </summary>
+    /// <param name="libraryPath">Optional explicit path of the native library.</param>
+    /// <param name="bypassLoading">When true (the default) the native library is not loaded by this package.</param>
+    /// <param name="loggerFactory">Logger factory; a no-op factory is used when null.</param>
+    /// <exception cref="Exception">The native library could not be loaded.</exception>
+    public Gpt4AllModelFactory(string? libraryPath = default, bool bypassLoading = true, ILoggerFactory? loggerFactory = null)
+    {
+        _loggerFactory = loggerFactory ?? NullLoggerFactory.Instance;
+        _logger = _loggerFactory.CreateLogger<Gpt4AllModelFactory>();
+        Gpt4AllModelFactory.libraryPath = libraryPath;
+        Gpt4AllModelFactory.bypassLoading = bypassLoading;
+
+        if (!libraryLoaded.Value.IsSuccess)
+        {
+            throw new Exception($"Failed to load native gpt4all library. Error: {libraryLoaded.Value.ErrorMessage}");
+        }
+    }
+
+    /// <summary>
+    /// Creates a native model for <paramref name="modelPath"/>, loads it and wraps it.
+    /// </summary>
+    /// <exception cref="Exception">The native model could not be created or loaded.</exception>
+    private IGpt4AllModel CreateModel(string modelPath)
+    {
+        var modelType_ = ModelFileUtils.GetModelTypeFromModelFileHeader(modelPath);
+        _logger.LogInformation("Creating model path={ModelPath} type={ModelType}", modelPath, modelType_);
+
+        // The native error output is not interpreted here because its layout is not visible from this file.
+        var handle = NativeMethods.llmodel_model_create2(modelPath, "auto", out _);
+        if (handle == IntPtr.Zero)
+        {
+            // Never hand a null handle to the native loader.
+            throw new Exception($"Failed to create model: '{modelPath}'");
+        }
+
+        _logger.LogDebug("Model created handle=0x{ModelHandle:X8}", handle);
+        _logger.LogInformation("Model loading started");
+        var loadedSuccessfully = NativeMethods.llmodel_loadModel(handle, modelPath);
+        _logger.LogInformation("Model loading completed success={ModelLoadSuccess}", loadedSuccessfully);
+        if (!loadedSuccessfully)
+        {
+            // Nothing owns the handle yet, so release it here to avoid leaking the native model.
+            NativeMethods.llmodel_model_destroy(handle);
+            throw new Exception($"Failed to load model: '{modelPath}'");
+        }
+
+        var logger = _loggerFactory.CreateLogger<LLModel>();
+        var underlyingModel = LLModel.Create(handle, modelType_, logger: logger);
+
+        Debug.Assert(underlyingModel.IsLoaded());
+
+        return new Gpt4All(underlyingModel, logger: logger);
+    }
+
+    /// <summary>
+    /// Loads the model stored at <paramref name="modelPath"/>.
+    /// </summary>
+    /// <param name="modelPath">Path of the model file.</param>
+    /// <returns>The loaded model.</returns>
+    public IGpt4AllModel LoadModel(string modelPath) => CreateModel(modelPath);
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Model/IGpt4AllModelFactory.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Model/IGpt4AllModelFactory.cs
@ -1,12 +1,6 @@
-namespace Gpt4All;
-
-public interface IGpt4AllModelFactory
-{
-    IGpt4AllModel LoadGptjModel(string modelPath);
-
-    IGpt4AllModel LoadLlamaModel(string modelPath);
-
-    IGpt4AllModel LoadModel(string modelPath);
-
-    IGpt4AllModel LoadMptModel(string modelPath);
-}
+namespace Gpt4All;
+
+/// <summary>
+/// Factory for <see cref="IGpt4AllModel"/> instances.
+/// </summary>
+public interface IGpt4AllModelFactory
+{
+    /// <summary>
+    /// Loads the model stored at <paramref name="modelPath"/>.
+    /// </summary>
+    /// <param name="modelPath">Path of the model file.</param>
+    /// <returns>The loaded model.</returns>
+    IGpt4AllModel LoadModel(string modelPath);
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Model/ModelType.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Model/ModelType.cs
@ -1,11 +1,11 @@
-namespace Gpt4All;
-
-/// <summary>
-/// The supported model types
-/// </summary>
-public enum ModelType
-{
-    LLAMA = 0,
-    GPTJ,
-    MPT
-}
+namespace Gpt4All;
+
+/// <summary>
+/// Model families recognised by the bindings.
+/// </summary>
+public enum ModelType
+{
+    LLAMA = 0,
+    GPTJ = 1,
+    MPT = 2
+}
--- a/gpt4all-bindings/csharp/Gpt4All/Prediction/ITextPrediction.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Prediction/ITextPrediction.cs
@ -1,31 +1,31 @@
-namespace Gpt4All;
-
-/// <summary>
-/// Interface for text prediction services
-/// </summary>
-public interface ITextPrediction
-{
-    /// <summary>
-    /// Get prediction results for the prompt and provided options.
-    /// </summary>
-    /// <param name="text">The text to complete</param>
-    /// <param name="opts">The prediction settings</param>
-    /// <param name="cancellationToken">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
-    /// <returns>The prediction result generated by the model</returns>
-    Task<ITextPredictionResult> GetPredictionAsync(
-        string text,
-        PredictRequestOptions opts,
-        CancellationToken cancellation = default);
-
-    /// <summary>
-    /// Get streaming prediction results for the prompt and provided options.
-    /// </summary>
-    /// <param name="text">The text to complete</param>
-    /// <param name="opts">The prediction settings</param>
-    /// <param name="cancellationToken">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
-    /// <returns>The prediction result generated by the model</returns>
-    Task<ITextPredictionStreamingResult> GetStreamingPredictionAsync(
-        string text,
-        PredictRequestOptions opts,
-        CancellationToken cancellationToken = default);
-}
+namespace Gpt4All;
+
+/// <summary>
+/// Interface for text prediction services
+/// </summary>
+public interface ITextPrediction
+{
+    /// <summary>
+    /// Get prediction results for the prompt and provided options.
+    /// </summary>
+    /// <param name="text">The text to complete</param>
+    /// <param name="opts">The prediction settings</param>
+    /// <param name="cancellation">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
+    /// <returns>A task that yields the prediction result generated by the model</returns>
+    Task<ITextPredictionResult> GetPredictionAsync(
+        string text,
+        PredictRequestOptions opts,
+        CancellationToken cancellation = default);
+
+    /// <summary>
+    /// Get streaming prediction results for the prompt and provided options.
+    /// </summary>
+    /// <param name="text">The text to complete</param>
+    /// <param name="opts">The prediction settings</param>
+    /// <param name="cancellationToken">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
+    /// <returns>A task that yields the streaming prediction result generated by the model</returns>
+    Task<ITextPredictionStreamingResult> GetStreamingPredictionAsync(
+        string text,
+        PredictRequestOptions opts,
+        CancellationToken cancellationToken = default);
+}
--- a/gpt4all-bindings/csharp/build_linux.sh
+++ b/gpt4all-bindings/csharp/build_linux.sh
@ -5,4 +5,6 @@ mkdir runtimes/linux-x64/build
 cmake -S ../../gpt4all-backend -B runtimes/linux-x64/build
 cmake --build runtimes/linux-x64/build --parallel --config Release
 cp runtimes/linux-x64/build/libllmodel.so  runtimes/linux-x64/native/libllmodel.so
-cp runtimes/linux-x64/build/llama.cpp/libllama.so runtimes/linux-x64/native/libllama.so
+cp runtimes/linux-x64/build/libgptj*.so  runtimes/linux-x64/native/
+cp runtimes/linux-x64/build/libllama*.so  runtimes/linux-x64/native/
+cp runtimes/linux-x64/build/libmpt*.so  runtimes/linux-x64/native/
--- a/gpt4all-bindings/csharp/build_win-mingw.ps1
+++ b/gpt4all-bindings/csharp/build_win-mingw.ps1
@ -13,4 +13,5 @@ cmake --build $BUILD_DIR --parallel --config Release

 # copy native dlls
 cp "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin\*dll" $LIBS_DIR
-cp "$BUILD_DIR\*.dll" $LIBS_DIR
+cp "$BUILD_DIR\bin\*.dll" $LIBS_DIR
+mv $LIBS_DIR\llmodel.dll $LIBS_DIR\libllmodel.dll
--- a/gpt4all-bindings/csharp/build_win-msvc.ps1
+++ b/gpt4all-bindings/csharp/build_win-msvc.ps1
@ -2,4 +2,5 @@ Remove-Item -Force -Recurse .\runtimes\win-x64\msvc -ErrorAction SilentlyContinu
 mkdir .\runtimes\win-x64\msvc\build | Out-Null
 cmake -G "Visual Studio 17 2022" -A X64 -S ..\..\gpt4all-backend -B .\runtimes\win-x64\msvc\build
 cmake --build .\runtimes\win-x64\msvc\build --parallel --config Release
-cp .\runtimes\win-x64\msvc\build\bin\Release\*.dll .\runtimes\win-x64
+cp .\runtimes\win-x64\msvc\build\bin\Release\*.dll .\runtimes\win-x64
+mv .\runtimes\win-x64\llmodel.dll .\runtimes\win-x64\libllmodel.dll