API: Allow content arrays for multimodal OpenAI requests (#5277)

2024-10-01 01:26:03 -04:00 · 2024-01-22 13:10:26 +02:00 · 2024-01-22 13:10:26 +02:00 · fbf8ae39f8
commit fbf8ae39f8
parent 166fdf09f3
1 changed files with 20 additions and 0 deletions
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -144,6 +144,26 @@ def convert_history(history):
    user_input = ""
    system_message = ""

+    if any(isinstance(entry['content'], list) for entry in history):
+        new_history = []
+        for entry in history:
+            if isinstance(entry['content'], list):
+                image_url = None
+                content = None
+                for item in entry['content']:
+                    if not isinstance(item, dict):
+                        continue
+                    if item['type'] == 'image_url' and isinstance(item['image_url'], dict):
+                        image_url = item['image_url']['url']
+                    elif item['type'] == 'text' and isinstance(item['text'], str):
+                        content = item['text']
+                if image_url and content:
+                    new_history.append({"image_url": image_url, "role": "user"})
+                    new_history.append({"content": content, "role": "user"})
+            else:
+                new_history.append(entry)
+        history = new_history
+
    for entry in history:
        if "image_url" in entry:
            image_url = entry['image_url']