[FEAT] OpenRouter integration ()

* WIP openrouter integration

* add OpenRouter options to onboarding flow and data handling

* add todo to fix headers for rankings

* OpenRouter LLM support complete

* Fix hanging response stream with OpenRouter
update tagline
update comment

* update timeout comment

* wait for first chunk to start timer

* sort OpenRouter models by organization

* uppercase first letter of organization

* sort grouped models by org

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2024-02-23 17:18:58 -08:00 committed by GitHub
parent c87ef5b674
commit 633f425206
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1187 additions and 13 deletions
README.md
docker
frontend/src
components/LLMSelection
OpenRouterOptions
TogetherAiOptions
media/llmprovider
pages
GeneralSettings/LLMPreference
OnboardingFlow/Steps
DataHandling
LLMPreference
server

View file

@ -72,6 +72,7 @@ Some cool features of AnythingLLM
- [LocalAi (all models)](https://localai.io/)
- [Together AI (chat models)](https://www.together.ai/)
- [Perplexity (chat models)](https://www.perplexity.ai/)
- [OpenRouter (chat models)](https://openrouter.ai/)
- [Mistral](https://mistral.ai/)
**Supported Embedding models:**

View file

@ -52,6 +52,10 @@ GID='1000'
# PERPLEXITY_API_KEY='my-perplexity-key'
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'
# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'
# LLM_PROVIDER='huggingface'
# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx

View file

@ -0,0 +1,97 @@
import System from "@/models/system";
import { useState, useEffect } from "react";
// Settings panel for the OpenRouter LLM provider: an API-key field plus the
// model dropdown. `settings` is the persisted system-settings object used to
// prefill the form (shape assumed from usage here — TODO confirm against caller).
export default function OpenRouterOptions({ settings }) {
  return (
    <div className="flex gap-x-4">
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          OpenRouter API Key
        </label>
        {/* The stored key is never echoed back to the browser — a fixed
            20-character mask only signals that a key is already saved. */}
        <input
          type="password"
          name="OpenRouterApiKey"
          className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
          placeholder="OpenRouter API Key"
          defaultValue={settings?.OpenRouterApiKey ? "*".repeat(20) : ""}
          required={true}
          autoComplete="off"
          spellCheck={false}
        />
      </div>
      <OpenRouterModelSelection settings={settings} />
    </div>
  );
}
// Model picker for OpenRouter. Fetches the available models once on mount via
// the system "custom models" endpoint, groups them by `organization`, and
// renders one <optgroup> per organization (sorted alphabetically).
function OpenRouterModelSelection({ settings }) {
  const [groupedModels, setGroupedModels] = useState({});
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    async function findCustomModels() {
      setLoading(true);
      const { models } = await System.customModels("openrouter");
      if (models?.length > 0) {
        // Bucket models by organization, e.g. { Openai: [...], Google: [...] }.
        const modelsByOrganization = models.reduce((acc, model) => {
          acc[model.organization] = acc[model.organization] || [];
          acc[model.organization].push(model);
          return acc;
        }, {});
        setGroupedModels(modelsByOrganization);
      }
      setLoading(false);
    }
    findCustomModels();
  }, []);

  // Disabled placeholder while the fetch is in flight — also shown if the
  // fetch returned no models, since groupedModels stays empty in that case.
  if (loading || Object.keys(groupedModels).length === 0) {
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Chat Model Selection
        </label>
        <select
          name="OpenRouterModelPref"
          disabled={true}
          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
        >
          {/* NOTE(review): React prefers defaultValue on <select> over
              `selected` on <option> — kept as-is to avoid behavior changes. */}
          <option disabled={true} selected={true}>
            -- loading available models --
          </option>
        </select>
      </div>
    );
  }

  return (
    <div className="flex flex-col w-60">
      <label className="text-white text-sm font-semibold block mb-4">
        Chat Model Selection
      </label>
      <select
        name="OpenRouterModelPref"
        required={true}
        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
      >
        {Object.keys(groupedModels)
          .sort()
          .map((organization) => (
            <optgroup key={organization} label={organization}>
              {groupedModels[organization].map((model) => (
                <option
                  key={model.id}
                  value={model.id}
                  selected={settings.OpenRouterModelPref === model.id}
                >
                  {model.name}
                </option>
              ))}
            </optgroup>
          ))}
      </select>
    </div>
  );
}

View file

@ -76,19 +76,21 @@ function TogetherAiModelSelection({ settings }) {
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{Object.entries(groupedModels).map(([organization, models]) => (
<optgroup key={organization} label={organization}>
{models.map((model) => (
<option
key={model.id}
value={model.id}
selected={settings.TogetherAiModelPref === model.id}
>
{model.name}
</option>
))}
</optgroup>
))}
{Object.keys(groupedModels)
.sort()
.map((organization) => (
<optgroup key={organization} label={organization}>
{groupedModels[organization].map((model) => (
<option
key={model.id}
value={model.id}
selected={settings.OpenRouterModelPref === model.id}
>
{model.name}
</option>
))}
</optgroup>
))}
</select>
</div>
);

Binary file not shown.

After

(image error) Size: 6.2 KiB

View file

@ -15,6 +15,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@ -31,6 +32,7 @@ import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { MagnifyingGlass } from "@phosphor-icons/react";
import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions";
import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
export default function GeneralLLMPreference() {
const [saving, setSaving] = useState(false);
@ -164,6 +166,13 @@ export default function GeneralLLMPreference() {
description:
"Run powerful and internet-connected models hosted by Perplexity AI.",
},
{
name: "OpenRouter",
value: "openrouter",
logo: OpenRouterLogo,
options: <OpenRouterOptions settings={settings} />,
description: "A unified interface for LLMs.",
},
{
name: "Native",
value: "native",

View file

@ -12,6 +12,7 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import ZillizLogo from "@/media/vectordbs/zilliz.png";
import AstraDBLogo from "@/media/vectordbs/astraDB.png";
import ChromaLogo from "@/media/vectordbs/chroma.png";
@ -118,6 +119,14 @@ const LLM_SELECTION_PRIVACY = {
],
logo: PerplexityLogo,
},
openrouter: {
name: "OpenRouter",
description: [
"Your chats will not be used for training",
"Your prompts and document text used in response creation are visible to OpenRouter",
],
logo: OpenRouterLogo,
},
};
const VECTOR_DB_PRIVACY = {

View file

@ -12,6 +12,7 @@ import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions";
@ -29,6 +30,7 @@ import System from "@/models/system";
import paths from "@/utils/paths";
import showToast from "@/utils/toast";
import { useNavigate } from "react-router-dom";
import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions";
const TITLE = "LLM Preference";
const DESCRIPTION =
@ -138,6 +140,13 @@ export default function LLMPreference({
description:
"Run powerful and internet-connected models hosted by Perplexity AI.",
},
{
name: "OpenRouter",
value: "openrouter",
logo: OpenRouterLogo,
options: <OpenRouterOptions settings={settings} />,
description: "A unified interface for LLMs.",
},
{
name: "Native",
value: "native",

View file

@ -45,6 +45,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# PERPLEXITY_API_KEY='my-perplexity-key'
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'
# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'
# LLM_PROVIDER='mistral'
# MISTRAL_API_KEY='example-mistral-ai-api-key'
# MISTRAL_MODEL_PREF='mistral-tiny'

View file

@ -188,6 +188,18 @@ const SystemSettings = {
PerplexityApiKey: !!process.env.PERPLEXITY_API_KEY,
PerplexityModelPref: process.env.PERPLEXITY_MODEL_PREF,
// For embedding fallback credentials when perplexity is the LLM provider.
OpenAiKey: !!process.env.OPEN_AI_KEY,
AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
}
: {}),
...(llmProvider === "openrouter"
? {
OpenRouterApiKey: !!process.env.OPENROUTER_API_KEY,
OpenRouterModelPref: process.env.OPENROUTER_MODEL_PREF,
// For embedding fallback credentials when openrouter is the LLM provider.
OpenAiKey: !!process.env.OPEN_AI_KEY,
AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,

View file

@ -0,0 +1,334 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { v4: uuidv4 } = require("uuid");
const { writeResponseChunk } = require("../../helpers/chat/responses");
// Load the static OpenRouter model catalog bundled with the server
// (see ./models.js, generated by scripts/parse.mjs). Returns an empty
// map when the catalog module exports nothing.
function openRouterModels() {
  const catalog = require("./models.js").MODELS;
  if (!catalog) return {};
  return catalog;
}
/**
 * LLM connector for OpenRouter (https://openrouter.ai) — an OpenAI-compatible
 * gateway to many upstream models. Uses the `openai` (v3-style Configuration/
 * OpenAIApi) SDK pointed at OpenRouter's base path.
 */
class OpenRouterLLM {
  /**
   * @param {object|null} embedder - embedding engine; defaults to NativeEmbedder.
   * @param {string|null} modelPreference - model id override; falls back to
   *   OPENROUTER_MODEL_PREF, then "openrouter/auto".
   * @throws {Error} when OPENROUTER_API_KEY is not set.
   */
  constructor(embedder = null, modelPreference = null) {
    const { Configuration, OpenAIApi } = require("openai");
    if (!process.env.OPENROUTER_API_KEY)
      throw new Error("No OpenRouter API key was set.");

    const config = new Configuration({
      basePath: "https://openrouter.ai/api/v1",
      apiKey: process.env.OPENROUTER_API_KEY,
      baseOptions: {
        // Identifies the calling app to OpenRouter on every request.
        headers: {
          "HTTP-Referer": "https://useanything.com",
          "X-Title": "AnythingLLM",
        },
      },
    });
    this.openai = new OpenAIApi(config);
    this.model =
      modelPreference || process.env.OPENROUTER_MODEL_PREF || "openrouter/auto";
    // Token-budget split of the context window — presumably consumed by the
    // message compressor in helpers/chat; TODO confirm.
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = !embedder ? new NativeEmbedder() : embedder;
    this.defaultTemp = 0.7;
  }

  // Render retrieved context snippets into a delimited block appended to the
  // system prompt. Returns "" when there is no context.
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  // Static catalog of all known OpenRouter models (see models.js).
  allModelInformation() {
    return openRouterModels();
  }

  // Streaming is supported when both stream methods exist on this class.
  streamingEnabled() {
    return "streamChat" in this && "streamGetChatCompletion" in this;
  }

  // Context-window size (tokens) for the configured model; defaults to 4096
  // when the model is not in the static catalog.
  promptWindowLimit() {
    const availableModels = this.allModelInformation();
    return availableModels[this.model]?.maxLength || 4096;
  }

  // A model is considered chat-capable iff it appears in the static catalog.
  async isValidChatCompletionModel(model = "") {
    const availableModels = this.allModelInformation();
    return availableModels.hasOwnProperty(model);
  }

  /**
   * Build the OpenAI-style message array: system prompt (plus appended
   * context), prior chat history, then the new user prompt.
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
  }

  // No moderation endpoint is used for OpenRouter, so all input is "safe".
  async isSafe(_input = "") {
    // Not implemented so must be stubbed
    return { safe: true, reasons: [] };
  }

  /**
   * Non-streaming chat completion.
   * @returns {Promise<string>} the assistant message content.
   * @throws {Error} on invalid model, empty choices, or request failure.
   */
  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenRouter chat: ${this.model} is not valid for chat completion!`
      );

    const textResponse = await this.openai
      .createChatCompletion({
        model: this.model,
        // Workspace temperature wins; fall back to the instance default.
        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      })
      .then((json) => {
        const res = json.data;
        if (!res.hasOwnProperty("choices"))
          throw new Error("OpenRouter chat: No results!");
        if (res.choices.length === 0)
          throw new Error("OpenRouter chat: No results length!");
        return res.choices[0].message.content;
      })
      .catch((error) => {
        throw new Error(
          `OpenRouter::createChatCompletion failed with: ${error.message}`
        );
      });

    return textResponse;
  }

  /**
   * Streaming variant of sendChat. Returns the axios stream response to be
   * consumed by handleStream().
   */
  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenRouter chat: ${this.model} is not valid for chat completion!`
      );

    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      },
      { responseType: "stream" }
    );
    return streamRequest;
  }

  /**
   * Non-streaming completion for a pre-built message array.
   * @returns {Promise<string|null>} message content, or null if no choices.
   */
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenRouter chat: ${this.model} is not valid for chat completion!`
      );

    const { data } = await this.openai
      .createChatCompletion({
        model: this.model,
        messages,
        temperature,
      })
      .catch((e) => {
        // NOTE(review): assumes an HTTP error with a structured body; a
        // network-level failure without `e.response` would throw here instead.
        throw new Error(e.response.data.error.message);
      });

    if (!data.hasOwnProperty("choices")) return null;
    return data.choices[0].message.content;
  }

  // Streaming completion for a pre-built message array; returns the axios
  // stream response for handleStream().
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `OpenRouter chat: ${this.model} is not valid for chat completion!`
      );

    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        messages,
        temperature,
      },
      { responseType: "stream" }
    );
    return streamRequest;
  }

  /**
   * Pump an SSE-style stream into `response` as textResponseChunk events and
   * resolve with the accumulated full text. Handles three termination paths:
   * a "[DONE]" sentinel, a non-null finish_reason, or a staleness timeout.
   */
  handleStream(response, stream, responseProps) {
    const timeoutThresholdMs = 500;
    const { uuid = uuidv4(), sources = [] } = responseProps;

    return new Promise((resolve) => {
      let fullText = "";
      let chunk = "";
      let lastChunkTime = null; // null when first token is still not received.

      // NOTICE: Not all OpenRouter models will return a stop reason
      // which keeps the connection open and so the model never finalizes the stream
      // like the traditional OpenAI response schema does. So in the case the response stream
      // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with
      // no new chunks then we kill the stream and assume it to be complete. OpenRouter is quite fast
      // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if
      // we find it is too aggressive.
      const timeoutCheck = setInterval(() => {
        if (lastChunkTime === null) return;

        const now = Number(new Date());
        const diffMs = now - lastChunkTime;
        if (diffMs >= timeoutThresholdMs) {
          console.log(
            `OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
          );
          writeResponseChunk(response, {
            uuid,
            sources,
            type: "textResponseChunk",
            textResponse: "",
            close: true,
            error: false,
          });
          clearInterval(timeoutCheck);
          resolve(fullText);
        }
      }, 500);

      stream.data.on("data", (data) => {
        // Each network packet may carry multiple newline-delimited SSE lines.
        const lines = data
          ?.toString()
          ?.split("\n")
          .filter((line) => line.trim() !== "");

        for (const line of lines) {
          let validJSON = false;
          const message = chunk + line.replace(/^data: /, "");

          // JSON chunk is incomplete and has not ended yet
          // so we need to stitch it together. You would think JSON
          // chunks would only come complete - but they don't!
          try {
            JSON.parse(message);
            validJSON = true;
          } catch {}

          if (!validJSON) {
            // It can be possible that the chunk decoding is running away
            // and the message chunk fails to append due to string length.
            // In this case abort the chunk and reset so we can continue.
            // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
            try {
              chunk += message;
            } catch (e) {
              console.error(`Chunk appending error`, e);
              chunk = "";
            }
            continue;
          } else {
            chunk = "";
          }

          if (message == "[DONE]") {
            // Formal OpenAI-style end-of-stream sentinel.
            lastChunkTime = Number(new Date());
            writeResponseChunk(response, {
              uuid,
              sources,
              type: "textResponseChunk",
              textResponse: "",
              close: true,
              error: false,
            });
            clearInterval(timeoutCheck);
            resolve(fullText);
          } else {
            let finishReason = null;
            let token = "";
            try {
              const json = JSON.parse(message);
              token = json?.choices?.[0]?.delta?.content;
              finishReason = json?.choices?.[0]?.finish_reason || null;
            } catch {
              continue;
            }

            if (token) {
              fullText += token;
              lastChunkTime = Number(new Date());
              writeResponseChunk(response, {
                uuid,
                sources: [],
                type: "textResponseChunk",
                textResponse: token,
                close: false,
                error: false,
              });
            }

            if (finishReason !== null) {
              // Model signaled completion explicitly.
              lastChunkTime = Number(new Date());
              writeResponseChunk(response, {
                uuid,
                sources,
                type: "textResponseChunk",
                textResponse: "",
                close: true,
                error: false,
              });
              clearInterval(timeoutCheck);
              resolve(fullText);
            }
          }
        }
      });
    });
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  // Compress a prompt + history to fit the model's context window.
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}
module.exports = {
OpenRouterLLM,
openRouterModels,
};

View file

@ -0,0 +1,622 @@
const MODELS = {
"nousresearch/nous-capybara-34b": {
id: "nousresearch/nous-capybara-34b",
name: "Nous: Capybara 34B",
organization: "Nousresearch",
maxLength: 32768,
},
"openrouter/auto": {
id: "openrouter/auto",
name: "Auto (best for prompt)",
organization: "Openrouter",
maxLength: 128000,
},
"nousresearch/nous-capybara-7b:free": {
id: "nousresearch/nous-capybara-7b:free",
name: "Nous: Capybara 7B (free)",
organization: "Nousresearch",
maxLength: 4096,
},
"mistralai/mistral-7b-instruct:free": {
id: "mistralai/mistral-7b-instruct:free",
name: "Mistral 7B Instruct (free)",
organization: "Mistralai",
maxLength: 8192,
},
"gryphe/mythomist-7b:free": {
id: "gryphe/mythomist-7b:free",
name: "MythoMist 7B (free)",
organization: "Gryphe",
maxLength: 32768,
},
"undi95/toppy-m-7b:free": {
id: "undi95/toppy-m-7b:free",
name: "Toppy M 7B (free)",
organization: "Undi95",
maxLength: 4096,
},
"openrouter/cinematika-7b:free": {
id: "openrouter/cinematika-7b:free",
name: "Cinematika 7B (alpha) (free)",
organization: "Openrouter",
maxLength: 8000,
},
"google/gemma-7b-it:free": {
id: "google/gemma-7b-it:free",
name: "Google: Gemma 7B (free)",
organization: "Google",
maxLength: 8000,
},
"jondurbin/bagel-34b": {
id: "jondurbin/bagel-34b",
name: "Bagel 34B v0.2",
organization: "Jondurbin",
maxLength: 8000,
},
"jebcarter/psyfighter-13b": {
id: "jebcarter/psyfighter-13b",
name: "Psyfighter 13B",
organization: "Jebcarter",
maxLength: 4096,
},
"koboldai/psyfighter-13b-2": {
id: "koboldai/psyfighter-13b-2",
name: "Psyfighter v2 13B",
organization: "Koboldai",
maxLength: 4096,
},
"neversleep/noromaid-mixtral-8x7b-instruct": {
id: "neversleep/noromaid-mixtral-8x7b-instruct",
name: "Noromaid Mixtral 8x7B Instruct",
organization: "Neversleep",
maxLength: 8000,
},
"nousresearch/nous-hermes-llama2-13b": {
id: "nousresearch/nous-hermes-llama2-13b",
name: "Nous: Hermes 13B",
organization: "Nousresearch",
maxLength: 4096,
},
"meta-llama/codellama-34b-instruct": {
id: "meta-llama/codellama-34b-instruct",
name: "Meta: CodeLlama 34B Instruct",
organization: "Meta-llama",
maxLength: 8192,
},
"phind/phind-codellama-34b": {
id: "phind/phind-codellama-34b",
name: "Phind: CodeLlama 34B v2",
organization: "Phind",
maxLength: 4096,
},
"intel/neural-chat-7b": {
id: "intel/neural-chat-7b",
name: "Neural Chat 7B v3.1",
organization: "Intel",
maxLength: 4096,
},
"mistralai/mixtral-8x7b-instruct": {
id: "mistralai/mixtral-8x7b-instruct",
name: "Mistral: Mixtral 8x7B Instruct",
organization: "Mistralai",
maxLength: 32768,
},
"nousresearch/nous-hermes-2-mixtral-8x7b-dpo": {
id: "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
name: "Nous: Hermes 2 Mixtral 8x7B DPO",
organization: "Nousresearch",
maxLength: 32000,
},
"nousresearch/nous-hermes-2-mixtral-8x7b-sft": {
id: "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
name: "Nous: Hermes 2 Mixtral 8x7B SFT",
organization: "Nousresearch",
maxLength: 32000,
},
"haotian-liu/llava-13b": {
id: "haotian-liu/llava-13b",
name: "Llava 13B",
organization: "Haotian-liu",
maxLength: 2048,
},
"nousresearch/nous-hermes-2-vision-7b": {
id: "nousresearch/nous-hermes-2-vision-7b",
name: "Nous: Hermes 2 Vision 7B (alpha)",
organization: "Nousresearch",
maxLength: 4096,
},
"meta-llama/llama-2-13b-chat": {
id: "meta-llama/llama-2-13b-chat",
name: "Meta: Llama v2 13B Chat",
organization: "Meta-llama",
maxLength: 4096,
},
"migtissera/synthia-70b": {
id: "migtissera/synthia-70b",
name: "Synthia 70B",
organization: "Migtissera",
maxLength: 8192,
},
"pygmalionai/mythalion-13b": {
id: "pygmalionai/mythalion-13b",
name: "Pygmalion: Mythalion 13B",
organization: "Pygmalionai",
maxLength: 8192,
},
"undi95/remm-slerp-l2-13b-6k": {
id: "undi95/remm-slerp-l2-13b-6k",
name: "ReMM SLERP 13B 6k",
organization: "Undi95",
maxLength: 6144,
},
"gryphe/mythomax-l2-13b": {
id: "gryphe/mythomax-l2-13b",
name: "MythoMax 13B",
organization: "Gryphe",
maxLength: 4096,
},
"xwin-lm/xwin-lm-70b": {
id: "xwin-lm/xwin-lm-70b",
name: "Xwin 70B",
organization: "Xwin-lm",
maxLength: 8192,
},
"gryphe/mythomax-l2-13b-8k": {
id: "gryphe/mythomax-l2-13b-8k",
name: "MythoMax 13B 8k",
organization: "Gryphe",
maxLength: 8192,
},
"alpindale/goliath-120b": {
id: "alpindale/goliath-120b",
name: "Goliath 120B",
organization: "Alpindale",
maxLength: 6144,
},
"neversleep/noromaid-20b": {
id: "neversleep/noromaid-20b",
name: "Noromaid 20B",
organization: "Neversleep",
maxLength: 8192,
},
"gryphe/mythomist-7b": {
id: "gryphe/mythomist-7b",
name: "MythoMist 7B",
organization: "Gryphe",
maxLength: 32768,
},
"mancer/weaver": {
id: "mancer/weaver",
name: "Mancer: Weaver (alpha)",
organization: "Mancer",
maxLength: 8000,
},
"nousresearch/nous-hermes-llama2-70b": {
id: "nousresearch/nous-hermes-llama2-70b",
name: "Nous: Hermes 70B",
organization: "Nousresearch",
maxLength: 4096,
},
"nousresearch/nous-capybara-7b": {
id: "nousresearch/nous-capybara-7b",
name: "Nous: Capybara 7B",
organization: "Nousresearch",
maxLength: 4096,
},
"codellama/codellama-70b-instruct": {
id: "codellama/codellama-70b-instruct",
name: "Meta: CodeLlama 70B Instruct",
organization: "Codellama",
maxLength: 2048,
},
"teknium/openhermes-2-mistral-7b": {
id: "teknium/openhermes-2-mistral-7b",
name: "OpenHermes 2 Mistral 7B",
organization: "Teknium",
maxLength: 4096,
},
"teknium/openhermes-2.5-mistral-7b": {
id: "teknium/openhermes-2.5-mistral-7b",
name: "OpenHermes 2.5 Mistral 7B",
organization: "Teknium",
maxLength: 4096,
},
"undi95/remm-slerp-l2-13b": {
id: "undi95/remm-slerp-l2-13b",
name: "ReMM SLERP 13B",
organization: "Undi95",
maxLength: 4096,
},
"undi95/toppy-m-7b": {
id: "undi95/toppy-m-7b",
name: "Toppy M 7B",
organization: "Undi95",
maxLength: 4096,
},
"openrouter/cinematika-7b": {
id: "openrouter/cinematika-7b",
name: "Cinematika 7B (alpha)",
organization: "Openrouter",
maxLength: 8000,
},
"01-ai/yi-34b-chat": {
id: "01-ai/yi-34b-chat",
name: "Yi 34B Chat",
organization: "01-ai",
maxLength: 4096,
},
"01-ai/yi-34b": {
id: "01-ai/yi-34b",
name: "Yi 34B (base)",
organization: "01-ai",
maxLength: 4096,
},
"01-ai/yi-6b": {
id: "01-ai/yi-6b",
name: "Yi 6B (base)",
organization: "01-ai",
maxLength: 4096,
},
"togethercomputer/stripedhyena-nous-7b": {
id: "togethercomputer/stripedhyena-nous-7b",
name: "StripedHyena Nous 7B",
organization: "Togethercomputer",
maxLength: 32768,
},
"togethercomputer/stripedhyena-hessian-7b": {
id: "togethercomputer/stripedhyena-hessian-7b",
name: "StripedHyena Hessian 7B (base)",
organization: "Togethercomputer",
maxLength: 32768,
},
"mistralai/mixtral-8x7b": {
id: "mistralai/mixtral-8x7b",
name: "Mistral: Mixtral 8x7B (base)",
organization: "Mistralai",
maxLength: 32768,
},
"nousresearch/nous-hermes-yi-34b": {
id: "nousresearch/nous-hermes-yi-34b",
name: "Nous: Hermes 2 Yi 34B",
organization: "Nousresearch",
maxLength: 4096,
},
"nousresearch/nous-hermes-2-mistral-7b-dpo": {
id: "nousresearch/nous-hermes-2-mistral-7b-dpo",
name: "Nous: Hermes 2 Mistral 7B DPO",
organization: "Nousresearch",
maxLength: 8192,
},
"open-orca/mistral-7b-openorca": {
id: "open-orca/mistral-7b-openorca",
name: "Mistral OpenOrca 7B",
organization: "Open-orca",
maxLength: 8192,
},
"huggingfaceh4/zephyr-7b-beta": {
id: "huggingfaceh4/zephyr-7b-beta",
name: "Hugging Face: Zephyr 7B",
organization: "Huggingfaceh4",
maxLength: 4096,
},
"openai/gpt-3.5-turbo": {
id: "openai/gpt-3.5-turbo",
name: "OpenAI: GPT-3.5 Turbo",
organization: "Openai",
maxLength: 4095,
},
"openai/gpt-3.5-turbo-0125": {
id: "openai/gpt-3.5-turbo-0125",
name: "OpenAI: GPT-3.5 Turbo 16k",
organization: "Openai",
maxLength: 16385,
},
"openai/gpt-3.5-turbo-1106": {
id: "openai/gpt-3.5-turbo-1106",
name: "OpenAI: GPT-3.5 Turbo 16k (older v1106)",
organization: "Openai",
maxLength: 16385,
},
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
organization: "Openai",
maxLength: 4095,
},
"openai/gpt-3.5-turbo-0301": {
id: "openai/gpt-3.5-turbo-0301",
name: "OpenAI: GPT-3.5 Turbo (older v0301)",
organization: "Openai",
maxLength: 4095,
},
"openai/gpt-3.5-turbo-16k": {
id: "openai/gpt-3.5-turbo-16k",
name: "OpenAI: GPT-3.5 Turbo 16k",
organization: "Openai",
maxLength: 16385,
},
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo (preview)",
organization: "Openai",
maxLength: 128000,
},
"openai/gpt-4-1106-preview": {
id: "openai/gpt-4-1106-preview",
name: "OpenAI: GPT-4 Turbo (older v1106)",
organization: "Openai",
maxLength: 128000,
},
"openai/gpt-4": {
id: "openai/gpt-4",
name: "OpenAI: GPT-4",
organization: "Openai",
maxLength: 8191,
},
"openai/gpt-4-0314": {
id: "openai/gpt-4-0314",
name: "OpenAI: GPT-4 (older v0314)",
organization: "Openai",
maxLength: 8191,
},
"openai/gpt-4-32k": {
id: "openai/gpt-4-32k",
name: "OpenAI: GPT-4 32k",
organization: "Openai",
maxLength: 32767,
},
"openai/gpt-4-32k-0314": {
id: "openai/gpt-4-32k-0314",
name: "OpenAI: GPT-4 32k (older v0314)",
organization: "Openai",
maxLength: 32767,
},
"openai/gpt-4-vision-preview": {
id: "openai/gpt-4-vision-preview",
name: "OpenAI: GPT-4 Vision (preview)",
organization: "Openai",
maxLength: 128000,
},
"openai/gpt-3.5-turbo-instruct": {
id: "openai/gpt-3.5-turbo-instruct",
name: "OpenAI: GPT-3.5 Turbo Instruct",
organization: "Openai",
maxLength: 4095,
},
"google/palm-2-chat-bison": {
id: "google/palm-2-chat-bison",
name: "Google: PaLM 2 Chat",
organization: "Google",
maxLength: 36864,
},
"google/palm-2-codechat-bison": {
id: "google/palm-2-codechat-bison",
name: "Google: PaLM 2 Code Chat",
organization: "Google",
maxLength: 28672,
},
"google/palm-2-chat-bison-32k": {
id: "google/palm-2-chat-bison-32k",
name: "Google: PaLM 2 Chat 32k",
organization: "Google",
maxLength: 131072,
},
"google/palm-2-codechat-bison-32k": {
id: "google/palm-2-codechat-bison-32k",
name: "Google: PaLM 2 Code Chat 32k",
organization: "Google",
maxLength: 131072,
},
"google/gemini-pro": {
id: "google/gemini-pro",
name: "Google: Gemini Pro (preview)",
organization: "Google",
maxLength: 131040,
},
"google/gemini-pro-vision": {
id: "google/gemini-pro-vision",
name: "Google: Gemini Pro Vision (preview)",
organization: "Google",
maxLength: 65536,
},
"perplexity/pplx-70b-online": {
id: "perplexity/pplx-70b-online",
name: "Perplexity: PPLX 70B Online",
organization: "Perplexity",
maxLength: 4096,
},
"perplexity/pplx-7b-online": {
id: "perplexity/pplx-7b-online",
name: "Perplexity: PPLX 7B Online",
organization: "Perplexity",
maxLength: 4096,
},
"perplexity/pplx-7b-chat": {
id: "perplexity/pplx-7b-chat",
name: "Perplexity: PPLX 7B Chat",
organization: "Perplexity",
maxLength: 8192,
},
"perplexity/pplx-70b-chat": {
id: "perplexity/pplx-70b-chat",
name: "Perplexity: PPLX 70B Chat",
organization: "Perplexity",
maxLength: 4096,
},
"meta-llama/llama-2-70b-chat": {
id: "meta-llama/llama-2-70b-chat",
name: "Meta: Llama v2 70B Chat",
organization: "Meta-llama",
maxLength: 4096,
},
"jondurbin/airoboros-l2-70b": {
id: "jondurbin/airoboros-l2-70b",
name: "Airoboros 70B",
organization: "Jondurbin",
maxLength: 4096,
},
"austism/chronos-hermes-13b": {
id: "austism/chronos-hermes-13b",
name: "Chronos Hermes 13B v2",
organization: "Austism",
maxLength: 4096,
},
"mistralai/mistral-7b-instruct": {
id: "mistralai/mistral-7b-instruct",
name: "Mistral 7B Instruct",
organization: "Mistralai",
maxLength: 8192,
},
"openchat/openchat-7b": {
id: "openchat/openchat-7b",
name: "OpenChat 3.5",
organization: "Openchat",
maxLength: 8192,
},
"lizpreciatior/lzlv-70b-fp16-hf": {
id: "lizpreciatior/lzlv-70b-fp16-hf",
name: "lzlv 70B",
organization: "Lizpreciatior",
maxLength: 4096,
},
"cognitivecomputations/dolphin-mixtral-8x7b": {
id: "cognitivecomputations/dolphin-mixtral-8x7b",
name: "Dolphin 2.6 Mixtral 8x7B 🐬",
organization: "Cognitivecomputations",
maxLength: 32000,
},
"rwkv/rwkv-5-world-3b": {
id: "rwkv/rwkv-5-world-3b",
name: "RWKV v5 World 3B",
organization: "Rwkv",
maxLength: 10000,
},
"recursal/rwkv-5-3b-ai-town": {
id: "recursal/rwkv-5-3b-ai-town",
name: "RWKV v5 3B AI Town",
organization: "Recursal",
maxLength: 10000,
},
"recursal/eagle-7b": {
id: "recursal/eagle-7b",
name: "RWKV v5: Eagle 7B",
organization: "Recursal",
maxLength: 10000,
},
"google/gemma-7b-it": {
id: "google/gemma-7b-it",
name: "Google: Gemma 7B",
organization: "Google",
maxLength: 8000,
},
"anthropic/claude-2": {
id: "anthropic/claude-2",
name: "Anthropic: Claude v2",
organization: "Anthropic",
maxLength: 200000,
},
"anthropic/claude-2.1": {
id: "anthropic/claude-2.1",
name: "Anthropic: Claude v2.1",
organization: "Anthropic",
maxLength: 200000,
},
"anthropic/claude-2.0": {
id: "anthropic/claude-2.0",
name: "Anthropic: Claude v2.0",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-instant-1": {
id: "anthropic/claude-instant-1",
name: "Anthropic: Claude Instant v1",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-instant-1.2": {
id: "anthropic/claude-instant-1.2",
name: "Anthropic: Claude Instant v1.2",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-1": {
id: "anthropic/claude-1",
name: "Anthropic: Claude v1",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-1.2": {
id: "anthropic/claude-1.2",
name: "Anthropic: Claude (older v1)",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-instant-1.0": {
id: "anthropic/claude-instant-1.0",
name: "Anthropic: Claude Instant (older v1)",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-instant-1.1": {
id: "anthropic/claude-instant-1.1",
name: "Anthropic: Claude Instant (older v1.1)",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-2:beta": {
id: "anthropic/claude-2:beta",
name: "Anthropic: Claude v2 (experimental)",
organization: "Anthropic",
maxLength: 200000,
},
"anthropic/claude-2.1:beta": {
id: "anthropic/claude-2.1:beta",
name: "Anthropic: Claude v2.1 (experimental)",
organization: "Anthropic",
maxLength: 200000,
},
"anthropic/claude-2.0:beta": {
id: "anthropic/claude-2.0:beta",
name: "Anthropic: Claude v2.0 (experimental)",
organization: "Anthropic",
maxLength: 100000,
},
"anthropic/claude-instant-1:beta": {
id: "anthropic/claude-instant-1:beta",
name: "Anthropic: Claude Instant v1 (experimental)",
organization: "Anthropic",
maxLength: 100000,
},
"huggingfaceh4/zephyr-7b-beta:free": {
id: "huggingfaceh4/zephyr-7b-beta:free",
name: "Hugging Face: Zephyr 7B (free)",
organization: "Huggingfaceh4",
maxLength: 4096,
},
"openchat/openchat-7b:free": {
id: "openchat/openchat-7b:free",
name: "OpenChat 3.5 (free)",
organization: "Openchat",
maxLength: 8192,
},
"mistralai/mistral-tiny": {
id: "mistralai/mistral-tiny",
name: "Mistral: Tiny",
organization: "Mistralai",
maxLength: 32000,
},
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral: Small",
organization: "Mistralai",
maxLength: 32000,
},
"mistralai/mistral-medium": {
id: "mistralai/mistral-medium",
name: "Mistral: Medium",
organization: "Mistralai",
maxLength: 32000,
},
};
module.exports.MODELS = MODELS;

View file

@ -0,0 +1 @@
*.json

View file

@ -0,0 +1,37 @@
// OpenRouter has lots of models we can use so we use this script
// to cache all the models. We can see the list of all the models
// here: https://openrouter.ai/docs#models
// To run, cd into this directory and run `node parse.mjs`
// copy outputs into the export in ../models.js
// Update the date below if you run this again because OpenRouter added new models.
// Last Collected: Feb 23, 2024
import fs from "fs";
/**
 * Fetch the current OpenRouter model list and cache it to chat_models.json.
 * @returns {Promise<object>} model map keyed by model id, each entry
 *   { id, name, organization, maxLength }.
 * @throws {Error} when the models endpoint does not return HTTP 2xx
 *   (the original silently parsed error bodies and produced garbage output).
 */
async function parseChatModels() {
  const models = {};
  const response = await fetch("https://openrouter.ai/api/v1/models");
  if (!response.ok)
    throw new Error(
      `OpenRouter /models request failed with status ${response.status}`
    );
  const data = await response.json();
  data.data.forEach((model) => {
    // Organization is the id prefix (e.g. "openai/gpt-4" -> "Openai"),
    // capitalized once here instead of splitting the id twice.
    const org = model.id.split("/")[0];
    models[model.id] = {
      id: model.id,
      name: model.name,
      organization: org.charAt(0).toUpperCase() + org.slice(1),
      maxLength: model.context_length,
    };
  });

  fs.writeFileSync(
    "chat_models.json",
    JSON.stringify(models, null, 2),
    "utf-8"
  );
  return models;
}
parseChatModels();

View file

@ -1,3 +1,4 @@
const { openRouterModels } = require("../AiProviders/openRouter");
const { perplexityModels } = require("../AiProviders/perplexity");
const { togetherAiModels } = require("../AiProviders/togetherAi");
const SUPPORT_CUSTOM_MODELS = [
@ -8,6 +9,7 @@ const SUPPORT_CUSTOM_MODELS = [
"togetherai",
"mistral",
"perplexity",
"openrouter",
];
async function getCustomModels(provider = "", apiKey = null, basePath = null) {
@ -29,6 +31,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return nativeLLMModels();
case "perplexity":
return await getPerplexityModels();
case "openrouter":
return await getOpenRouterModels();
default:
return { models: [], error: "Invalid provider for custom models" };
}
@ -138,6 +142,21 @@ async function getPerplexityModels() {
return { models, error: null };
}
// Flatten the cached OpenRouter model catalog into the shape the custom-model
// picker expects. Returns { models, error } like the other provider loaders.
async function getOpenRouterModels() {
  const knownModels = await openRouterModels();
  // BUGFIX: the original guard was `!Object.keys(knownModels).length === 0`.
  // `!` binds before `===`, so it compared a boolean to 0 and was always
  // false — the empty-catalog early return could never fire as written.
  if (Object.keys(knownModels).length === 0)
    return { models: [], error: null };

  const models = Object.values(knownModels).map((model) => {
    return {
      id: model.id,
      organization: model.organization,
      name: model.name,
    };
  });
  return { models, error: null };
}
async function getMistralModels(apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({

View file

@ -61,6 +61,9 @@ function getLLMProvider(modelPreference = null) {
case "perplexity":
const { PerplexityLLM } = require("../AiProviders/perplexity");
return new PerplexityLLM(embedder, modelPreference);
case "openrouter":
const { OpenRouterLLM } = require("../AiProviders/openRouter");
return new OpenRouterLLM(embedder, modelPreference);
case "mistral":
const { MistralLLM } = require("../AiProviders/mistral");
return new MistralLLM(embedder, modelPreference);

View file

@ -249,6 +249,16 @@ const KEY_MAPPING = {
checks: [isNotEmpty],
},
// OpenRouter Options
OpenRouterApiKey: {
envKey: "OPENROUTER_API_KEY",
checks: [isNotEmpty],
},
OpenRouterModelPref: {
envKey: "OPENROUTER_MODEL_PREF",
checks: [isNotEmpty],
},
// System Settings
AuthToken: {
envKey: "AUTH_TOKEN",
@ -325,6 +335,7 @@ function supportedLLM(input = "") {
"mistral",
"huggingface",
"perplexity",
"openrouter",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}