Skip to content

Commit

Permalink
Merge pull request #97 from intelligentnode/update-evaluator
Browse files Browse the repository at this point in the history
Update evaluator
  • Loading branch information
intelligentnode authored Jan 21, 2024
2 parents 3f283c6 + 9340bab commit 525ada8
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 65 deletions.
8 changes: 7 additions & 1 deletion IntelliNode/model/input/ChatModelInput.js
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ class GeminiInput extends ChatModelInput {
constructor(systemMessage, options = {}) {
super(options);
this.messages = [];
this.maxOutputTokens = options.maxTokens
this.temperature = options.temperature

if (systemMessage && typeof systemMessage === 'string') {
this.addUserMessage(systemMessage);
Expand Down Expand Up @@ -207,7 +209,11 @@ class GeminiInput extends ChatModelInput {

/**
 * Builds the request payload object from this input's accumulated messages
 * and its optional generation settings.
 *
 * This listing is a rendered commit diff: the `contents` line appears twice,
 * first in its pre-change form (no trailing comma) and then in its
 * post-change form, which is followed by the new `generationConfig` entry.
 *
 * @returns {Object} payload with `contents` (the `messages` array) and, in
 *   the post-change version, a `generationConfig` object holding only the
 *   settings that passed the guards below.
 */
getChatInput() {
return {
// pre-change line (the single deletion reported for this file)
contents: this.messages
// post-change line
contents: this.messages,
generationConfig: {
// Each key is spread in only when its value is truthy, so an unset option
// is left out of the object rather than sent as `undefined`.
// NOTE(review): this is a truthiness test, not a null check - a temperature
// of 0 (or maxOutputTokens of 0) is omitted exactly like an unset value.
// Confirm whether an explicit 0 should be forwarded.
...(this.temperature && { temperature: this.temperature }),
...(this.maxOutputTokens && { maxOutputTokens: this.maxOutputTokens }),
}
};
}

Expand Down
5 changes: 3 additions & 2 deletions IntelliNode/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "intellinode",
"version": "1.7.8",
"version": "1.7.9",
"description": "Integrate and evaluate various AI models, such as ChatGPT, Llama, Diffusion, Cohere, Gemini and Hugging Face.",
"main": "index.js",
"keywords": [
Expand All @@ -17,7 +17,8 @@
"prompt",
"automation",
"mistralai",
"gemini"
"gemini",
"robotics"
],
"author": "IntelliNode",
"license": "Apache",
Expand Down
40 changes: 27 additions & 13 deletions IntelliNode/test/integration/ModelEvaluation.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,42 @@ const { SupportedChatModels } = require('../../function/Chatbot');
const { SupportedLangModels } = require('../../controller/RemoteLanguageModel');

// prepare the evaluation settings
// Each settings object carries the fields that compareModels reads from a
// provider entry: apiKey, provider, type, model and, optionally, maxTokens.
// NOTE(review): this listing is a rendered diff. The three single-line-style
// declarations directly below look like the pre-change text, and the
// reformatted declarations of the same names that follow are the post-change
// text; only one set exists in the real file.
const llamaChat = { apiKey: process.env.REPLICATE_API_KEY, provider: SupportedChatModels.REPLICATE,
type:'chat', model: '13b-chat', maxTokens: 50};
const openaiChat = { apiKey: process.env.OPENAI_API_KEY, provider: SupportedChatModels.OPENAI,
type: 'chat', model:'gpt-3.5-turbo', maxTokens: 50};
const cohereCompletion = { apiKey: process.env.COHERE_API_KEY, provider: SupportedLangModels.COHERE,
type:'completion', model: 'command', maxTokens: 50};
// post-change declarations start here
const llamaChat = {
apiKey: process.env.REPLICATE_API_KEY, provider: SupportedChatModels.REPLICATE,
type: 'chat', model: '13b-chat', maxTokens: 50
};
const openaiChat = {
apiKey: process.env.OPENAI_API_KEY, provider: SupportedChatModels.OPENAI,
type: 'chat', model: 'gpt-3.5-turbo', maxTokens: 50
};
const cohereCompletion = {
apiKey: process.env.COHERE_API_KEY, provider: SupportedLangModels.COHERE,
type: 'completion', model: 'command', maxTokens: 50
};
// Unlike the other entries, geminiChat sets no maxTokens, so compareModels
// passes `undefined` and generateText falls back to its own default.
const geminiChat = {
apiKey: process.env.GEMINI_API_KEY, provider: SupportedChatModels.GEMINI,
type: 'chat', model: 'gemini'
};
const mistralChat = {
apiKey: process.env.MISTRAL_API_KEY, provider: SupportedChatModels.MISTRAL,
type: 'chat', model: 'mistral-medium', maxTokens: 50
};

// create the evaluation object
// The second argument is the string 'openai'; what it selects is decided by
// the LLMEvaluation constructor, which is not visible in this listing.
const llmEvaluation = new LLMEvaluation(process.env.OPENAI_API_KEY, 'openai');

/**
 * Integration test: sends one prompt to every configured provider through
 * llmEvaluation.compareModels, logs the returned object, and asserts on the
 * number of top-level keys it contains.
 *
 * This listing is a rendered commit diff, so some statements appear twice
 * (pre-change text followed by post-change text).
 */
async function testLLMEvaluation() {
const inputString = "Explain the process of photosynthesis in simple terms.";
const targetAnswers = ["Photosynthesis is the process where green plants use sunlight to turn carbon dioxide and water into glucose and oxygen. The glucose provides food for the plant, and the oxygen gets released back into the air.",
"Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
"In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
// pre-change provider list (three providers)
const providerSets = [llamaChat, openaiChat, cohereCompletion];
"Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
"In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
// post-change provider list: adds the Gemini and Mistral chat settings
const providerSets = [llamaChat, openaiChat, cohereCompletion, geminiChat, mistralChat];

const results = await llmEvaluation.compareModels(inputString, targetAnswers, providerSets);

// NOTE(review): the label names only OpenAI and Cohere, but the logged object
// holds one entry per element of providerSets.
console.log('OpenAI Chat and Cohere Completion ModelEvaluation Results:', results);

// Expected key count is one per provider plus one extra entry; the extra one
// is presumably the 'lookup' legend that compareModels adds to its result.
// NOTE(review): the compareModels hunk in this commit catches per-provider
// errors and still records an entry for that provider, so this count check
// passes even when a provider call fails - confirm that is intended.
assert(Object.keys(results).length === providerSets.length+1, 'Test failed');
assert(Object.keys(results).length === providerSets.length + 1, 'Test failed');
}


Expand All @@ -35,15 +49,15 @@ async function testLLMEvaluationJson() {
const inputString = "Explain the process of photosynthesis in simple terms.";

const targetAnswers = ["Photosynthesis is the process where green plants use sunlight to turn carbon dioxide and water into glucose and oxygen. The glucose provides food for the plant, and the oxygen gets released back into the air.",
"Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
"In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
"Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
"In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];

const providerSets = [llamaChat, openaiChat, cohereCompletion];

const results = await llmEvaluation.compareModels(inputString, targetAnswers, providerSets, true);

console.log('Json Results:', results);

}

(async () => {
Expand Down
112 changes: 63 additions & 49 deletions IntelliNode/utils/LLMEvaluation.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ const { RemoteEmbedModel, SupportedEmbedModels } = require('../controller/Remote
const LanguageModelInput = require('../model/input/LanguageModelInput');
const { Chatbot, SupportedChatModels } = require("../function/Chatbot");
const { RemoteLanguageModel, SupportedLangModels } = require("../controller/RemoteLanguageModel");
const { ChatGPTInput, LLamaReplicateInput, LLamaSageInput } = require("../model/input/ChatModelInput");
const { ChatGPTInput, LLamaReplicateInput, LLamaSageInput, GeminiInput, CohereInput, MistralInput } = require("../model/input/ChatModelInput");
const MatchHelpers = require('../utils/MatchHelpers');
const EmbedInput = require('../model/input/EmbedInput');
const { ModelEvaluation } = require('./ModelEvaluation');
Expand All @@ -26,36 +26,42 @@ class LLMEvaluation extends ModelEvaluation {
}

async generateText(apiKey, inputString, provider, modelName, type,
maxTokens = 400, custom_url = null) {
maxTokens = 500, custom_url = null) {

if (type == 'chat' && Object.values(SupportedChatModels).includes(provider.toLowerCase())) {

const customProxy = (custom_url != undefined && custom_url != null && custom_url != '') ? {url: custom_url } : null;

const chatbot = new Chatbot(apiKey, provider, customProxy);

// define the chat input
let input;
if (SupportedChatModels.REPLICATE == provider.toLowerCase()) {
input = new LLamaReplicateInput("provide direct answer", { model: modelName, maxTokens: maxTokens});
} else if (SupportedChatModels.SAGEMAKER == provider.toLowerCase()) {
input = new LLamaSageInput("provide direct answer", {maxTokens: maxTokens});
} else {
input = new ChatGPTInput("provide direct answer", { model: modelName, maxTokens: maxTokens});
}
const customProxy = (custom_url != undefined && custom_url != null && custom_url != '') ? { url: custom_url } : null;

const chatbot = new Chatbot(apiKey, provider, customProxy);

// define the chat input
let input;
if (SupportedChatModels.REPLICATE == provider.toLowerCase()) {
input = new LLamaReplicateInput("provide direct answer", { model: modelName, maxTokens: maxTokens });
} else if (SupportedChatModels.SAGEMAKER == provider.toLowerCase()) {
input = new LLamaSageInput("provide direct answer", { maxTokens: maxTokens });
} else if (SupportedChatModels.GEMINI == provider.toLowerCase()) {
input = new GeminiInput("provide direct answer", { maxTokens: maxTokens });
} else if (SupportedChatModels.COHERE == provider.toLowerCase()) {
input = new CohereInput("provide direct answer", { maxTokens: maxTokens });
} else if (SupportedChatModels.MISTRAL == provider.toLowerCase()) {
input = new MistralInput("provide direct answer", { maxTokens: maxTokens });
} else {
input = new ChatGPTInput("provide direct answer", { model: modelName, maxTokens: maxTokens });
}

input.addUserMessage(inputString);
const responses = await chatbot.chat(input);
input.addUserMessage(inputString);
const responses = await chatbot.chat(input);

return responses[0].trim();
return responses[0].trim();
} else if (type == 'completion' && Object.values(SupportedLangModels).includes(provider.toLowerCase())) {

const languageModel = new RemoteLanguageModel(apiKey, provider);
const langInput = new LanguageModelInput({ prompt: inputString, model: modelName, maxTokens: maxTokens });
langInput.setDefaultValues(provider, maxTokens);
const languageModel = new RemoteLanguageModel(apiKey, provider);
const langInput = new LanguageModelInput({ prompt: inputString, model: modelName, maxTokens: maxTokens });
langInput.setDefaultValues(provider, maxTokens);

const responses = await languageModel.generateText(langInput);
return responses[0].trim();
const responses = await languageModel.generateText(langInput);
return responses[0].trim();
} else {
throw new Error('Provider not supported');
}
Expand Down Expand Up @@ -86,45 +92,53 @@ class LLMEvaluation extends ModelEvaluation {
let targetEmbeddings = [];

// Initiate Embedding for targets
for(let target of targetAnswers) {
for (let target of targetAnswers) {
const embedding = await this.generateEmbedding(target);
targetEmbeddings.push(embedding);
}

for(let provider of providerSets) {
for (let provider of providerSets) {
console.log(`- start ${provider.model} evaluation`)

let predictions = [];
let prediction = await this.generateText(provider.apiKey, inputString, provider.provider,
provider.model, provider.type,
provider.maxTokens, provider.url);
const predictionEmbedding = await this.generateEmbedding(prediction);

let cosineSum = 0, euclideanSum = 0, manhattanSum = 0;
for(let targetEmbedding of targetEmbeddings) {
cosineSum += MatchHelpers.cosineSimilarity(predictionEmbedding, targetEmbedding);
euclideanSum += MatchHelpers.euclideanDistance(predictionEmbedding, targetEmbedding);
manhattanSum += MatchHelpers.manhattanDistance(predictionEmbedding, targetEmbedding);
}

const avgCosine = cosineSum / targetEmbeddings.length;
const avgEuclidean = euclideanSum / targetEmbeddings.length;
const avgManhattan = manhattanSum / targetEmbeddings.length;
try {
let prediction = await this.generateText(provider.apiKey, inputString, provider.provider,
provider.model, provider.type,
provider.maxTokens, provider.url);
const predictionEmbedding = await this.generateEmbedding(prediction);

let cosineSum = 0, euclideanSum = 0, manhattanSum = 0;
for (let targetEmbedding of targetEmbeddings) {
cosineSum += MatchHelpers.cosineSimilarity(predictionEmbedding, targetEmbedding);
euclideanSum += MatchHelpers.euclideanDistance(predictionEmbedding, targetEmbedding);
manhattanSum += MatchHelpers.manhattanDistance(predictionEmbedding, targetEmbedding);
}

predictions.push({
prediction: prediction,
score_cosine_similarity: avgCosine,
score_euclidean_distance: avgEuclidean,
score_manhattan_distance: avgManhattan
});
const avgCosine = cosineSum / targetEmbeddings.length;
const avgEuclidean = euclideanSum / targetEmbeddings.length;
const avgManhattan = manhattanSum / targetEmbeddings.length;

predictions.push({
prediction: prediction,
score_cosine_similarity: avgCosine,
score_euclidean_distance: avgEuclidean,
score_manhattan_distance: avgManhattan,
stop_reason: "complete"
});
} catch (error) {
console.error(error);
predictions.push({
stop_reason: "error"
});
}

results[`${provider.provider}/${provider.model}`] = predictions;
}

results['lookup'] = {
'cosine_similarity': 'a value closer to 1 indicates a higher degree of similarity between two vectors.',
'euclidean_distance': 'the lower the value, the closer the two points.',
'manhattan_distance': 'the lower the value, the closer the two vectors.'
'cosine_similarity': 'a value closer to 1 indicates a higher degree of similarity between two vectors.',
'euclidean_distance': 'the lower the value, the closer the two points.',
'manhattan_distance': 'the lower the value, the closer the two vectors.'
}

if (isJson) {
Expand Down

0 comments on commit 525ada8

Please sign in to comment.