mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-16 04:05:37 +02:00
Update groq.js
Fixed a small error that would endlessly retry the GroqCloud response if a DeepSeek-R1 model was chosen
This commit is contained in:
parent
c78dba7776
commit
ec6f4f7098
1 changed file with 69 additions and 42 deletions
|
@ -1,88 +1,115 @@
|
||||||
|
// groq.js
|
||||||
|
|
||||||
import Groq from 'groq-sdk';
|
import Groq from 'groq-sdk';
|
||||||
import { getKey } from '../utils/keys.js';
|
import { getKey } from '../utils/keys.js';
|
||||||
|
import { log } from '../../logger.js';
|
||||||
|
|
||||||
// Umbrella class for Mixtral, LLama, Gemma...
|
/**
|
||||||
|
* Umbrella class for Mixtral, LLama, Gemma...
|
||||||
|
*/
|
||||||
export class GroqCloudAPI {
|
export class GroqCloudAPI {
|
||||||
constructor(model_name, url, max_tokens=16384) {
|
constructor(model_name, url, max_tokens = 16384) {
|
||||||
this.model_name = model_name;
|
this.model_name = model_name;
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.max_tokens = max_tokens;
|
this.max_tokens = max_tokens;
|
||||||
|
|
||||||
// ReplicateAPI theft :3
|
// Groq Cloud doesn't support custom URLs; warn if provided
|
||||||
if (this.url) {
|
if (this.url) {
|
||||||
console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL.");
|
console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize Groq SDK with the API key
|
||||||
this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
|
this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendRequest(turns, systemMessage, stop_seq=null) {
|
/**
|
||||||
// We'll do up to 5 attempts for partial <think> mismatch if
|
* Sends a chat completion request to the Groq Cloud endpoint.
|
||||||
// the model name includes "deepseek-r1".
|
*
|
||||||
|
* @param {Array} turns - An array of message objects, e.g., [{role: 'user', content: 'Hi'}].
|
||||||
|
* @param {string} systemMessage - The system prompt or instruction.
|
||||||
|
* @param {string} stop_seq - A string that represents a stopping sequence, default '***'.
|
||||||
|
* @returns {Promise<string>} - The content of the model's reply.
|
||||||
|
*/
|
||||||
|
async sendRequest(turns, systemMessage, stop_seq = '***') {
|
||||||
|
// Maximum number of attempts to handle partial <think> tag mismatches 5 is a good value, I guess
|
||||||
const maxAttempts = 5;
|
const maxAttempts = 5;
|
||||||
let attempt = 0;
|
let attempt = 0;
|
||||||
let finalRes = null;
|
let finalRes = null;
|
||||||
|
|
||||||
// Prepare the message array
|
// Prepare the input messages by prepending the system message
|
||||||
let messages = [{ role: "system", content: systemMessage }].concat(turns);
|
const messages = [{ role: 'system', content: systemMessage }, ...turns];
|
||||||
|
console.log('Messages:', messages);
|
||||||
|
|
||||||
while (attempt < maxAttempts) {
|
while (attempt < maxAttempts) {
|
||||||
attempt++;
|
attempt++;
|
||||||
console.log(`Awaiting Groq response... (attempt: ${attempt}/${maxAttempts})`);
|
console.log(`Awaiting Groq response... (model: ${this.model_name}, attempt: ${attempt})`);
|
||||||
|
|
||||||
|
let res = null;
|
||||||
|
|
||||||
// Collect the streaming response
|
|
||||||
let temp_res = "";
|
|
||||||
try {
|
try {
|
||||||
// Create the chat completion stream
|
// Create the chat completion request
|
||||||
let completion = await this.groq.chat.completions.create({
|
const completion = await this.groq.chat.completions.create({
|
||||||
messages: messages,
|
messages: messages,
|
||||||
model: this.model_name || "mixtral-8x7b-32768",
|
model: this.model_name || "mixtral-8x7b-32768",
|
||||||
temperature: 0.2,
|
temperature: 0.2,
|
||||||
max_tokens: this.max_tokens,
|
max_tokens: this.max_tokens,
|
||||||
top_p: 1,
|
top_p: 1,
|
||||||
stream: true,
|
stream: false,
|
||||||
stop: stop_seq // e.g. "***"
|
stop: stop_seq // "***"
|
||||||
});
|
});
|
||||||
|
|
||||||
// Read each streamed chunk
|
// Extract the content from the response
|
||||||
for await (const chunk of completion) {
|
res = completion?.choices?.[0]?.message?.content || '';
|
||||||
temp_res += chunk.choices[0]?.delta?.content || '';
|
console.log('Received response from Groq.');
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Error while streaming from Groq:", err);
|
// Handle context length exceeded by retrying with shorter context
|
||||||
temp_res = "My brain just kinda stopped working. Try again.";
|
if (
|
||||||
// We won't retry partial mismatch if a genuine error occurred here
|
err.message.toLowerCase().includes('context length') &&
|
||||||
finalRes = temp_res;
|
turns.length > 1
|
||||||
break;
|
) {
|
||||||
|
console.log('Context length exceeded, trying again with a shorter context.');
|
||||||
|
// Remove the earliest user turn and retry
|
||||||
|
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||||
|
} else {
|
||||||
|
// Log other errors and return fallback message
|
||||||
|
console.log(err);
|
||||||
|
res = 'My brain disconnected, try again.';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the model name includes "deepseek-r1", apply <think> logic
|
// If the model name includes "deepseek-r1", handle <think> tags
|
||||||
if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) {
|
if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) {
|
||||||
const hasOpen = temp_res.includes("<think>");
|
const hasOpenTag = res.includes("<think>");
|
||||||
const hasClose = temp_res.includes("</think>");
|
const hasCloseTag = res.includes("</think>");
|
||||||
|
|
||||||
// If partial mismatch, retry
|
// Check for partial <think> tag mismatches
|
||||||
if ((hasOpen && !hasClose) || (!hasOpen && hasClose)) {
|
if ((hasOpenTag && !hasCloseTag)) {
|
||||||
console.warn("Partial <think> block detected. Retrying...");
|
console.warn("Partial <think> block detected. Re-generating Groq request...");
|
||||||
continue;
|
// Retry the request by continuing the loop
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If both <think> and </think> appear, remove the entire block
|
// If </think> is present but <think> is not, prepend <think>
|
||||||
if (hasOpen && hasClose) {
|
if (hasCloseTag && !hasOpenTag) {
|
||||||
// Remove everything from <think> to </think>
|
res = '<think>' + res;
|
||||||
temp_res = temp_res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
||||||
}
|
}
|
||||||
|
// Trim the <think> block from the response
|
||||||
|
res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
// We either do not have deepseek-r1 or we have a correct <think> scenario
|
// Assign the processed response and exit the loop
|
||||||
finalRes = temp_res;
|
finalRes = res;
|
||||||
break;
|
break; // Stop retrying
|
||||||
}
|
}
|
||||||
|
|
||||||
// If, after max attempts, we never set finalRes (e.g., partial mismatch each time)
|
// If after all attempts, finalRes is still null, assign a fallback
|
||||||
if (finalRes == null) {
|
if (finalRes == null) {
|
||||||
console.warn("Could not obtain a valid or matched <think> response after max attempts.");
|
console.warn("Could not obtain a valid <think> block or normal response after max attempts.");
|
||||||
finalRes = "Response incomplete, please try again.";
|
finalRes = 'Response incomplete, please try again.';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finalRes = finalRes.replace(/<\|separator\|>/g, '*no response*');
|
||||||
|
|
||||||
return finalRes;
|
return finalRes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue