Update groq.js

Fixed a small error that caused the Groq Cloud request to be retried endlessly when DeepSeek-R1 was selected as the model.
This commit is contained in:
Sweaterdog 2025-01-28 13:43:33 -08:00 committed by GitHub
parent c78dba7776
commit ec6f4f7098
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,88 +1,115 @@
// groq.js
import Groq from 'groq-sdk'; import Groq from 'groq-sdk';
import { getKey } from '../utils/keys.js'; import { getKey } from '../utils/keys.js';
import { log } from '../../logger.js';
// Umbrella class for Mixtral, LLama, Gemma... /**
* Umbrella class for Mixtral, LLama, Gemma...
*/
export class GroqCloudAPI { export class GroqCloudAPI {
constructor(model_name, url, max_tokens=16384) { constructor(model_name, url, max_tokens = 16384) {
this.model_name = model_name; this.model_name = model_name;
this.url = url; this.url = url;
this.max_tokens = max_tokens; this.max_tokens = max_tokens;
// ReplicateAPI theft :3 // Groq Cloud doesn't support custom URLs; warn if provided
if (this.url) { if (this.url) {
console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL."); console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL.");
} }
// Initialize Groq SDK with the API key
this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') }); this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
} }
async sendRequest(turns, systemMessage, stop_seq=null) { /**
// We'll do up to 5 attempts for partial <think> mismatch if * Sends a chat completion request to the Groq Cloud endpoint.
// the model name includes "deepseek-r1". *
* @param {Array} turns - An array of message objects, e.g., [{role: 'user', content: 'Hi'}].
* @param {string} systemMessage - The system prompt or instruction.
* @param {string} stop_seq - A string that represents a stopping sequence, default '***'.
* @returns {Promise<string>} - The content of the model's reply.
*/
async sendRequest(turns, systemMessage, stop_seq = '***') {
// Maximum number of attempts to handle partial <think> tag mismatches 5 is a good value, I guess
const maxAttempts = 5; const maxAttempts = 5;
let attempt = 0; let attempt = 0;
let finalRes = null; let finalRes = null;
// Prepare the message array // Prepare the input messages by prepending the system message
let messages = [{ role: "system", content: systemMessage }].concat(turns); const messages = [{ role: 'system', content: systemMessage }, ...turns];
console.log('Messages:', messages);
while (attempt < maxAttempts) { while (attempt < maxAttempts) {
attempt++; attempt++;
console.log(`Awaiting Groq response... (attempt: ${attempt}/${maxAttempts})`); console.log(`Awaiting Groq response... (model: ${this.model_name}, attempt: ${attempt})`);
let res = null;
// Collect the streaming response
let temp_res = "";
try { try {
// Create the chat completion stream // Create the chat completion request
let completion = await this.groq.chat.completions.create({ const completion = await this.groq.chat.completions.create({
messages: messages, messages: messages,
model: this.model_name || "mixtral-8x7b-32768", model: this.model_name || "mixtral-8x7b-32768",
temperature: 0.2, temperature: 0.2,
max_tokens: this.max_tokens, max_tokens: this.max_tokens,
top_p: 1, top_p: 1,
stream: true, stream: false,
stop: stop_seq // e.g. "***" stop: stop_seq // "***"
}); });
// Read each streamed chunk // Extract the content from the response
for await (const chunk of completion) { res = completion?.choices?.[0]?.message?.content || '';
temp_res += chunk.choices[0]?.delta?.content || ''; console.log('Received response from Groq.');
}
} catch (err) { } catch (err) {
console.error("Error while streaming from Groq:", err); // Handle context length exceeded by retrying with shorter context
temp_res = "My brain just kinda stopped working. Try again."; if (
// We won't retry partial mismatch if a genuine error occurred here err.message.toLowerCase().includes('context length') &&
finalRes = temp_res; turns.length > 1
break; ) {
console.log('Context length exceeded, trying again with a shorter context.');
// Remove the earliest user turn and retry
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
} else {
// Log other errors and return fallback message
console.log(err);
res = 'My brain disconnected, try again.';
}
} }
// If the model name includes "deepseek-r1", apply <think> logic // If the model name includes "deepseek-r1", handle <think> tags
if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) { if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) {
const hasOpen = temp_res.includes("<think>"); const hasOpenTag = res.includes("<think>");
const hasClose = temp_res.includes("</think>"); const hasCloseTag = res.includes("</think>");
// If partial mismatch, retry // Check for partial <think> tag mismatches
if ((hasOpen && !hasClose) || (!hasOpen && hasClose)) { if ((hasOpenTag && !hasCloseTag)) {
console.warn("Partial <think> block detected. Retrying..."); console.warn("Partial <think> block detected. Re-generating Groq request...");
continue; // Retry the request by continuing the loop
continue;
} }
// If both <think> and </think> appear, remove the entire block // If </think> is present but <think> is not, prepend <think>
if (hasOpen && hasClose) { if (hasCloseTag && !hasOpenTag) {
// Remove everything from <think> to </think> res = '<think>' + res;
temp_res = temp_res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
} }
// Trim the <think> block from the response
res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
} }
// We either do not have deepseek-r1 or we have a correct <think> scenario // Assign the processed response and exit the loop
finalRes = temp_res; finalRes = res;
break; break; // Stop retrying
} }
// If, after max attempts, we never set finalRes (e.g., partial mismatch each time) // If after all attempts, finalRes is still null, assign a fallback
if (finalRes == null) { if (finalRes == null) {
console.warn("Could not obtain a valid or matched <think> response after max attempts."); console.warn("Could not obtain a valid <think> block or normal response after max attempts.");
finalRes = "Response incomplete, please try again."; finalRes = 'Response incomplete, please try again.';
} }
finalRes = finalRes.replace(/<\|separator\|>/g, '*no response*');
return finalRes; return finalRes;
} }