mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-16 04:05:37 +02:00
Update local.js
Add Deepseek-R1 support. Deepseek-R1 wraps its reasoning in "<think> [Thoughts] </think>", and the response can be cut short if it reasons through a command and its syntax, so we strip the reasoning block (and retry on a partial one) before returning the response.
This commit is contained in:
parent
d0d1e45c8c
commit
7644c63565
1 changed files with 87 additions and 23 deletions
|
@ -8,41 +8,104 @@ export class Local {
|
||||||
this.embedding_endpoint = '/api/embeddings';
|
this.embedding_endpoint = '/api/embeddings';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main method to handle chat requests.
|
||||||
|
*/
|
||||||
async sendRequest(turns, systemMessage) {
|
async sendRequest(turns, systemMessage) {
|
||||||
let model = this.model_name || 'llama3';
|
// Choose the model name or default to 'llama3'
|
||||||
|
const model = this.model_name || 'llama3';
|
||||||
|
|
||||||
|
// Format messages and inject the system message at the front
|
||||||
let messages = strictFormat(turns);
|
let messages = strictFormat(turns);
|
||||||
messages.unshift({role: 'system', content: systemMessage});
|
messages.unshift({ role: 'system', content: systemMessage });
|
||||||
let res = null;
|
console.log('Messages:', messages);
|
||||||
try {
|
|
||||||
console.log(`Awaiting local response... (model: ${model})`)
|
// We'll do up to 5 attempts for "deepseek-r1" if the <think> tags are mismatched
|
||||||
res = await this.send(this.chat_endpoint, {model: model, messages: messages, stream: false});
|
const maxAttempts = 5;
|
||||||
if (res)
|
let attempt = 0;
|
||||||
res = res['message']['content'];
|
let finalRes = null;
|
||||||
}
|
|
||||||
catch (err) {
|
while (attempt < maxAttempts) {
|
||||||
if (err.message.toLowerCase().includes('context length') && turns.length > 1) {
|
attempt++;
|
||||||
console.log('Context length exceeded, trying again with shorter context.');
|
console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);
|
||||||
return await sendRequest(turns.slice(1), systemMessage, stop_seq);
|
|
||||||
} else {
|
// Perform the actual request (wrapped in a try/catch)
|
||||||
console.log(err);
|
let res;
|
||||||
res = 'My brain disconnected, try again.';
|
try {
|
||||||
|
const responseData = await this.send(this.chat_endpoint, {
|
||||||
|
model: model,
|
||||||
|
messages: messages,
|
||||||
|
stream: false
|
||||||
|
});
|
||||||
|
// The local endpoint apparently returns { message: { content: "..." } }
|
||||||
|
res = responseData?.message?.content || 'No response data.';
|
||||||
|
} catch (err) {
|
||||||
|
// If context length exceeded and we have turns to remove, try again with one fewer turn
|
||||||
|
if (err.message.toLowerCase().includes('context length') && turns.length > 1) {
|
||||||
|
console.log('Context length exceeded, trying again with shorter context.');
|
||||||
|
return await this.sendRequest(turns.slice(1), systemMessage);
|
||||||
|
} else {
|
||||||
|
console.log(err);
|
||||||
|
res = 'My brain disconnected, try again.';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the model name includes "deepseek-r1", then we handle the <think> block
|
||||||
|
if (this.model_name && this.model_name.includes("deepseek-r1")) {
|
||||||
|
const hasOpenTag = res.includes("<think>");
|
||||||
|
const hasCloseTag = res.includes("</think>");
|
||||||
|
|
||||||
|
// If there's a partial mismatch, we regenerate the response
|
||||||
|
if ((hasOpenTag && !hasCloseTag) || (!hasOpenTag && hasCloseTag)) {
|
||||||
|
console.warn("Partial <think> block detected. Re-generating...");
|
||||||
|
// Attempt another loop iteration to get a complete or no-think response
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If both tags appear, remove them (and everything inside)
|
||||||
|
if (hasOpenTag && hasCloseTag) {
|
||||||
|
res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We made it here with either a fully valid or not-needed to handle <think> scenario
|
||||||
|
finalRes = res;
|
||||||
|
break; // Break out of the while loop
|
||||||
}
|
}
|
||||||
return res;
|
|
||||||
|
// If after max attempts we STILL have partial tags, finalRes might be partial
|
||||||
|
// Or we never set finalRes because all attempts threw partial tags
|
||||||
|
if (finalRes == null) {
|
||||||
|
// This means we kept continuing in the loop but never got a break
|
||||||
|
console.warn("Could not get a valid <think> block or normal response after max attempts.");
|
||||||
|
finalRes = 'Response incomplete, please try again.';
|
||||||
|
}
|
||||||
|
return finalRes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Embedding method (unchanged).
|
||||||
|
*/
|
||||||
async embed(text) {
|
async embed(text) {
|
||||||
let model = this.model_name || 'nomic-embed-text';
|
let model = this.model_name || 'nomic-embed-text';
|
||||||
let body = {model: model, prompt: text};
|
let body = { model: model, prompt: text };
|
||||||
let res = await this.send(this.embedding_endpoint, body);
|
let res = await this.send(this.embedding_endpoint, body);
|
||||||
return res['embedding']
|
return res['embedding'];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generic send method for local endpoint.
|
||||||
|
*/
|
||||||
async send(endpoint, body) {
|
async send(endpoint, body) {
|
||||||
const url = new URL(endpoint, this.url);
|
const url = new URL(endpoint, this.url);
|
||||||
let method = 'POST';
|
const method = 'POST';
|
||||||
let headers = new Headers();
|
const headers = new Headers();
|
||||||
const request = new Request(url, {method, headers, body: JSON.stringify(body)});
|
const request = new Request(url, {
|
||||||
|
method,
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify(body)
|
||||||
|
});
|
||||||
|
|
||||||
let data = null;
|
let data = null;
|
||||||
try {
|
try {
|
||||||
const res = await fetch(request);
|
const res = await fetch(request);
|
||||||
|
@ -54,7 +117,8 @@ export class Local {
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Failed to send Ollama request.');
|
console.error('Failed to send Ollama request.');
|
||||||
console.error(err);
|
console.error(err);
|
||||||
|
throw err; // rethrow so we can catch it in the calling method
|
||||||
}
|
}
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue