// Get the element where you want to display the output
const outputElement = document.getElementById("output");
for await (const text of prediction) {
outputElement.textContent += text;
}
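The snippets in this section assume an LMStudioClient instance named client. As a minimal sketch (assuming the @lmstudio/sdk package and its default connection settings), such a client can be created like this:
import { LMStudioClient } from "@lmstudio/sdk";

// Connect to the LM Studio local server using the default settings
const client = new LMStudioClient();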
By default, all models loaded by a client are unloaded when that client disconnects from LM Studio. You can prevent this by setting the noHup option to true.
await client.llm.load("lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", {
config: { gpuOffload: "max" },
noHup: true,
});
// The model stays loaded even after the client disconnects
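A model kept loaded with noHup stays in memory until something unloads it. As a hedged sketch (assuming the SDK exposes an unload method on client.llm keyed by the model's identifier, which may differ between SDK versions), it could later be freed explicitly:
// Assumes the model was loaded with identifier: "my-model"
await client.llm.unload("my-model");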
Giving a Loaded Model a Friendly Name
When loading a model, you can set an identifier for it. This identifier can be used to refer to the model later.
await client.llm.load("lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", {
config: { gpuOffload: "max" },
identifier: "my-model",
});
// You can refer to the model later using the identifier
const myModel = await client.llm.get("my-model");
// myModel.complete(...);
Loading a Model with a Custom Configuration
By default, a model's load configuration comes from the preset associated with that model (this can be changed on the My Models page in LM Studio).
const llama3 = await client.llm.load("lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", {
config: {
gpuOffload: "max",
contextLength: 1024,
gpuOffload: 0.5, // Offloads 50% of the computation to the GPU
},
});
// llama3.complete(...);
Loading a Model with a Specific Preset
The preset determines a model's default load configuration and default inference configuration. By default, the preset associated with the model is used (this can be changed on the My Models page in LM Studio). You can change which preset is used by specifying the preset option.
const llama3 = await client.llm.load("lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", {
config: { gpuOffload: "max" }, // Overrides the presetpreset: "My ChatML",
});
// Matches any quantization
const llama3 = await client.llm.get({ path: "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF" });

// Or if a specific quantization is desired:
const llama3 = await client.llm.get({
path: "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
});
// llama3.complete(...);
const loadedModels = await client.llm.listLoaded();
if (loadedModels.length === 0) {
  throw new Error("No models loaded");
}
// Use the first one
const firstModel = await client.llm.get({ identifier: loadedModels[0].identifier });
// firstModel.complete(...);
Text Completion
To perform a text completion, use the complete method.
const prediction = model.complete("The meaning of life is");
for await (const text of prediction) {
process.stdout.write(text);
}
By default, the inference parameters from the preset are used for the prediction. You can override them like this:
const prediction = anyModel.complete("Meaning of life is", {
contextOverflowPolicy: "stopAtLimit",
maxPredictedTokens: 100,
prePrompt: "Some pre-prompt",
stopStrings: ["\n"],
temperature: 0.7,
});
// ...Do stuff with the prediction...
Conversational Completion
To carry out a conversation, use the respond method.
const prediction = anyModel.respond([
{ role: "system", content: "Answer the following questions." },
{ role: "user", content: "What is the meaning of life?" },
]);
for await (const text of prediction) {
process.stdout.write(text);
}
Likewise, you can override the inference parameters for the conversation (note that the available options differ from those for text completion).
const prediction = anyModel.respond(
[
{ role: "system", content: "Answer the following questions." },
{ role: "user", content: "What is the meaning of life?" },
],
{
contextOverflowPolicy: "stopAtLimit",
maxPredictedTokens: 100,
stopStrings: ["\n"],
temperature: 0.7,
inputPrefix: "Q: ",
inputSuffix: "\nA:",
},
);
// ...Do stuff with the prediction...
const prediction = model.complete("The meaning of life is");
for await (const text of prediction) {
process.stdout.write(text);
}
const { stats } = await prediction;
console.log(stats);
Info
If you have already consumed the prediction stream, awaiting the prediction object does not cause any additional waiting, because the result is cached in the prediction object.
On the other hand, if you only care about the final result, there is no need to iterate over the stream. Instead, you can await the prediction object directly to get the final result.
const prediction = model.complete("The meaning of life is");
const result = await prediction;
const content = result.content;
const stats = result.stats;
// Or just:
const { content, stats } = await model.complete("The meaning of life is");
Producing JSON (Structured Output)
LM Studio supports structured prediction, which forces the model to produce content that conforms to a specific structure. To enable structured prediction, set the structured field. It works with both the complete and respond methods.
Here is an example of how to use structured prediction:
const prediction = model.complete("Here is a joke in JSON:", {
maxPredictedTokens: 100,
structured: { type: "json" },
});
const result = await prediction;
try {
  // Although the LLM is guaranteed to only produce valid JSON, when it is interrupted, the
  // partial result might not be. Always check for errors. (See below)
  const parsed = JSON.parse(result.content);
console.info(parsed);
} catch (e) {
console.error(e);
}
for await (const text of prediction) {
process.stdout.write(text);
}
const { stats } = await prediction;
if (stats.stopReason === "userStopped") {
console.log("Prediction was canceled by the user");
}
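The userStopped stop reason above indicates that the prediction was canceled before it ran to completion. As a minimal sketch (assuming the prediction object exposes a cancel method, as in recent versions of the SDK), canceling a prediction might look like this:
const prediction = model.complete("The meaning of life is", {
  maxPredictedTokens: 1000,
});

// Cancel after one second; stats.stopReason should then be "userStopped"
setTimeout(() => prediction.cancel(), 1000);

const { stats } = await prediction;
console.log(stats.stopReason);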