Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: move tensorrt executable to engine #2400

Merged
merged 6 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
chore: update path for tensorrt engine (#2404)
Signed-off-by: James <[email protected]>
Co-authored-by: James <[email protected]>
  • Loading branch information
namchuai and James committed Mar 17, 2024
commit fce229ae0ad9a4d5ec9305103367e2eca314c157
2 changes: 2 additions & 0 deletions extensions/tensorrt-llm-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
"0.1.0"
]
},
"tensorrtVersion": "0.1.6",
"provider": "nitro-tensorrt-llm",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
Expand Down
7 changes: 5 additions & 2 deletions extensions/tensorrt-llm-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ export default [
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify('0.1.6'),
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
PROVIDER: JSON.stringify(packageJson.provider),
DOWNLOAD_RUNNER_URL:
process.platform === 'darwin' || process.platform === 'win32'
process.platform === 'win32'
? JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
)
Expand Down Expand Up @@ -53,6 +54,8 @@ export default [
plugins: [
replace({
EXTENSION_NAME: JSON.stringify(packageJson.name),
TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion),
PROVIDER: JSON.stringify(packageJson.provider),
LOAD_MODEL_URL: JSON.stringify(
`${packageJson.config?.protocol ?? 'http'}://${packageJson.config?.host}:${packageJson.config?.port}/inferences/tensorrtllm/loadmodel`
),
Expand Down
1 change: 1 addition & 0 deletions extensions/tensorrt-llm-extension/src/@types/global.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ declare const DOWNLOAD_RUNNER_URL: string
declare const TENSORRT_VERSION: string
declare const COMPATIBILITY: object
declare const EXTENSION_NAME: string
declare const PROVIDER: string
16 changes: 10 additions & 6 deletions extensions/tensorrt-llm-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
* Override custom function name for loading and unloading model
* Which are implemented from node module
*/
override provider = 'nitro-tensorrt-llm'
override provider = PROVIDER
override inferenceUrl = INFERENCE_URL
override nodeModule = NODE

Expand Down Expand Up @@ -86,12 +86,13 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
}

const janDataFolderPath = await getJanDataFolderPath()
const extensionName = EXTENSION_NAME
const engineVersion = TENSORRT_VERSION

const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
extensionName,
this.provider,
engineVersion,
firstGpu.arch,
])

Expand Down Expand Up @@ -201,11 +202,14 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
)
return 'NotInstalled'
}
const extensionName = EXTENSION_NAME
const janDataFolderPath = await getJanDataFolderPath()
const engineVersion = TENSORRT_VERSION

const enginePath = await joinPath([
await getJanDataFolderPath(),
janDataFolderPath,
'engines',
extensionName,
this.provider,
engineVersion,
firstGpu.arch,
osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
])
Expand Down
16 changes: 12 additions & 4 deletions extensions/tensorrt-llm-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
* Initializes a engine subprocess to load a machine learning model.
* @param params - The model load settings.
*/
async function loadModel(params: any, systemInfo?: SystemInformation): Promise<{ error: Error | undefined }> {
async function loadModel(
params: any,
systemInfo?: SystemInformation
): Promise<{ error: Error | undefined }> {
// modelFolder is the absolute path to the running model folder
// e.g. ~/jan/models/llama-2
let modelFolder = params.modelFolder
Expand Down Expand Up @@ -73,7 +76,10 @@ function unloadModel(): Promise<any> {
* 2. Load model into engine subprocess
* @returns
*/
async function runEngineAndLoadModel(settings: ModelLoadParams, systemInfo: SystemInformation) {
async function runEngineAndLoadModel(
settings: ModelLoadParams,
systemInfo: SystemInformation
) {
return unloadModel()
.then(() => runEngine(systemInfo))
.then(() => loadModelRequest(settings))
Expand Down Expand Up @@ -150,15 +156,17 @@ async function runEngine(systemInfo: SystemInformation): Promise<void> {
)
}
const janDataFolderPath = await getJanDataFolderPath()
const extensionName = EXTENSION_NAME
const tensorRtVersion = TENSORRT_VERSION
const provider = PROVIDER

return new Promise<void>((resolve, reject) => {
// Current directory by default

const executableFolderPath = path.join(
janDataFolderPath,
'engines',
extensionName,
provider,
tensorRtVersion,
gpuArch
)
const nitroExecutablePath = path.join(
Expand Down
Loading