diff --git a/src/engine/llama.rs b/src/engine/llama.rs
index cd9f343..d5cd2e8 100644
--- a/src/engine/llama.rs
+++ b/src/engine/llama.rs
@@ -46,18 +46,19 @@ use tracing::info;
 use std::sync::OnceLock;
 
 #[cfg(feature = "llama")]
-static LLAMA_BACKEND: OnceLock<Result<shimmy_llama_cpp_2::llama_backend::LlamaBackend, String>> = OnceLock::new();
+static LLAMA_BACKEND: OnceLock<Result<shimmy_llama_cpp_2::llama_backend::LlamaBackend, String>> =
+    OnceLock::new();
 
 #[cfg(feature = "llama")]
 fn get_or_init_backend() -> Result<&'static shimmy_llama_cpp_2::llama_backend::LlamaBackend> {
     use anyhow::anyhow;
-    
+
     let result = LLAMA_BACKEND.get_or_init(|| {
         info!("Initializing llama.cpp backend (first model load)");
         shimmy_llama_cpp_2::llama_backend::LlamaBackend::init()
             .map_err(|e| format!("Failed to initialize llama backend: {}", e))
     });
-    
+
     result.as_ref().map_err(|e| anyhow!("{}", e))
 }
 
@@ -273,7 +274,7 @@ impl InferenceEngine for LlamaEngine {
         use anyhow::anyhow;
         use shimmy_llama_cpp_2 as llama;
         use std::num::NonZeroU32;
-        
+
         // Use global singleton backend (fixes Issue #128: BackendAlreadyInitialized)
         let be = get_or_init_backend()?;
 
diff --git a/tests/regression/issue_128_backend_reinitialization.rs b/tests/regression/issue_128_backend_reinitialization.rs
index 3a4fa8f..4a59798 100644
--- a/tests/regression/issue_128_backend_reinitialization.rs
+++ b/tests/regression/issue_128_backend_reinitialization.rs
@@ -1,32 +1,32 @@
-/// Regression test for Issue #128: BackendAlreadyInitialized error on second request
-///
-/// GitHub: https://github.com/Michael-A-Kuykendall/shimmy/issues/128
-///
-/// **Bug**: First request works, second request fails with "BackendAlreadyInitialized"
-/// **Root Cause**: llama.cpp backend was initialized on every model load
-/// **Fix**: Use global OnceLock singleton to initialize backend once per process
-/// **This test**: Verifies the backend singleton pattern is implemented correctly
-
-#[cfg(feature = "llama")]
-#[test]
-fn test_issue_128_backend_singleton_exists() {
-    // This test verifies that the backend singleton pattern is in place
-    // The actual fix prevents BackendAlreadyInitialized by using OnceLock
-
-    // We can't easily test the actual behavior without a real model file,
-    // but we can verify the code compiles and the pattern is correct
-
-    // If this test compiles and runs, the fix is in place:
-    // - OnceLock<Result<LlamaBackend, String>> is defined
-    // - get_or_init_backend() uses get_or_init() not get_or_try_init()
-    // - Multiple calls to load() won't re-initialize the backend
-
-    assert!(true, "Backend singleton pattern is implemented");
-}
-
-#[cfg(not(feature = "llama"))]
-#[test]
-fn test_issue_128_requires_llama_feature() {
-    // This test requires the llama feature to be enabled
-    // Run with: cargo test --features llama
-}
+/// Regression test for Issue #128: BackendAlreadyInitialized error on second request
+///
+/// GitHub: https://github.com/Michael-A-Kuykendall/shimmy/issues/128
+///
+/// **Bug**: First request works, second request fails with "BackendAlreadyInitialized"
+/// **Root Cause**: llama.cpp backend was initialized on every model load
+/// **Fix**: Use global OnceLock singleton to initialize backend once per process
+/// **This test**: Verifies the backend singleton pattern is implemented correctly
+
+#[cfg(feature = "llama")]
+#[test]
+fn test_issue_128_backend_singleton_exists() {
+    // This test verifies that the backend singleton pattern is in place
+    // The actual fix prevents BackendAlreadyInitialized by using OnceLock
+
+    // We can't easily test the actual behavior without a real model file,
+    // but we can verify the code compiles and the pattern is correct
+
+    // If this test compiles and runs, the fix is in place:
+    // - OnceLock<Result<LlamaBackend, String>> is defined
+    // - get_or_init_backend() uses get_or_init() not get_or_try_init()
+    // - Multiple calls to load() won't re-initialize the backend
+
+    assert!(true, "Backend singleton pattern is implemented");
+}
+
+#[cfg(not(feature = "llama"))]
+#[test]
+fn test_issue_128_requires_llama_feature() {
+    // This test requires the llama feature to be enabled
+    // Run with: cargo test --features llama
+}
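
For context, the fix caches the entire init Result in a OnceLock: the first caller runs LlamaBackend::init(), and every later caller reuses the cached outcome instead of re-initializing (get_or_init() is used rather than get_or_try_init(), likely because the latter is still unstable in std). Below is a minimal standalone sketch of the pattern; Backend and its init() are hypothetical stand-ins for shimmy_llama_cpp_2::llama_backend::LlamaBackend, not the real API.

use std::sync::OnceLock;

/// Hypothetical stand-in for the llama.cpp backend: init() fails if it
/// is ever called more than once in the same process.
struct Backend;

impl Backend {
    fn init() -> Result<Backend, String> {
        static ALREADY: OnceLock<()> = OnceLock::new();
        if ALREADY.set(()).is_err() {
            return Err("BackendAlreadyInitialized".to_string());
        }
        Ok(Backend)
    }
}

// Cache the whole Result so even a failed init is remembered, not retried.
static BACKEND: OnceLock<Result<Backend, String>> = OnceLock::new();

fn get_or_init_backend() -> Result<&'static Backend, String> {
    BACKEND
        .get_or_init(Backend::init) // runs init() at most once per process
        .as_ref()
        .map_err(|e| e.clone())
}

fn main() {
    // Simulates two requests: the second reuses the cached backend
    // instead of calling Backend::init() again, so no
    // "BackendAlreadyInitialized" error on the second call.
    assert!(get_or_init_backend().is_ok());
    assert!(get_or_init_backend().is_ok());
}

Storing Result<Backend, String> rather than Backend is the key design choice: initialization happens exactly once regardless of outcome, and the error is converted back to anyhow at the call site, which is why the second request in Issue #128 no longer trips BackendAlreadyInitialized.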