diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4aa9a0f..66978b1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -356,13 +356,16 @@ jobs: # Skip publish if this version already exists on crates.io (allows re-running release without bump) VERSION="${GITHUB_REF_NAME#v}" - if curl -sSf "https://crates.io/api/v1/crates/shimmy/${VERSION}" >/dev/null; then + STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://crates.io/api/v1/crates/shimmy/${VERSION}") + if [[ "$STATUS" == "200" ]]; then echo "ℹ️ shimmy ${VERSION} already published to crates.io; skipping publish" - else + elif [[ "$STATUS" == "404" ]]; then # Publish to crates.io (dry-run already validated in Gate 7) cargo publish $DIRTY_FLAG || (echo "❌ cargo publish failed"; exit 1) echo "✅ Successfully published shimmy ${{ github.ref_name }} to crates.io!" echo "📦 Users can now install with: cargo install shimmy" + else + echo "⚠️ crates.io lookup returned HTTP ${STATUS}; assuming ${VERSION} is already published and skipping publish" fi - name: "🐳 Build and Push Docker Image to GHCR" diff --git a/src/engine/llama.rs b/src/engine/llama.rs index 30ca6d4..ed317f1 100644 --- a/src/engine/llama.rs +++ b/src/engine/llama.rs @@ -305,6 +305,7 @@ impl LlamaEngine { /// Calculate adaptive batch size based on context length to prevent GGML assert failures /// with large prompts (Issue #140) + #[cfg(feature = "llama")] fn calculate_adaptive_batch_size(ctx_len: usize) -> u32 { // Base batch size for smaller contexts const BASE_BATCH_SIZE: u32 = 2048;