diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4aa9a0f..66978b1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -356,13 +356,16 @@ jobs: # Skip publish if this version already exists on crates.io (allows re-running release without bump) VERSION="${GITHUB_REF_NAME#v}" - if curl -sSf "https://crates.io/api/v1/crates/shimmy/${VERSION}" >/dev/null; then + STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://crates.io/api/v1/crates/shimmy/${VERSION}") + if [[ "$STATUS" == "200" ]]; then echo "ℹ️ shimmy ${VERSION} already published to crates.io; skipping publish" - else + elif [[ "$STATUS" == "404" ]]; then # Publish to crates.io (dry-run already validated in Gate 7) cargo publish $DIRTY_FLAG || (echo "❌ cargo publish failed"; exit 1) echo "✅ Successfully published shimmy ${{ github.ref_name }} to crates.io!" echo "📦 Users can now install with: cargo install shimmy" + else + echo "⚠️ crates.io lookup returned HTTP ${STATUS}; assuming ${VERSION} is already published and skipping publish" fi - name: "🐳 Build and Push Docker Image to GHCR" diff --git a/src/engine/llama.rs b/src/engine/llama.rs index 30ca6d4..ed317f1 100644 --- a/src/engine/llama.rs +++ b/src/engine/llama.rs @@ -305,6 +305,7 @@ impl LlamaEngine { /// Calculate adaptive batch size based on context length to prevent GGML assert failures /// with large prompts (Issue #140) + #[cfg(feature = "llama")] fn calculate_adaptive_batch_size(ctx_len: usize) -> u32 { // Base batch size for smaller contexts const BASE_BATCH_SIZE: u32 = 2048;