mirror of
https://fastgit.cc/github.com/Michael-A-Kuykendall/shimmy
synced 2026-05-01 06:12:44 +08:00
- Add comprehensive MLX engine implementation with Python MLX bindings - Implement MLX model discovery, loading, and native inference pipeline - Add MLX feature flag compilation and Apple Silicon hardware detection - Create dedicated GitHub Actions workflow for MLX testing on macos-14 ARM64 - Add MLX documentation to README and wiki with capability descriptions - Implement pre-commit hooks enforcing cargo fmt, clippy, and test validation - Fix GPU backend tests to properly force specific backends instead of auto-detection - Resolve property test race conditions with serial test execution - Update release workflow validation and platform-specific test expectations - Add MLX implementation plan and cross-compilation toolchain support 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
4.6 KiB
4.6 KiB
Shimmy Integrations
Shimmy works with any tool that supports the OpenAI API. Here are the most popular integrations:
Code Editors
VSCode + Copilot
// settings.json
// VS Code settings use JSONC, so // comments are allowed here.
// Points GitHub Copilot's backend at the local Shimmy server.
{
"github.copilot.advanced": {
"serverUrl": "http://localhost:11435"
}
}
Cursor
- Open Settings (Ctrl/Cmd + ,)
- Go to "AI" settings
- Set custom endpoint:
http://localhost:11435
Continue.dev
// ~/.continue/config.json
// Registers Shimmy as an OpenAI-compatible provider for both chat and
// tab-autocomplete. "model" must match a name from `./shimmy list`.
{
"models": [{
"title": "Local Shimmy",
"provider": "openai",
"model": "your-model-name",
"apiBase": "http://localhost:11435/v1"
}],
"tabAutocompleteModel": {
"title": "Local Shimmy Tab",
"provider": "openai",
"model": "your-model-name",
"apiBase": "http://localhost:11435/v1"
}
}
Neovim + Copilot.lua
-- copilot.lua config
-- Overrides the Copilot language server's settings so completions are
-- served by the local Shimmy endpoint instead of GitHub's backend.
-- The ["*"] keys apply the override to every scope/language.
require('copilot').setup({
server_opts_overrides = {
settings = {
["*"] = {
["*"] = {
editorConfiguration = {
enableAutoCompletions = true,
},
advanced = {
serverUrl = "http://localhost:11435"
}
}
}
}
}
})
Programming Languages
Python
import openai

# Shimmy speaks the OpenAI API, so the official client works unmodified;
# the API key is required by the client but ignored by the server.
shimmy = openai.OpenAI(
    base_url="http://localhost:11435/v1",
    api_key="not-needed",
)

# Send a single-turn chat request to the locally served model.
reply = shimmy.chat.completions.create(
    model="your-model-name",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(reply.choices[0].message.content)
JavaScript/TypeScript
import OpenAI from 'openai';

// Shimmy exposes an OpenAI-compatible endpoint; the key is ignored server-side.
const client = new OpenAI({
  baseURL: 'http://localhost:11435/v1',
  apiKey: 'not-needed',
});

// One chat turn against the locally served model.
const reply = await client.chat.completions.create({
  model: 'your-model-name',
  messages: [{ role: 'user', content: 'Hello!' }],
});
console.log(reply.choices[0].message.content);
Rust
// Using reqwest
// NOTE(review): fragment only — `.await?` requires this to live inside an
// async fn returning a Result (e.g. under #[tokio::main]); confirm in context.
use serde_json::json;

// POST a single chat completion to the local Shimmy server.
let client = reqwest::Client::new();
let response = client
.post("http://localhost:11435/v1/chat/completions")
.json(&json!({
"model": "your-model-name",
"messages": [{"role": "user", "content": "Hello!"}]
}))
.send()
.await?;
Go
package main
import (
"context"
"fmt"
"github.com/sashabaranov/go-openai"
)
func main() {
config := openai.DefaultConfig("not-needed")
config.BaseURL = "http://localhost:11435/v1"
client := openai.NewClientWithConfig(config)
resp, err := client.CreateChatCompletion(
context.Background(),
openai.ChatCompletionRequest{
Model: "your-model-name",
Messages: []openai.ChatCompletionMessage{
{Role: "user", Content: "Hello!"},
},
},
)
if err != nil {
panic(err)
}
fmt.Println(resp.Choices[0].Message.Content)
}
CLI Tools
LLM (Simon Willison's tool)
# Install: pipx install llm
llm install llm-openai-compatible
# Configure
llm keys set openai-compatible
# API Key: not-needed
# Base URL: http://localhost:11435/v1
# Use
llm chat -m your-model-name "Hello!"
Aider
# Install: pipx install aider-chat
aider --openai-api-base http://localhost:11435/v1 --model your-model-name
Docker
Run Shimmy in Docker
# Minimal image: a shimmy binary plus your GGUF model files.
FROM ubuntu:22.04
COPY shimmy /usr/local/bin/
COPY models/ /models/
# Tell shimmy which model file to serve.
ENV SHIMMY_BASE_GGUF=/models/your-model.gguf
EXPOSE 11435
# Bind on 0.0.0.0 so the container port mapping is reachable from the host.
CMD ["shimmy", "serve", "--bind", "0.0.0.0:11435"]
docker build -t shimmy .
docker run -p 11435:11435 shimmy
Testing Tools
Simple curl test
# Non-streaming smoke test: expect a single JSON chat.completion response.
curl -X POST http://localhost:11435/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "your-model-name",
"messages": [{"role": "user", "content": "Say hello!"}],
"max_tokens": 10
}'
Streaming test
# Streaming test: "stream": true yields server-sent events (data: ... chunks).
curl -X POST http://localhost:11435/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "your-model-name",
"messages": [{"role": "user", "content": "Count to 5"}],
"max_tokens": 20,
"stream": true
}'
Common Issues
Model name not found: Use ./shimmy list to see available model names
Wrong port: Shimmy defaults to port 11435, but you can change it with the --bind flag
Performance issues: Try a smaller/faster model like Phi-3-mini
Tool not working: Ensure the tool supports custom OpenAI base URLs
Missing an integration? Open an issue and we'll add it!