mirror of
https://fastgit.cc/github.com/Michael-A-Kuykendall/shimmy
synced 2026-05-01 06:12:44 +08:00
style: apply cargo fmt (auto-indentation after module rename)
Cargo fmt automatically indented test functions after renaming mod issue_132_auto_stop_tokens to mod tests. This was not caught locally because we didn't run 'cargo fmt -- --check' before push. Root cause: Incomplete local validation workflow missing format check. Signed-off-by: Michael A. Kuykendall <michaelallenkuykendall@gmail.com>
This commit is contained in:
@@ -143,7 +143,8 @@ impl WorkflowEngine {
|
||||
}
|
||||
|
||||
// Check if workflow succeeded (all steps succeeded and all outputs found)
|
||||
let success = step_results.values().all(|result| result.success) && missing_outputs.is_empty();
|
||||
let success =
|
||||
step_results.values().all(|result| result.success) && missing_outputs.is_empty();
|
||||
|
||||
Ok(WorkflowResult {
|
||||
workflow_id: request.workflow.id,
|
||||
@@ -151,7 +152,10 @@ impl WorkflowEngine {
|
||||
step_results,
|
||||
outputs,
|
||||
error: if !missing_outputs.is_empty() {
|
||||
Some(format!("Missing output steps: {}", missing_outputs.join(", ")))
|
||||
Some(format!(
|
||||
"Missing output steps: {}",
|
||||
missing_outputs.join(", ")
|
||||
))
|
||||
} else if !success {
|
||||
Some("One or more steps failed".to_string())
|
||||
} else {
|
||||
|
||||
@@ -28,254 +28,254 @@
|
||||
mod tests {
|
||||
use shimmy::templates::TemplateFamily;
|
||||
|
||||
#[test]
|
||||
fn test_chatml_template_has_stop_tokens() {
|
||||
// Issue #132: ChatML templates must have stop tokens configured
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
#[test]
|
||||
fn test_chatml_template_has_stop_tokens() {
|
||||
// Issue #132: ChatML templates must have stop tokens configured
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
|
||||
// ChatML uses <|im_end|> and <|im_start|> as delimiters
|
||||
assert!(
|
||||
!stop_tokens.is_empty(),
|
||||
"ChatML template must have stop tokens configured"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|im_end|>".to_string()),
|
||||
"ChatML template must include <|im_end|> stop token"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|im_start|>".to_string()),
|
||||
"ChatML template must include <|im_start|> stop token"
|
||||
);
|
||||
}
|
||||
// ChatML uses <|im_end|> and <|im_start|> as delimiters
|
||||
assert!(
|
||||
!stop_tokens.is_empty(),
|
||||
"ChatML template must have stop tokens configured"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|im_end|>".to_string()),
|
||||
"ChatML template must include <|im_end|> stop token"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|im_start|>".to_string()),
|
||||
"ChatML template must include <|im_start|> stop token"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_llama3_template_has_stop_tokens() {
|
||||
// Issue #132: Llama3 templates must have stop tokens configured
|
||||
let template = TemplateFamily::Llama3;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
#[test]
|
||||
fn test_llama3_template_has_stop_tokens() {
|
||||
// Issue #132: Llama3 templates must have stop tokens configured
|
||||
let template = TemplateFamily::Llama3;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
|
||||
// Llama3 uses <|eot_id|> and <|end_of_text|> as delimiters
|
||||
assert!(
|
||||
!stop_tokens.is_empty(),
|
||||
"Llama3 template must have stop tokens configured"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|eot_id|>".to_string()),
|
||||
"Llama3 template must include <|eot_id|> stop token"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|end_of_text|>".to_string()),
|
||||
"Llama3 template must include <|end_of_text|> stop token"
|
||||
);
|
||||
}
|
||||
// Llama3 uses <|eot_id|> and <|end_of_text|> as delimiters
|
||||
assert!(
|
||||
!stop_tokens.is_empty(),
|
||||
"Llama3 template must have stop tokens configured"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|eot_id|>".to_string()),
|
||||
"Llama3 template must include <|eot_id|> stop token"
|
||||
);
|
||||
assert!(
|
||||
stop_tokens.contains(&"<|end_of_text|>".to_string()),
|
||||
"Llama3 template must include <|end_of_text|> stop token"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_openchat_template_stop_tokens() {
|
||||
// OpenChat doesn't use special tokens, so no stop tokens needed
|
||||
let template = TemplateFamily::OpenChat;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
assert!(
|
||||
stop_tokens.is_empty(),
|
||||
"OpenChat template should not have stop tokens"
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn test_openchat_template_stop_tokens() {
|
||||
// OpenChat doesn't use special tokens, so no stop tokens needed
|
||||
let template = TemplateFamily::OpenChat;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
assert!(
|
||||
stop_tokens.is_empty(),
|
||||
"OpenChat template should not have stop tokens"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gen_options_has_stop_tokens_field() {
|
||||
// Issue #132: GenOptions must support stop tokens
|
||||
use shimmy::engine::GenOptions;
|
||||
#[test]
|
||||
fn test_gen_options_has_stop_tokens_field() {
|
||||
// Issue #132: GenOptions must support stop tokens
|
||||
use shimmy::engine::GenOptions;
|
||||
|
||||
let opts = GenOptions::default();
|
||||
assert!(
|
||||
opts.stop_tokens.is_empty(),
|
||||
"Default GenOptions should have empty stop_tokens"
|
||||
);
|
||||
let opts = GenOptions::default();
|
||||
assert!(
|
||||
opts.stop_tokens.is_empty(),
|
||||
"Default GenOptions should have empty stop_tokens"
|
||||
);
|
||||
|
||||
// Test setting stop tokens
|
||||
let opts_with_stop = GenOptions {
|
||||
stop_tokens: vec!["<|im_end|>".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(opts_with_stop.stop_tokens.len(), 1);
|
||||
assert_eq!(opts_with_stop.stop_tokens[0], "<|im_end|>");
|
||||
}
|
||||
// Test setting stop tokens
|
||||
let opts_with_stop = GenOptions {
|
||||
stop_tokens: vec!["<|im_end|>".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(opts_with_stop.stop_tokens.len(), 1);
|
||||
assert_eq!(opts_with_stop.stop_tokens[0], "<|im_end|>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_chat_completion_request_accepts_stop_tokens() {
|
||||
// Issue #132: OpenAI-compatible API must accept stop tokens
|
||||
use shimmy::openai_compat::ChatCompletionRequest;
|
||||
#[test]
|
||||
fn test_chat_completion_request_accepts_stop_tokens() {
|
||||
// Issue #132: OpenAI-compatible API must accept stop tokens
|
||||
use shimmy::openai_compat::ChatCompletionRequest;
|
||||
|
||||
// Test with single stop token
|
||||
let json_single = r#"{
|
||||
// Test with single stop token
|
||||
let json_single = r#"{
|
||||
"model": "test-model",
|
||||
"messages": [{"role": "user", "content": "Hello"}],
|
||||
"stop": "<|im_end|>"
|
||||
}"#;
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_single).unwrap();
|
||||
assert!(req.stop.is_some(), "Request should parse single stop token");
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_single).unwrap();
|
||||
assert!(req.stop.is_some(), "Request should parse single stop token");
|
||||
|
||||
// Test with multiple stop tokens
|
||||
let json_multiple = r#"{
|
||||
// Test with multiple stop tokens
|
||||
let json_multiple = r#"{
|
||||
"model": "test-model",
|
||||
"messages": [{"role": "user", "content": "Hello"}],
|
||||
"stop": ["<|im_end|>", "<|im_start|>"]
|
||||
}"#;
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_multiple).unwrap();
|
||||
assert!(
|
||||
req.stop.is_some(),
|
||||
"Request should parse multiple stop tokens"
|
||||
);
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_multiple).unwrap();
|
||||
assert!(
|
||||
req.stop.is_some(),
|
||||
"Request should parse multiple stop tokens"
|
||||
);
|
||||
|
||||
// Test without stop tokens (should still parse)
|
||||
let json_no_stop = r#"{
|
||||
// Test without stop tokens (should still parse)
|
||||
let json_no_stop = r#"{
|
||||
"model": "test-model",
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
}"#;
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_no_stop).unwrap();
|
||||
assert!(
|
||||
req.stop.is_none(),
|
||||
"Request should parse without stop tokens"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_template_specific_stop_tokens_are_correct() {
|
||||
// Issue #132: Verify each template has the correct stop tokens
|
||||
// based on their respective specifications
|
||||
|
||||
// ChatML (used by Qwen, ChatGLM, gpt-oss, etc.)
|
||||
let chatml = TemplateFamily::ChatML;
|
||||
let chatml_stops = chatml.stop_tokens();
|
||||
assert_eq!(
|
||||
chatml_stops.len(),
|
||||
2,
|
||||
"ChatML should have exactly 2 stop tokens"
|
||||
);
|
||||
|
||||
// Llama3 (used by Meta Llama 3.x models)
|
||||
let llama3 = TemplateFamily::Llama3;
|
||||
let llama3_stops = llama3.stop_tokens();
|
||||
assert_eq!(
|
||||
llama3_stops.len(),
|
||||
2,
|
||||
"Llama3 should have exactly 2 stop tokens"
|
||||
);
|
||||
|
||||
// OpenChat (simple format without special tokens)
|
||||
let openchat = TemplateFamily::OpenChat;
|
||||
let openchat_stops = openchat.stop_tokens();
|
||||
assert_eq!(
|
||||
openchat_stops.len(),
|
||||
0,
|
||||
"OpenChat should have no stop tokens"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stop_tokens_prevent_template_leakage() {
|
||||
// Issue #132: The original problem - template tokens leaking into output
|
||||
// This test verifies the fix at the API contract level
|
||||
|
||||
use shimmy::templates::TemplateFamily;
|
||||
|
||||
// Simulate what happens with gpt-oss-20b (ChatML template)
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
|
||||
// Example problematic output from Issue #132
|
||||
let mut simulated_output = "Sure! Here's the answer: 42<|im_end|>".to_string();
|
||||
|
||||
// Verify stop tokens are configured
|
||||
assert!(!stop_tokens.is_empty(), "ChatML must have stop tokens");
|
||||
|
||||
// Simulate the truncation logic from generate()
|
||||
for stop_token in &stop_tokens {
|
||||
if let Some(pos) = simulated_output.rfind(stop_token) {
|
||||
simulated_output.truncate(pos);
|
||||
break;
|
||||
}
|
||||
let req: ChatCompletionRequest = serde_json::from_str(json_no_stop).unwrap();
|
||||
assert!(
|
||||
req.stop.is_none(),
|
||||
"Request should parse without stop tokens"
|
||||
);
|
||||
}
|
||||
|
||||
// After truncation, the stop token should be removed
|
||||
assert_eq!(
|
||||
simulated_output, "Sure! Here's the answer: 42",
|
||||
"Stop token should be removed from output"
|
||||
);
|
||||
assert!(
|
||||
!simulated_output.contains("<|im_end|>"),
|
||||
"Output should not contain <|im_end|> after truncation"
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn test_template_specific_stop_tokens_are_correct() {
|
||||
// Issue #132: Verify each template has the correct stop tokens
|
||||
// based on their respective specifications
|
||||
|
||||
#[test]
|
||||
fn test_multiple_stop_token_truncation() {
|
||||
// Issue #132: Test that the first matching stop token is used for truncation
|
||||
use shimmy::templates::TemplateFamily;
|
||||
// ChatML (used by Qwen, ChatGLM, gpt-oss, etc.)
|
||||
let chatml = TemplateFamily::ChatML;
|
||||
let chatml_stops = chatml.stop_tokens();
|
||||
assert_eq!(
|
||||
chatml_stops.len(),
|
||||
2,
|
||||
"ChatML should have exactly 2 stop tokens"
|
||||
);
|
||||
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
// Llama3 (used by Meta Llama 3.x models)
|
||||
let llama3 = TemplateFamily::Llama3;
|
||||
let llama3_stops = llama3.stop_tokens();
|
||||
assert_eq!(
|
||||
llama3_stops.len(),
|
||||
2,
|
||||
"Llama3 should have exactly 2 stop tokens"
|
||||
);
|
||||
|
||||
// Output with multiple potential stop points
|
||||
let mut output = "Hello<|im_start|>user\nMore text<|im_end|>".to_string();
|
||||
|
||||
// Find and truncate at the first occurrence
|
||||
for stop_token in &stop_tokens {
|
||||
if let Some(pos) = output.rfind(stop_token) {
|
||||
output.truncate(pos);
|
||||
break;
|
||||
}
|
||||
// OpenChat (simple format without special tokens)
|
||||
let openchat = TemplateFamily::OpenChat;
|
||||
let openchat_stops = openchat.stop_tokens();
|
||||
assert_eq!(
|
||||
openchat_stops.len(),
|
||||
0,
|
||||
"OpenChat should have no stop tokens"
|
||||
);
|
||||
}
|
||||
|
||||
// Should truncate at <|im_end|> since rfind finds last occurrence
|
||||
assert!(
|
||||
!output.contains("<|im_end|>"),
|
||||
"Should truncate at stop token"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gen_options_serialization_with_stop_tokens() {
|
||||
// Issue #132: Verify GenOptions can be serialized/deserialized with stop tokens
|
||||
use shimmy::engine::GenOptions;
|
||||
|
||||
let opts = GenOptions {
|
||||
stop_tokens: vec!["<|im_end|>".to_string(), "<|im_start|>".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Serialize
|
||||
let json = serde_json::to_string(&opts).unwrap();
|
||||
assert!(
|
||||
json.contains("stop_tokens"),
|
||||
"Serialized JSON should include stop_tokens field"
|
||||
);
|
||||
|
||||
// Deserialize
|
||||
let deserialized: GenOptions = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(
|
||||
deserialized.stop_tokens.len(),
|
||||
2,
|
||||
"Deserialized GenOptions should preserve stop tokens"
|
||||
);
|
||||
assert_eq!(deserialized.stop_tokens[0], "<|im_end|>");
|
||||
assert_eq!(deserialized.stop_tokens[1], "<|im_start|>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stop_tokens_default_to_empty() {
|
||||
// Issue #132: Default GenOptions should have empty stop_tokens, not None
|
||||
use shimmy::engine::GenOptions;
|
||||
|
||||
let opts = GenOptions::default();
|
||||
assert!(
|
||||
opts.stop_tokens.is_empty(),
|
||||
"Default GenOptions should have empty stop_tokens vector"
|
||||
);
|
||||
|
||||
// Should be able to extend it
|
||||
let mut opts_extended = opts;
|
||||
opts_extended.stop_tokens.push("custom_stop".to_string());
|
||||
assert_eq!(opts_extended.stop_tokens.len(), 1);
|
||||
}
|
||||
#[test]
|
||||
fn test_stop_tokens_prevent_template_leakage() {
|
||||
// Issue #132: The original problem - template tokens leaking into output
|
||||
// This test verifies the fix at the API contract level
|
||||
|
||||
use shimmy::templates::TemplateFamily;
|
||||
|
||||
// Simulate what happens with gpt-oss-20b (ChatML template)
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
|
||||
// Example problematic output from Issue #132
|
||||
let mut simulated_output = "Sure! Here's the answer: 42<|im_end|>".to_string();
|
||||
|
||||
// Verify stop tokens are configured
|
||||
assert!(!stop_tokens.is_empty(), "ChatML must have stop tokens");
|
||||
|
||||
// Simulate the truncation logic from generate()
|
||||
for stop_token in &stop_tokens {
|
||||
if let Some(pos) = simulated_output.rfind(stop_token) {
|
||||
simulated_output.truncate(pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// After truncation, the stop token should be removed
|
||||
assert_eq!(
|
||||
simulated_output, "Sure! Here's the answer: 42",
|
||||
"Stop token should be removed from output"
|
||||
);
|
||||
assert!(
|
||||
!simulated_output.contains("<|im_end|>"),
|
||||
"Output should not contain <|im_end|> after truncation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_stop_token_truncation() {
|
||||
// Issue #132: Test that the first matching stop token is used for truncation
|
||||
use shimmy::templates::TemplateFamily;
|
||||
|
||||
let template = TemplateFamily::ChatML;
|
||||
let stop_tokens = template.stop_tokens();
|
||||
|
||||
// Output with multiple potential stop points
|
||||
let mut output = "Hello<|im_start|>user\nMore text<|im_end|>".to_string();
|
||||
|
||||
// Find and truncate at the first occurrence
|
||||
for stop_token in &stop_tokens {
|
||||
if let Some(pos) = output.rfind(stop_token) {
|
||||
output.truncate(pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Should truncate at <|im_end|> since rfind finds last occurrence
|
||||
assert!(
|
||||
!output.contains("<|im_end|>"),
|
||||
"Should truncate at stop token"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gen_options_serialization_with_stop_tokens() {
|
||||
// Issue #132: Verify GenOptions can be serialized/deserialized with stop tokens
|
||||
use shimmy::engine::GenOptions;
|
||||
|
||||
let opts = GenOptions {
|
||||
stop_tokens: vec!["<|im_end|>".to_string(), "<|im_start|>".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Serialize
|
||||
let json = serde_json::to_string(&opts).unwrap();
|
||||
assert!(
|
||||
json.contains("stop_tokens"),
|
||||
"Serialized JSON should include stop_tokens field"
|
||||
);
|
||||
|
||||
// Deserialize
|
||||
let deserialized: GenOptions = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(
|
||||
deserialized.stop_tokens.len(),
|
||||
2,
|
||||
"Deserialized GenOptions should preserve stop tokens"
|
||||
);
|
||||
assert_eq!(deserialized.stop_tokens[0], "<|im_end|>");
|
||||
assert_eq!(deserialized.stop_tokens[1], "<|im_start|>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stop_tokens_default_to_empty() {
|
||||
// Issue #132: Default GenOptions should have empty stop_tokens, not None
|
||||
use shimmy::engine::GenOptions;
|
||||
|
||||
let opts = GenOptions::default();
|
||||
assert!(
|
||||
opts.stop_tokens.is_empty(),
|
||||
"Default GenOptions should have empty stop_tokens vector"
|
||||
);
|
||||
|
||||
// Should be able to extend it
|
||||
let mut opts_extended = opts;
|
||||
opts_extended.stop_tokens.push("custom_stop".to_string());
|
||||
assert_eq!(opts_extended.stop_tokens.len(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user